Papers
- Back
Unmixing Diffusion for Self-Supervised Hyperspectral Image Denoising-
[pdf]
[supp]
[bibtex]@InProceedings{Zeng_2024_CVPR,
  author    = {Zeng, Haijin and Cao, Jiezhang and Zhang, Kai and Chen, Yongyong and Luong, Hiep and Philips, Wilfried},
  title     = {Unmixing Diffusion for Self-Supervised Hyperspectral Image Denoising},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {27820--27830},
}
Seeing the World through Your Eyes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Alzayer_2024_CVPR,
  author    = {Alzayer, Hadi and Zhang, Kevin and Feng, Brandon and Metzler, Christopher A. and Huang, Jia-Bin},
  title     = {Seeing the World through Your Eyes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4864--4873},
}
DPMesh: Exploiting Diffusion Prior for Occluded Human Mesh Recovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2024_CVPR,
  author    = {Zhu, Yixuan and Li, Ao and Tang, Yansong and Zhao, Wenliang and Zhou, Jie and Lu, Jiwen},
  title     = {{DPMesh}: Exploiting Diffusion Prior for Occluded Human Mesh Recovery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1101--1110},
}
Ungeneralizable Examples-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2024_CVPR,
  author    = {Ye, Jingwen and Wang, Xinchao},
  title     = {Ungeneralizable Examples},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11944--11953},
}
LaneCPP: Continuous 3D Lane Detection using Physical Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pittner_2024_CVPR,
  author    = {Pittner, Maximilian and Janai, Joel and Condurache, Alexandru P.},
  title     = {{LaneCPP}: Continuous {3D} Lane Detection using Physical Priors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10639--10648},
}
CityDreamer: Compositional Generative Model of Unbounded 3D Cities-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2024_CVPR,
  author    = {Xie, Haozhe and Chen, Zhaoxi and Hong, Fangzhou and Liu, Ziwei},
  title     = {{CityDreamer}: Compositional Generative Model of Unbounded {3D} Cities},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9666--9675},
}
HEAL-SWIN: A Vision Transformer On The Sphere-
[pdf]
[supp]
[bibtex]@InProceedings{Carlsson_2024_CVPR,
  author    = {Carlsson, Oscar and Gerken, Jan E. and Linander, Hampus and Spie{\ss}, Heiner and Ohlsson, Fredrik and Petersson, Christoffer and Persson, Daniel},
  title     = {{HEAL-SWIN}: A Vision Transformer On The Sphere},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6067--6077},
}
3D Paintbrush: Local Stylization of 3D Shapes with Cascaded Score Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Decatur_2024_CVPR,
  author    = {Decatur, Dale and Lang, Itai and Aberman, Kfir and Hanocka, Rana},
  title     = {{3D} Paintbrush: Local Stylization of {3D} Shapes with Cascaded Score Distillation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4473--4483},
}
Test-Time Linear Out-of-Distribution Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Fan_2024_CVPR,
  author    = {Fan, Ke and Liu, Tong and Qiu, Xingyu and Wang, Yikai and Huai, Lian and Shangguan, Zeyu and Gou, Shuang and Liu, Fengjian and Fu, Yuqian and Fu, Yanwei and Jiang, Xingqun},
  title     = {Test-Time Linear Out-of-Distribution Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {23752--23761},
}
Guided Slot Attention for Unsupervised Video Object Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lee_2024_CVPR,
  author    = {Lee, Minhyeok and Cho, Suhwan and Lee, Dogyoon and Park, Chaewon and Lee, Jungho and Lee, Sangyoun},
  title     = {Guided Slot Attention for Unsupervised Video Object Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3807--3816},
}
Unsupervised Blind Image Deblurring Based on Self-Enhancement-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Lufei and Tian, Xiangpeng and Xiong, Shuhua and Lei, Yinjie and Ren, Chao},
  title     = {Unsupervised Blind Image Deblurring Based on Self-Enhancement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {25691--25700},
}
Action Detection via an Image Diffusion Process-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Foo_2024_CVPR,
  author    = {Foo, Lin Geng and Li, Tianjiao and Rahmani, Hossein and Liu, Jun},
  title     = {Action Detection via an Image Diffusion Process},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18351--18361},
}
Programmable Motion Generation for Open-Set Motion Control Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Hanchao and Zhan, Xiaohang and Huang, Shaoli and Mu, Tai-Jiang and Shan, Ying},
  title     = {Programmable Motion Generation for Open-Set Motion Control Tasks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1399--1408},
}
SCE-MAE: Selective Correspondence Enhancement with Masked Autoencoder for Self-Supervised Landmark Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Yin_2024_CVPR,
  author    = {Yin, Kejia and Rao, Varshanth and Jiang, Ruowei and Liu, Xudong and Aarabi, Parham and Lindell, David B.},
  title     = {{SCE-MAE}: Selective Correspondence Enhancement with Masked Autoencoder for Self-Supervised Landmark Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1313--1322},
}
LAKE-RED: Camouflaged Images Generation by Latent Background Knowledge Retrieval-Augmented Diffusion-
[pdf]
[bibtex]@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Pancheng and Xu, Peng and Qin, Pengda and Fan, Deng-Ping and Zhang, Zhicheng and Jia, Guoli and Zhou, Bowen and Yang, Jufeng},
  title     = {{LAKE-RED}: Camouflaged Images Generation by Latent Background Knowledge Retrieval-Augmented Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4092--4101},
}
TIGER: Time-Varying Denoising Model for 3D Point Cloud Generation with Diffusion Process-
[pdf]
[supp]
[bibtex]@InProceedings{Ren_2024_CVPR,
  author    = {Ren, Zhiyuan and Kim, Minchul and Liu, Feng and Liu, Xiaoming},
  title     = {{TIGER}: Time-Varying Denoising Model for {3D} Point Cloud Generation with Diffusion Process},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9462--9471},
}
ConTex-Human: Free-View Rendering of Human from a Single Image with Texture-Consistent Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2024_CVPR,
  author    = {Gao, Xiangjun and Li, Xiaoyu and Zhang, Chaopeng and Zhang, Qi and Cao, Yanpei and Shan, Ying and Quan, Long},
  title     = {{ConTex-Human}: Free-View Rendering of Human from a Single Image with Texture-Consistent Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10084--10094},
}
UFineBench: Towards Text-based Person Retrieval with Ultra-fine Granularity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zuo_2024_CVPR,
  author    = {Zuo, Jialong and Zhou, Hanyu and Nie, Ying and Zhang, Feng and Guo, Tianyu and Sang, Nong and Wang, Yunhe and Gao, Changxin},
  title     = {{UFineBench}: Towards Text-based Person Retrieval with Ultra-fine Granularity},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {22010--22019},
}
Efficient Hyperparameter Optimization with Adaptive Fidelity Identification-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2024_CVPR,
  author    = {Jiang, Jiantong and Wen, Zeyi and Mansoor, Atif and Mian, Ajmal},
  title     = {Efficient Hyperparameter Optimization with Adaptive Fidelity Identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {26181--26190},
}
ASH: Animatable Gaussian Splats for Efficient and Photoreal Human Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pang_2024_CVPR,
  author    = {Pang, Haokai and Zhu, Heming and Kortylewski, Adam and Theobalt, Christian and Habermann, Marc},
  title     = {{ASH}: Animatable {Gaussian} Splats for Efficient and Photoreal Human Rendering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1165--1175},
}
Focus on Hiders: Exploring Hidden Threats for Enhancing Adversarial Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Qian and Hu, Yuxiao and Dong, Yinpeng and Zhang, Dongxiao and Chen, Yuntian},
  title     = {Focus on Hiders: Exploring Hidden Threats for Enhancing Adversarial Training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {24442--24451},
}
ArtAdapter: Text-to-Image Style Transfer using Multi-Level Style Encoder and Explicit Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR_ArtAdapter,
  author    = {Chen, Dar-Yen and Tennent, Hamish and Hsu, Ching-Wen},
  title     = {{ArtAdapter}: Text-to-Image Style Transfer using Multi-Level Style Encoder and Explicit Adaptation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8619--8628},
}
GoodSAM: Bridging Domain and Capacity Gaps via Segment Anything Model for Distortion-aware Panoramic Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Weiming and Liu, Yexin and Zheng, Xu and Wang, Lin},
  title     = {{GoodSAM}: Bridging Domain and Capacity Gaps via {Segment Anything Model} for Distortion-aware Panoramic Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {28264--28273},
}
DYSON: Dynamic Feature Space Self-Organization for Online Task-Free Class Incremental Learning-
[pdf]
[supp]
[bibtex]@InProceedings{He_2024_CVPR,
  author    = {He, Yuhang and Chen, Yingjie and Jin, Yuhan and Dong, Songlin and Wei, Xing and Gong, Yihong},
  title     = {{DYSON}: Dynamic Feature Space Self-Organization for Online Task-Free Class Incremental Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {23741--23751},
}
Streaming Dense Video Captioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Xingyi and Arnab, Anurag and Buch, Shyamal and Yan, Shen and Myers, Austin and Xiong, Xuehan and Nagrani, Arsha and Schmid, Cordelia},
  title     = {Streaming Dense Video Captioning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18243--18252},
}
Rethinking Inductive Biases for Surface Normal Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bae_2024_CVPR,
  author    = {Bae, Gwangbin and Davison, Andrew J.},
  title     = {Rethinking Inductive Biases for Surface Normal Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9535--9545},
}
Event-based Structure-from-Orbit-
[pdf]
[arXiv]
[bibtex]@InProceedings{Elms_2024_CVPR,
  author    = {Elms, Ethan and Latif, Yasir and Park, Tae Ha and Chin, Tat-Jun},
  title     = {Event-based Structure-from-Orbit},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {19541--19550},
}
LED: A Large-scale Real-world Paired Dataset for Event Camera Denoising-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Duan_2024_CVPR,
  author    = {Duan, Yuxing},
  title     = {{LED}: A Large-scale Real-world Paired Dataset for Event Camera Denoising},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {25637--25647},
}
Fair Federated Learning under Domain Skew with Local Consistency and Domain Diversity-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR_FairFederated,
  author    = {Chen, Yuhang and Huang, Wenke and Ye, Mang},
  title     = {Fair Federated Learning under Domain Skew with Local Consistency and Domain Diversity},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12077--12086},
}
Activity-Biometrics: Person Identification from Daily Activities-
[pdf]
[supp]
[bibtex]@InProceedings{Azad_2024_CVPR,
  author    = {Azad, Shehreen and Rawat, Yogesh Singh},
  title     = {Activity-Biometrics: Person Identification from Daily Activities},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {287--296},
}
Z*: Zero-shot Style Transfer via Attention Reweighting-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2024_CVPR,
  author    = {Deng, Yingying and He, Xiangyu and Tang, Fan and Dong, Weiming},
  title     = {Z*: Zero-shot Style Transfer via Attention Reweighting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6934--6944},
}
HIG: Hierarchical Interlacement Graph Approach to Scene Graph Generation in Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2024_CVPR,
  author    = {Nguyen, Trong-Thuan and Nguyen, Pha and Luu, Khoa},
  title     = {{HIG}: Hierarchical Interlacement Graph Approach to Scene Graph Generation in Video Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18384--18394},
}
OOSTraj: Out-of-Sight Trajectory Prediction With Vision-Positioning Denoising-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR_OOSTraj,
  author    = {Zhang, Haichao and Xu, Yi and Lu, Hongsheng and Shimizu, Takayuki and Fu, Yun},
  title     = {{OOSTraj}: Out-of-Sight Trajectory Prediction With Vision-Positioning Denoising},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14802--14811},
}
FADES: Fair Disentanglement with Sensitive Relevance-
[pdf]
[supp]
[bibtex]@InProceedings{Jang_2024_CVPR,
  author    = {Jang, Taeuk and Wang, Xiaoqian},
  title     = {{FADES}: Fair Disentanglement with Sensitive Relevance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12067--12076},
}
Learning Continuous 3D Words for Text-to-Image Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Cheng_2024_CVPR,
  author    = {Cheng, Ta-Ying and Gadelha, Matheus and Groueix, Thibault and Fisher, Matthew and Mech, Radomir and Markham, Andrew and Trigoni, Niki},
  title     = {Learning Continuous {3D} Words for Text-to-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6753--6762},
}
MarkovGen: Structured Prediction for Efficient Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jayasumana_2024_CVPR,
  author    = {Jayasumana, Sadeep and Glasner, Daniel and Ramalingam, Srikumar and Veit, Andreas and Chakrabarti, Ayan and Kumar, Sanjiv},
  title     = {{MarkovGen}: Structured Prediction for Efficient Text-to-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9316--9325},
}
Self-Supervised Class-Agnostic Motion Prediction with Spatial and Temporal Consistency Regularizations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Kewei and Wu, Yizheng and Cen, Jun and Pan, Zhiyu and Li, Xingyi and Wang, Zhe and Cao, Zhiguo and Lin, Guosheng},
  title     = {Self-Supervised Class-Agnostic Motion Prediction with Spatial and Temporal Consistency Regularizations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14638--14647},
}
HashPoint: Accelerated Point Searching and Sampling for Neural Rendering-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2024_CVPR,
  author    = {Ma, Jiahao and Liu, Miaomiao and Ahmedt-Aristizabal, David and Nguyen, Chuong},
  title     = {{HashPoint}: Accelerated Point Searching and Sampling for Neural Rendering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4462--4472},
}
MFP: Making Full Use of Probability Maps for Interactive Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2024_CVPR_MFP,
  author    = {Lee, Chaewon and Lee, Seon-Ho and Kim, Chang-Su},
  title     = {{MFP}: Making Full Use of Probability Maps for Interactive Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4051--4059},
}
CAT: Exploiting Inter-Class Dynamics for Domain Adaptive Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kennerley_2024_CVPR,
  author    = {Kennerley, Mikhail and Wang, Jian-Gang and Veeravalli, Bharadwaj and Tan, Robby T.},
  title     = {{CAT}: Exploiting Inter-Class Dynamics for Domain Adaptive Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16541--16550},
}
StyLitGAN: Image-Based Relighting via Latent Control-
[pdf]
[supp]
[bibtex]@InProceedings{Bhattad_2024_CVPR,
  author    = {Bhattad, Anand and Soole, James and Forsyth, D. A.},
  title     = {{StyLitGAN}: Image-Based Relighting via Latent Control},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4231--4240},
}
An Empirical Study of Scaling Law for Scene Text Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Rang_2024_CVPR,
  author    = {Rang, Miao and Bi, Zhenni and Liu, Chuanjian and Wang, Yunhe and Han, Kai},
  title     = {An Empirical Study of Scaling Law for Scene Text Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15619--15629},
}
Text2Loc: 3D Point Cloud Localization from Natural Language-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2024_CVPR,
  author    = {Xia, Yan and Shi, Letian and Ding, Zifeng and Henriques, Joao F. and Cremers, Daniel},
  title     = {{Text2Loc}: {3D} Point Cloud Localization from Natural Language},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14958--14967},
}
SVDinsTN: A Tensor Network Paradigm for Efficient Structure Search from Regularized Modeling Perspective-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zheng_2024_CVPR,
  author    = {Zheng, Yu-Bang and Zhao, Xi-Le and Zeng, Junhua and Li, Chao and Zhao, Qibin and Li, Heng-Chao and Huang, Ting-Zhu},
  title     = {{SVDinsTN}: A Tensor Network Paradigm for Efficient Structure Search from Regularized Modeling Perspective},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {26254--26263},
}
Decomposing Disease Descriptions for Enhanced Pathology Detection: A Multi-Aspect Vision-Language Pre-training Framework-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Phan_2024_CVPR,
  author    = {Phan, Vu Minh Hieu and Xie, Yutong and Qi, Yuankai and Liu, Lingqiao and Liu, Liyang and Zhang, Bowen and Liao, Zhibin and Wu, Qi and To, Minh-Son and Verjans, Johan W.},
  title     = {Decomposing Disease Descriptions for Enhanced Pathology Detection: A Multi-Aspect Vision-Language Pre-training Framework},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11492--11501},
}
MoMask: Generative Masked Modeling of 3D Human Motions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2024_CVPR,
  author    = {Guo, Chuan and Mu, Yuxuan and Javed, Muhammad Gohar and Wang, Sen and Cheng, Li},
  title     = {{MoMask}: Generative Masked Modeling of {3D} Human Motions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1900--1910},
}
Inverse Rendering of Glossy Objects via the Neural Plenoptic Function and Radiance Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR_InverseRendering,
  author    = {Wang, Haoyuan and Hu, Wenbo and Zhu, Lei and Lau, Rynson W. H.},
  title     = {Inverse Rendering of Glossy Objects via the Neural Plenoptic Function and Radiance Fields},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {19999--20008},
}
Split to Merge: Unifying Separated Modalities for Unsupervised Domain Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR_SplitToMerge,
  author    = {Li, Xinyao and Li, Yuke and Du, Zhekai and Li, Fengling and Lu, Ke and Li, Jingjing},
  title     = {Split to Merge: Unifying Separated Modalities for Unsupervised Domain Adaptation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {23364--23374},
}
Fitting Flats to Flats-
[pdf]
[supp]
[bibtex]@InProceedings{Dogadov_2024_CVPR,
  author    = {Dogadov, Gabriel and Finnendahl, Ugo and Alexa, Marc},
  title     = {Fitting Flats to Flats},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5439--5447},
}
Fusing Personal and Environmental Cues for Identification and Segmentation of First-Person Camera Wearers in Third-Person Views-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2024_CVPR_CameraWearers,
  author    = {Zhao, Ziwei and Wang, Yuchen and Wang, Chuhua},
  title     = {Fusing Personal and Environmental Cues for Identification and Segmentation of First-Person Camera Wearers in Third-Person Views},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16477--16487},
}
Coupled Laplacian Eigenmaps for Locally-Aware 3D Rigid Point Cloud Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Bastico_2024_CVPR,
  author    = {Bastico, Matteo and Decenci{\`e}re, Etienne and Cort{\'e}, Laurent and Tillier, Yannick and Ryckelynck, David},
  title     = {Coupled {Laplacian} Eigenmaps for Locally-Aware {3D} Rigid Point Cloud Matching},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3447--3458},
}
Overcoming Generic Knowledge Loss with Selective Parameter Update-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR_SelectiveUpdate,
  author    = {Zhang, Wenxuan and Janson, Paul and Aljundi, Rahaf and Elhoseiny, Mohamed},
  title     = {Overcoming Generic Knowledge Loss with Selective Parameter Update},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {24046--24056},
}
Desigen: A Pipeline for Controllable Design Template Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Weng_2024_CVPR,
  author    = {Weng, Haohan and Huang, Danqing and Qiao, Yu and Hu, Zheng and Lin, Chin-Yew and Zhang, Tong and Chen, C. L. Philip},
  title     = {Desigen: A Pipeline for Controllable Design Template Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12721--12732},
}
Diff-BGM: A Diffusion Model for Video Background Music Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR_DiffBGM,
  author    = {Li, Sizhe and Qin, Yiming and Zheng, Minghang and Jin, Xin and Liu, Yang},
  title     = {{Diff-BGM}: A Diffusion Model for Video Background Music Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {27348--27357},
}
Looking Similar Sounding Different: Leveraging Counterfactual Cross-Modal Pairs for Audiovisual Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Singh_2024_CVPR,
  author    = {Singh, Nikhil and Wu, Chih-Wei and Orife, Iroro and Kalayeh, Mahdi},
  title     = {Looking Similar Sounding Different: Leveraging Counterfactual Cross-Modal Pairs for Audiovisual Representation Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {26907--26918},
}
Multi-criteria Token Fusion with One-step-ahead Attention for Efficient Vision Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2024_CVPR_TokenFusion,
  author    = {Lee, Sanghyeok and Choi, Joonmyung and Kim, Hyunwoo J.},
  title     = {Multi-criteria Token Fusion with One-step-ahead Attention for Efficient Vision Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15741--15750},
}
Towards HDR and HFR Video from Rolling-Mixed-Bit Spikings-
[pdf]
[supp]
[bibtex]@InProceedings{Chang_2024_CVPR,
  author    = {Chang, Yakun and Xiaokaiti, Yeliduosi and Liu, Yujia and Fan, Bin and Huang, Zhaojun and Huang, Tiejun and Shi, Boxin},
  title     = {Towards {HDR} and {HFR} Video from Rolling-Mixed-Bit Spikings},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {25117--25127},
}
Scaling Up Video Summarization Pretraining with Large Language Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Argaw_2024_CVPR,
  author    = {Argaw, Dawit Mureja and Yoon, Seunghyun and Heilbron, Fabian Caba and Deilamsalehy, Hanieh and Bui, Trung and Wang, Zhaowen and Dernoncourt, Franck and Chung, Joon Son},
  title     = {Scaling Up Video Summarization Pretraining with Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8332--8341},
}
Continuous Optical Zooming: A Benchmark for Arbitrary-Scale Image Super-Resolution in Real World-
[pdf]
[bibtex]@InProceedings{Fu_2024_CVPR,
  author    = {Fu, Huiyuan and Peng, Fei and Li, Xianwei and Li, Yejun and Wang, Xin and Ma, Huadong},
  title     = {Continuous Optical Zooming: A Benchmark for Arbitrary-Scale Image Super-Resolution in Real World},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3035--3044},
}
Sharingan: A Transformer Architecture for Multi-Person Gaze Following-
[pdf]
[supp]
[bibtex]@InProceedings{Tafasca_2024_CVPR,
  author    = {Tafasca, Samy and Gupta, Anshul and Odobez, Jean-Marc},
  title     = {Sharingan: A Transformer Architecture for Multi-Person Gaze Following},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2008--2017},
}
ViewFusion: Towards Multi-View Consistency via Interpolated Denoising-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Xianghui and Zuo, Yan and Ramasinghe, Sameera and Bazzani, Loris and Avraham, Gil and van den Hengel, Anton},
  title     = {{ViewFusion}: Towards Multi-View Consistency via Interpolated Denoising},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9870--9880},
}
SketchINR: A First Look into Sketches as Implicit Neural Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bandyopadhyay_2024_CVPR,
  author    = {Bandyopadhyay, Hmrishav and Bhunia, Ayan Kumar and Chowdhury, Pinaki Nath and Sain, Aneeshan and Xiang, Tao and Hospedales, Timothy and Song, Yi-Zhe},
  title     = {{SketchINR}: A First Look into Sketches as Implicit Neural Representations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12565--12574},
}
Open-Vocabulary Segmentation with Semantic-Assisted Calibration-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR_OpenVocabulary,
  author    = {Liu, Yong and Bai, Sule and Li, Guanbin and Wang, Yitong and Tang, Yansong},
  title     = {Open-Vocabulary Segmentation with Semantic-Assisted Calibration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3491--3500},
}
MatchU: Matching Unseen Objects for 6D Pose Estimation from RGB-D Images-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Junwen and Yu, Hao and Yu, Kuan-Ting and Navab, Nassir and Ilic, Slobodan and Busam, Benjamin},
  title     = {{MatchU}: Matching Unseen Objects for {6D} Pose Estimation from {RGB-D} Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10095--10105},
}
Towards a Perceptual Evaluation Framework for Lighting Estimation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Giroux_2024_CVPR,
  author    = {Giroux, Justine and Dastjerdi, Mohammad Reza Karimi and Hold-Geoffroy, Yannick and Vazquez-Corral, Javier and Lalonde, Jean-Fran{\c{c}}ois},
  title     = {Towards a Perceptual Evaluation Framework for Lighting Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4410--4419},
}
Bridging the Synthetic-to-Authentic Gap: Distortion-Guided Unsupervised Domain Adaptation for Blind Image Quality Assessment-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR_SyntheticToAuthentic,
  author    = {Li, Aobo and Wu, Jinjian and Liu, Yongxu and Li, Leida},
  title     = {Bridging the Synthetic-to-Authentic Gap: Distortion-Guided Unsupervised Domain Adaptation for Blind Image Quality Assessment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {28422--28431},
}
Coherent Temporal Synthesis for Incremental Action Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ding_2024_CVPR,
  author    = {Ding, Guodong and Golong, Hans and Yao, Angela},
  title     = {Coherent Temporal Synthesis for Incremental Action Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {28485--28494},
}
HiFi4G: High-Fidelity Human Performance Rendering via Compact Gaussian Splatting-
[pdf]
[arXiv]
[bibtex]@InProceedings{Jiang_2024_CVPR_HiFi4G,
  author    = {Jiang, Yuheng and Shen, Zhehao and Wang, Penghao and Su, Zhuo and Hong, Yu and Zhang, Yingliang and Yu, Jingyi and Xu, Lan},
  title     = {{HiFi4G}: High-Fidelity Human Performance Rendering via Compact {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {19734--19745},
}
G-FARS: Gradient-Field-based Auto-Regressive Sampling for 3D Part Grouping-
[pdf]
[supp]
[bibtex]@InProceedings{Cheng_2024_CVPR,
    author    = {Cheng, Junfeng and Stathaki, Tania},
    title     = {{G-FARS}: Gradient-Field-based Auto-Regressive Sampling for {3D} Part Grouping},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {27652--27661},
}
Towards High-fidelity Artistic Image Vectorization via Texture-Encapsulated Shape Parameterization-
[pdf]
[bibtex]@InProceedings{Chen_2024_CVPR,
    author    = {Chen, Ye and Ni, Bingbing and Liu, Jinfan and Huang, Xiaoyang and Chen, Xuanhong},
    title     = {Towards High-fidelity Artistic Image Vectorization via Texture-Encapsulated Shape Parameterization},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {15877--15886},
}
On Exact Inversion of DPM-Solvers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hong_2024_CVPR,
    author    = {Hong, Seongmin and Lee, Kyeonghyun and Jeon, Suh Yoon and Bae, Hyewon and Chun, Se Young},
    title     = {On Exact Inversion of {DPM-Solvers}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {7069--7078},
}
EfficientSAM: Leveraged Masked Image Pretraining for Efficient Segment Anything-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiong_2024_CVPR,
    author    = {Xiong, Yunyang and Varadarajan, Bala and Wu, Lemeng and Xiang, Xiaoyu and Xiao, Fanyi and Zhu, Chenchen and Dai, Xiaoliang and Wang, Dilin and Sun, Fei and Iandola, Forrest and Krishnamoorthi, Raghuraman and Chandra, Vikas},
    title     = {{EfficientSAM}: Leveraged Masked Image Pretraining for Efficient {Segment Anything}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {16111--16121},
}
ChatScene: Knowledge-Enabled Safety-Critical Scenario Generation for Autonomous Vehicles-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
    author    = {Zhang, Jiawei and Xu, Chejian and Li, Bo},
    title     = {{ChatScene}: Knowledge-Enabled Safety-Critical Scenario Generation for Autonomous Vehicles},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {15459--15469},
}
CAMEL: CAusal Motion Enhancement Tailored for Lifting Text-driven Video Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR,
    author    = {Zhang, Guiwei and Zhang, Tianyu and Niu, Guanglin and Tan, Zichang and Bai, Yalong and Yang, Qing},
    title     = {{CAMEL}: {CAusal} Motion Enhancement Tailored for Lifting Text-driven Video Editing},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {9079--9088},
}
Teeth-SEG: An Efficient Instance Segmentation Framework for Orthodontic Treatment based on Multi-Scale Aggregation and Anthropic Prior Knowledge-
[pdf]
[supp]
[bibtex]@InProceedings{Zou_2024_CVPR,
    author    = {Zou, Bo and Wang, Shaofeng and Liu, Hao and Sun, Gaoyue and Wang, Yajie and Zuo, FeiFei and Quan, Chengbin and Zhao, Youjian},
    title     = {{Teeth-SEG}: An Efficient Instance Segmentation Framework for Orthodontic Treatment based on Multi-Scale Aggregation and Anthropic Prior Knowledge},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {11601--11610},
}
FocSAM: Delving Deeply into Focused Objects in Segmenting Anything-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR,
    author    = {Huang, You and Lan, Zongyu and Cao, Liujuan and Lin, Xianming and Zhang, Shengchuan and Jiang, Guannan and Ji, Rongrong},
    title     = {{FocSAM}: Delving Deeply into Focused Objects in Segmenting Anything},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {3120--3130},
}
DMR: Decomposed Multi-Modality Representations for Frames and Events Fusion in Visual Reinforcement Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2024_CVPR,
    author    = {Xu, Haoran and Peng, Peixi and Tan, Guang and Li, Yuan and Xu, Xinhai and Tian, Yonghong},
    title     = {{DMR}: Decomposed Multi-Modality Representations for Frames and Events Fusion in Visual Reinforcement Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {26508--26518},
}
DiffuseMix: Label-Preserving Data Augmentation with Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Islam_2024_CVPR,
    author    = {Islam, Khawar and Zaheer, Muhammad Zaigham and Mahmood, Arif and Nandakumar, Karthik},
    title     = {{DiffuseMix}: Label-Preserving Data Augmentation with Diffusion Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {27621--27630},
}
PRDP: Proximal Reward Difference Prediction for Large-Scale Reward Finetuning of Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2024_CVPR,
    author    = {Deng, Fei and Wang, Qifei and Wei, Wei and Hou, Tingbo and Grundmann, Matthias},
    title     = {{PRDP}: Proximal Reward Difference Prediction for Large-Scale Reward Finetuning of Diffusion Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {7423--7433},
}
FREE: Faster and Better Data-Free Meta-Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2024_CVPR,
    author    = {Wei, Yongxian and Hu, Zixuan and Wang, Zhenyi and Shen, Li and Yuan, Chun and Tao, Dacheng},
    title     = {{FREE}: Faster and Better Data-Free Meta-Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {23273--23282},
}
Bayesian Diffusion Models for 3D Shape Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR,
    author    = {Xu, Haiyang and Lei, Yu and Chen, Zeyuan and Zhang, Xiang and Zhao, Yue and Wang, Yilin and Tu, Zhuowen},
    title     = {{Bayesian} Diffusion Models for {3D} Shape Reconstruction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {10628--10638},
}
Task-Customized Mixture of Adapters for General Image Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2024_CVPR,
    author    = {Zhu, Pengfei and Sun, Yang and Cao, Bing and Hu, Qinghua},
    title     = {Task-Customized Mixture of Adapters for General Image Fusion},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {7099--7108},
}
Bi-SSC: Geometric-Semantic Bidirectional Fusion for Camera-based 3D Semantic Scene Completion-
[pdf]
[supp]
[bibtex]@InProceedings{Xue_2024_CVPR,
    author    = {Xue, Yujie and Li, Ruihui and Wu, Fan and Tang, Zhuo and Li, Kenli and Duan, Mingxing},
    title     = {{Bi-SSC}: Geometric-Semantic Bidirectional Fusion for Camera-based {3D} Semantic Scene Completion},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {20124--20134},
}
CrossKD: Cross-Head Knowledge Distillation for Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
    author    = {Wang, Jiabao and Chen, Yuming and Zheng, Zhaohui and Li, Xiang and Cheng, Ming-Ming and Hou, Qibin},
    title     = {{CrossKD}: Cross-Head Knowledge Distillation for Object Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {16520--16530},
}
Bi-level Learning of Task-Specific Decoders for Joint Registration and One-Shot Medical Image Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Fan_2024_CVPR,
    author    = {Fan, Xin and Wang, Xiaolin and Gao, Jiaxin and Wang, Jia and Luo, Zhongxuan and Liu, Risheng},
    title     = {Bi-level Learning of Task-Specific Decoders for Joint Registration and One-Shot Medical Image Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {11726--11735},
}
Parameter Efficient Self-Supervised Geospatial Domain Adaptation-
[pdf]
[supp]
[bibtex]@InProceedings{Scheibenreif_2024_CVPR,
    author    = {Scheibenreif, Linus and Mommert, Michael and Borth, Damian},
    title     = {Parameter Efficient Self-Supervised Geospatial Domain Adaptation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {27841--27851},
}
Defense without Forgetting: Continual Adversarial Defense with Anisotropic & Isotropic Pseudo Replay-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR,
    author    = {Zhou, Yuhang and Hua, Zhongyun},
    title     = {Defense without Forgetting: Continual Adversarial Defense with Anisotropic \& Isotropic Pseudo Replay},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {24263--24272},
}
EscherNet: A Generative Model for Scalable View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kong_2024_CVPR,
    author    = {Kong, Xin and Liu, Shikun and Lyu, Xiaoyang and Taher, Marwan and Qi, Xiaojuan and Davison, Andrew J.},
    title     = {{EscherNet}: A Generative Model for Scalable View Synthesis},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {9503--9513},
}
MeaCap: Memory-Augmented Zero-shot Image Captioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2024_CVPR,
    author    = {Zeng, Zequn and Xie, Yan and Zhang, Hao and Chen, Chiyu and Chen, Bo and Wang, Zhengjue},
    title     = {{MeaCap}: Memory-Augmented Zero-shot Image Captioning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {14100--14110},
}
Artist-Friendly Relightable and Animatable Neural Heads-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR,
    author    = {Xu, Yingyan and Chandran, Prashanth and Weiss, Sebastian and Gross, Markus and Zoss, Gaspard and Bradley, Derek},
    title     = {Artist-Friendly Relightable and Animatable Neural Heads},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {2457--2467},
}
Elite360D: Towards Efficient 360 Depth Estimation via Semantic- and Distance-Aware Bi-Projection Fusion-
[pdf]
[supp]
[bibtex]@InProceedings{Ai_2024_CVPR,
    author    = {Ai, Hao and Wang, Lin},
    title     = {{Elite360D}: Towards Efficient 360 Depth Estimation via Semantic- and Distance-Aware Bi-Projection Fusion},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {9926--9935},
}
From Feature to Gaze: A Generalizable Replacement of Linear Layer for Gaze Estimation-
[pdf]
[bibtex]@InProceedings{Bao_2024_CVPR,
    author    = {Bao, Yiwei and Lu, Feng},
    title     = {From Feature to Gaze: A Generalizable Replacement of Linear Layer for Gaze Estimation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {1409--1418},
}
Curriculum Point Prompting for Weakly-Supervised Referring Image Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Dai_2024_CVPR,
    author    = {Dai, Qiyuan and Yang, Sibei},
    title     = {Curriculum Point Prompting for Weakly-Supervised Referring Image Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {13711--13722},
}
EventDance: Unsupervised Source-free Cross-modal Adaptation for Event-based Object Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2024_CVPR,
    author    = {Zheng, Xu and Wang, Lin},
    title     = {{EventDance}: Unsupervised Source-free Cross-modal Adaptation for Event-based Object Recognition},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {17448--17458},
}
CycleINR: Cycle Implicit Neural Representation for Arbitrary-Scale Volumetric Super-Resolution of Medical Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2024_CVPR,
    author    = {Fang, Wei and Tang, Yuxing and Guo, Heng and Yuan, Mingze and Mok, Tony C. W. and Yan, Ke and Yao, Jiawen and Chen, Xin and Liu, Zaiyi and Lu, Le and Zhang, Ling and Xu, Minfeng},
    title     = {{CycleINR}: Cycle Implicit Neural Representation for Arbitrary-Scale Volumetric Super-Resolution of Medical Data},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {11631--11641},
}
Boosting Image Restoration via Priors from Pre-trained Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR,
    author    = {Xu, Xiaogang and Kong, Shu and Hu, Tao and Liu, Zhe and Bao, Hujun},
    title     = {Boosting Image Restoration via Priors from Pre-trained Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {2900--2909},
}
VRetouchEr: Learning Cross-frame Feature Interdependence with Imperfection Flow for Face Retouching in Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Xue_2024_CVPR,
    author    = {Xue, Wen and Jiang, Le and Xie, Lianxin and Wu, Si and Xu, Yong and Wong, Hau San},
    title     = {{VRetouchEr}: Learning Cross-frame Feature Interdependence with Imperfection Flow for Face Retouching in Videos},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {9141--9150},
}
Transferable Structural Sparse Adversarial Attack Via Exact Group Sparsity Training-
[pdf]
[supp]
[bibtex]@InProceedings{Ming_2024_CVPR,
    author    = {Ming, Di and Ren, Peng and Wang, Yunlong and Feng, Xin},
    title     = {Transferable Structural Sparse Adversarial Attack Via Exact Group Sparsity Training},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {24696--24705},
}
Holistic Autonomous Driving Understanding by Bird's-Eye-View Injected Multi-Modal Large Models-
[pdf]
[supp]
[bibtex]@InProceedings{Ding_2024_CVPR,
    author    = {Ding, Xinpeng and Han, Jianhua and Xu, Hang and Liang, Xiaodan and Zhang, Wei and Li, Xiaomeng},
    title     = {Holistic Autonomous Driving Understanding by Bird's-Eye-View Injected Multi-Modal Large Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {13668--13677},
}
Arbitrary-Scale Image Generation and Upsampling using Latent Diffusion Model and Implicit Neural Decoder-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR,
    author    = {Kim, Jinseok and Kim, Tae-Kyun},
    title     = {Arbitrary-Scale Image Generation and Upsampling using Latent Diffusion Model and Implicit Neural Decoder},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {9202--9211},
}
Unsupervised Occupancy Learning from Sparse Point Cloud-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ouasfi_2024_CVPR,
    author    = {Ouasfi, Amine and Boukhayma, Adnane},
    title     = {Unsupervised Occupancy Learning from Sparse Point Cloud},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {21729--21739},
}
Extreme Point Supervised Instance Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2024_CVPR,
    author    = {Lee, Hyeonjun and Hwang, Sehyun and Kwak, Suha},
    title     = {Extreme Point Supervised Instance Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {17212--17222},
}
3DInAction: Understanding Human Actions in 3D Point Clouds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ben-Shabat_2024_CVPR,
    author    = {Ben-Shabat, Yizhak and Shrout, Oren and Gould, Stephen},
    title     = {{3DInAction}: Understanding Human Actions in {3D} Point Clouds},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {19978--19987},
}
Cache Me if You Can: Accelerating Diffusion Models through Block Caching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wimbauer_2024_CVPR,
    author    = {Wimbauer, Felix and Wu, Bichen and Schoenfeld, Edgar and Dai, Xiaoliang and Hou, Ji and He, Zijian and Sanakoyeu, Artsiom and Zhang, Peizhao and Tsai, Sam and Kohler, Jonas and Rupprecht, Christian and Cremers, Daniel and Vajda, Peter and Wang, Jialiang},
    title     = {Cache Me if You Can: Accelerating Diffusion Models through Block Caching},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {6211--6220},
}
MedM2G: Unifying Medical Multi-Modal Generation via Cross-Guided Diffusion with Visual Invariant-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhan_2024_CVPR,
    author    = {Zhan, Chenlu and Lin, Yu and Wang, Gaoang and Wang, Hongwei and Wu, Jian},
    title     = {{MedM2G}: Unifying Medical Multi-Modal Generation via Cross-Guided Diffusion with Visual Invariant},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {11502--11512},
}
SDDGR: Stable Diffusion-based Deep Generative Replay for Class Incremental Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR,
    author    = {Kim, Junsu and Cho, Hoseong and Kim, Jihyeon and Tiruneh, Yihalem Yimolal and Baek, Seungryul},
    title     = {{SDDGR}: {Stable Diffusion}-based Deep Generative Replay for Class Incremental Object Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {28772--28781},
}
Neural Parametric Gaussians for Monocular Non-Rigid Object Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Das_2024_CVPR,
    author    = {Das, Devikalyan and Wewer, Christopher and Yunus, Raza and Ilg, Eddy and Lenssen, Jan Eric},
    title     = {Neural Parametric {Gaussians} for Monocular Non-Rigid Object Reconstruction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {10715--10725},
}
Physical 3D Adversarial Attacks against Monocular Depth Estimation in Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2024_CVPR,
    author    = {Zheng, Junhao and Lin, Chenhao and Sun, Jiahao and Zhao, Zhengyu and Li, Qian and Shen, Chao},
    title     = {Physical {3D} Adversarial Attacks against Monocular Depth Estimation in Autonomous Driving},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {24452--24461},
}
Adaptive Random Feature Regularization on Fine-tuning Deep Neural Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yamaguchi_2024_CVPR,
    author    = {Yamaguchi, Shin'ya and Kanai, Sekitoshi and Adachi, Kazuki and Chijiwa, Daiki},
    title     = {Adaptive Random Feature Regularization on Fine-tuning Deep Neural Networks},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {23481--23490},
}
PH-Net: Semi-Supervised Breast Lesion Segmentation via Patch-wise Hardness-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2024_CVPR,
    author    = {Jiang, Siyao and Wu, Huisi and Chen, Junyang and Zhang, Qin and Qin, Jing},
    title     = {{PH-Net}: Semi-Supervised Breast Lesion Segmentation via Patch-wise Hardness},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {11418--11427},
}
Multimodal Prompt Perceiver: Empower Adaptiveness Generalizability and Fidelity for All-in-One Image Restoration-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ai_2024_CVPR,
    author    = {Ai, Yuang and Huang, Huaibo and Zhou, Xiaoqiang and Wang, Jiexiang and He, Ran},
    title     = {Multimodal Prompt Perceiver: Empower Adaptiveness Generalizability and Fidelity for All-in-One Image Restoration},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {25432--25444},
}
ExACT: Language-guided Conceptual Reasoning and Uncertainty Estimation for Event-based Action Recognition and More-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR,
    author    = {Zhou, Jiazhou and Zheng, Xu and Lyu, Yuanhuiyi and Wang, Lin},
    title     = {{ExACT}: Language-guided Conceptual Reasoning and Uncertainty Estimation for Event-based Action Recognition and More},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {18633--18643},
}
Color Shift Estimation-and-Correction for Image Enhancement-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR,
    author    = {Li, Yiyu and Xu, Ke and Hancke, Gerhard Petrus and Lau, Rynson W. H.},
    title     = {Color Shift Estimation-and-Correction for Image Enhancement},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {25389--25398},
}
Improving Visual Recognition with Hyperbolical Visual Hierarchy Mapping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kwon_2024_CVPR,
    author    = {Kwon, Hyeongjun and Jang, Jinhyun and Kim, Jin and Kim, Kwonyoung and Sohn, Kwanghoon},
    title     = {Improving Visual Recognition with Hyperbolical Visual Hierarchy Mapping},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {17364--17374},
}
ParameterNet: Parameters Are All You Need for Large-scale Visual Pretraining of Mobile Networks-
[pdf]
[bibtex]@InProceedings{Han_2024_CVPR,
    author    = {Han, Kai and Wang, Yunhe and Guo, Jianyuan and Wu, Enhua},
    title     = {{ParameterNet}: Parameters Are All You Need for Large-scale Visual Pretraining of Mobile Networks},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {15751--15761},
}
Repurposing Diffusion-Based Image Generators for Monocular Depth Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ke_2024_CVPR,
    author    = {Ke, Bingxin and Obukhov, Anton and Huang, Shengyu and Metzger, Nando and Daudt, Rodrigo Caye and Schindler, Konrad},
    title     = {Repurposing Diffusion-Based Image Generators for Monocular Depth Estimation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {9492--9502},
}
Identifying Important Group of Pixels using Interactions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sumiyasu_2024_CVPR,
    author    = {Sumiyasu, Kosuke and Kawamoto, Kazuhiko and Kera, Hiroshi},
    title     = {Identifying Important Group of Pixels using Interactions},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {6017--6026},
}
Towards Scalable 3D Anomaly Detection and Localization: A Benchmark via 3D Anomaly Synthesis and A Self-Supervised Learning Network-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR,
    author    = {Li, Wenqiao and Xu, Xiaohao and Gu, Yao and Zheng, Bozhong and Gao, Shenghua and Wu, Yingna},
    title     = {Towards Scalable {3D} Anomaly Detection and Localization: A Benchmark via {3D} Anomaly Synthesis and A Self-Supervised Learning Network},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {22207--22216},
}
Cam4DOcc: Benchmark for Camera-Only 4D Occupancy Forecasting in Autonomous Driving Applications-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2024_CVPR,
    author    = {Ma, Junyi and Chen, Xieyuanli and Huang, Jiawei and Xu, Jingyi and Luo, Zhen and Xu, Jintao and Gu, Weihao and Ai, Rui and Wang, Hesheng},
    title     = {{Cam4DOcc}: Benchmark for Camera-Only {4D} Occupancy Forecasting in Autonomous Driving Applications},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {21486--21495},
}
DIOD: Self-Distillation Meets Object Discovery-
[pdf]
[supp]
[bibtex]@InProceedings{Kara_2024_CVPR,
    author    = {Kara, Sandra and Ammar, Hejer and Denize, Julien and Chabot, Florian and Pham, Quoc-Cuong},
    title     = {{DIOD}: Self-Distillation Meets Object Discovery},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {3975--3985},
}
GoMAvatar: Efficient Animatable Human Modeling from Monocular Video Using Gaussians-on-Mesh-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wen_2024_CVPR,
    author    = {Wen, Jing and Zhao, Xiaoming and Ren, Zhongzheng and Schwing, Alexander G. and Wang, Shenlong},
    title     = {{GoMAvatar}: Efficient Animatable Human Modeling from Monocular Video Using {Gaussians-on-Mesh}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {2059--2069},
}
Neural Redshift: Random Networks are not Random Functions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Teney_2024_CVPR,
    author    = {Teney, Damien and Nicolicioiu, Armand Mihai and Hartmann, Valentin and Abbasnejad, Ehsan},
    title     = {Neural Redshift: Random Networks are not Random Functions},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {4786--4796},
}
HumanGaussian: Text-Driven 3D Human Generation with Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR,
    author    = {Liu, Xian and Zhan, Xiaohang and Tang, Jiaxiang and Shan, Ying and Zeng, Gang and Lin, Dahua and Liu, Xihui and Liu, Ziwei},
    title     = {{HumanGaussian}: Text-Driven {3D} Human Generation with {Gaussian} Splatting},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {6646--6657},
}
DIEM: Decomposition-Integration Enhancing Multimodal Insights-
[pdf]
[bibtex]@InProceedings{Jiang_2024_CVPR,
    author    = {Jiang, Xinyi and Wang, Guoming and Guo, Junhao and Li, Juncheng and Zhang, Wenqiao and Lu, Rongxing and Tang, Siliang},
    title     = {{DIEM}: Decomposition-Integration Enhancing Multimodal Insights},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {27304--27313},
}
CosmicMan: A Text-to-Image Foundation Model for Humans-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR,
    author    = {Li, Shikai and Fu, Jianglin and Liu, Kaiyuan and Wang, Wentao and Lin, Kwan-Yee and Wu, Wayne},
    title     = {{CosmicMan}: A Text-to-Image Foundation Model for Humans},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {6955--6965},
}
LLMs are Good Sign Language Translators-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gong_2024_CVPR,
    author    = {Gong, Jia and Foo, Lin Geng and He, Yixuan and Rahmani, Hossein and Liu, Jun},
    title     = {{LLMs} are Good Sign Language Translators},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {18362--18372},
}
Contrastive Pre-Training with Multi-View Fusion for No-Reference Point Cloud Quality Assessment-
[pdf]
[arXiv]
[bibtex]@InProceedings{Shan_2024_CVPR,
    author    = {Shan, Ziyu and Zhang, Yujie and Yang, Qi and Yang, Haichen and Xu, Yiling and Hwang, Jenq-Neng and Xu, Xiaozhong and Liu, Shan},
    title     = {Contrastive Pre-Training with Multi-View Fusion for No-Reference Point Cloud Quality Assessment},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {25942--25951},
}
JDEC: JPEG Decoding via Enhanced Continuous Cosine Coefficients-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2024_CVPR,
    author    = {Han, Woo Kyoung and Im, Sunghoon and Kim, Jaedeok and Jin, Kyong Hwan},
    title     = {{JDEC}: {JPEG} Decoding via Enhanced Continuous Cosine Coefficients},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {2784--2793},
}
Revisiting the Domain Shift and Sample Uncertainty in Multi-source Active Domain Transfer-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
    author    = {Zhang, Wenqiao and Lv, Zheqi and Zhou, Hao and Liu, Jia-Wei and Li, Juncheng and Li, Mengze and Li, Yunfei and Zhang, Dongping and Zhuang, Yueting and Tang, Siliang},
    title     = {Revisiting the Domain Shift and Sample Uncertainty in Multi-source Active Domain Transfer},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {16751--16761},
}
Learning Continual Compatible Representation for Re-indexing Free Lifelong Person Re-identification-
[pdf]
[supp]
[bibtex]@InProceedings{Cui_2024_CVPR,
    author    = {Cui, Zhenyu and Zhou, Jiahuan and Wang, Xun and Zhu, Manyu and Peng, Yuxin},
    title     = {Learning Continual Compatible Representation for Re-indexing Free Lifelong Person Re-identification},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {16614--16623},
}
Revisiting Spatial-Frequency Information Integration from a Hierarchical Perspective for Panchromatic and Multi-Spectral Image Fusion-
[pdf]
[supp]
[bibtex]@InProceedings{Tan_2024_CVPR,
    author    = {Tan, Jiangtong and Huang, Jie and Zheng, Naishan and Zhou, Man and Yan, Keyu and Hong, Danfeng and Zhao, Feng},
    title     = {Revisiting Spatial-Frequency Information Integration from a Hierarchical Perspective for Panchromatic and Multi-Spectral Image Fusion},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {25922--25931},
}
BSNet: Box-Supervised Simulation-assisted Mean Teacher for 3D Instance Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2024_CVPR,
    author    = {Lu, Jiahao and Deng, Jiacheng and Zhang, Tianzhu},
    title     = {{BSNet}: Box-Supervised Simulation-assisted Mean Teacher for {3D} Instance Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {20374--20384},
}
Adaptive Slot Attention: Object Discovery with Dynamic Slot Number-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2024_CVPR,
    author    = {Fan, Ke and Bai, Zechen and Xiao, Tianjun and He, Tong and Horn, Max and Fu, Yanwei and Locatello, Francesco and Zhang, Zheng},
    title     = {Adaptive Slot Attention: Object Discovery with Dynamic Slot Number},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {23062--23071},
}
CORES: Convolutional Response-based Score for Out-of-distribution Detection-
[pdf]
[bibtex]@InProceedings{Tang_2024_CVPR,
    author    = {Tang, Keke and Hou, Chao and Peng, Weilong and Chen, Runnan and Zhu, Peican and Wang, Wenping and Tian, Zhihong},
    title     = {{CORES}: Convolutional Response-based Score for Out-of-distribution Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {10916--10925},
}
Task-Driven Wavelets using Constrained Empirical Risk Minimization-
[pdf]
[supp]
[bibtex]@InProceedings{Marcus_2024_CVPR, author = {Marcus, Eric and Sheombarsing, Ray and Sonke, Jan-Jakob and Teuwen, Jonas}, title = {Task-Driven Wavelets using Constrained Empirical Risk Minimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24098-24107} }
HOI-M^3: Capture Multiple Humans and Objects Interaction within Contextual Environment-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Juze and Zhang, Jingyan and Song, Zining and Shi, Zhanhe and Zhao, Chengfeng and Shi, Ye and Yu, Jingyi and Xu, Lan and Wang, Jingya}, title = {HOI-M{\textasciicircum}3: Capture Multiple Humans and Objects Interaction within Contextual Environment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {516-526} }
Interactive3D: Create What You Want by Interactive 3D Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dong_2024_CVPR, author = {Dong, Shaocong and Ding, Lihe and Huang, Zhanpeng and Wang, Zibin and Xue, Tianfan and Xu, Dan}, title = {Interactive3D: Create What You Want by Interactive 3D Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4999-5008} }
DeiT-LT: Distillation Strikes Back for Vision Transformer Training on Long-Tailed Datasets-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rangwani_2024_CVPR, author = {Rangwani, Harsh and Mondal, Pradipto and Mishra, Mayank and Asokan, Ashish Ramayee and Babu, R. Venkatesh}, title = {DeiT-LT: Distillation Strikes Back for Vision Transformer Training on Long-Tailed Datasets}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23396-23406} }
Accurate Spatial Gene Expression Prediction by Integrating Multi-Resolution Features-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chung_2024_CVPR, author = {Chung, Youngmin and Ha, Ji Hun and Im, Kyeong Chan and Lee, Joo Sang}, title = {Accurate Spatial Gene Expression Prediction by Integrating Multi-Resolution Features}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11591-11600} }
FCS: Feature Calibration and Separation for Non-Exemplar Class Incremental Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Qiwei and Peng, Yuxin and Zhou, Jiahuan}, title = {FCS: Feature Calibration and Separation for Non-Exemplar Class Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28495-28504} }
Task2Box: Box Embeddings for Modeling Asymmetric Task Relationships-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Daroya_2024_CVPR, author = {Daroya, Rangel and Sun, Aaron and Maji, Subhransu}, title = {Task2Box: Box Embeddings for Modeling Asymmetric Task Relationships}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28827-28837} }
Behind the Veil: Enhanced Indoor 3D Scene Reconstruction with Occluded Surfaces Completion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2024_CVPR, author = {Sun, Su and Zhao, Cheng and Guo, Yuliang and Wang, Ruoyu and Huang, Xinyu and Chen, Yingjie Victor and Ren, Liu}, title = {Behind the Veil: Enhanced Indoor 3D Scene Reconstruction with Occluded Surfaces Completion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12744-12753} }
VideoGrounding-DINO: Towards Open-Vocabulary Spatio-Temporal Video Grounding-
[pdf]
[bibtex]@InProceedings{Wasim_2024_CVPR, author = {Wasim, Syed Talal and Naseer, Muzammal and Khan, Salman and Yang, Ming-Hsuan and Khan, Fahad Shahbaz}, title = {VideoGrounding-DINO: Towards Open-Vocabulary Spatio-Temporal Video Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18909-18918} }
OmniLocalRF: Omnidirectional Local Radiance Fields from Dynamic Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2024_CVPR, author = {Choi, Dongyoung and Jang, Hyeonjoong and Kim, Min H.}, title = {OmniLocalRF: Omnidirectional Local Radiance Fields from Dynamic Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6871-6880} }
LoS: Local Structure-Guided Stereo Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Kunhong and Wang, Longguang and Zhang, Ye and Xue, Kaiwen and Zhou, Shunbo and Guo, Yulan}, title = {LoS: Local Structure-Guided Stereo Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19746-19756} }
Semantic Human Mesh Reconstruction with Textures-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhan_2024_CVPR, author = {Zhan, Xiaoyu and Yang, Jianxin and Li, Yuanqi and Guo, Jie and Guo, Yanwen and Wang, Wenping}, title = {Semantic Human Mesh Reconstruction with Textures}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {142-152} }
Think Twice Before Selection: Federated Evidential Active Learning for Medical Image Analysis with Domain Shifts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Jiayi and Ma, Benteng and Cui, Hengfei and Xia, Yong}, title = {Think Twice Before Selection: Federated Evidential Active Learning for Medical Image Analysis with Domain Shifts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11439-11449} }
Probing the 3D Awareness of Visual Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{El_Banani_2024_CVPR, author = {El Banani, Mohamed and Raj, Amit and Maninis, Kevis-Kokitsi and Kar, Abhishek and Li, Yuanzhen and Rubinstein, Michael and Sun, Deqing and Guibas, Leonidas and Johnson, Justin and Jampani, Varun}, title = {Probing the 3D Awareness of Visual Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21795-21806} }
PIA: Your Personalized Image Animator via Plug-and-Play Modules in Text-to-Image Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Yiming and Xing, Zhening and Zeng, Yanhong and Fang, Youqing and Chen, Kai}, title = {PIA: Your Personalized Image Animator via Plug-and-Play Modules in Text-to-Image Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7747-7756} }
When Visual Grounding Meets Gigapixel-level Large-scale Scenes: Benchmark and Approach-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2024_CVPR, author = {Ma, Tao and Bai, Bing and Lin, Haozhe and Wang, Heyuan and Wang, Yu and Luo, Lin and Fang, Lu}, title = {When Visual Grounding Meets Gigapixel-level Large-scale Scenes: Benchmark and Approach}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22119-22128} }
NeRF Analogies: Example-Based Visual Attribute Transfer for NeRFs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fischer_2024_CVPR, author = {Fischer, Michael and Li, Zhengqin and Nguyen-Phuoc, Thu and Bozic, Aljaz and Dong, Zhao and Marshall, Carl and Ritschel, Tobias}, title = {NeRF Analogies: Example-Based Visual Attribute Transfer for NeRFs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4640-4650} }
Mind Artist: Creating Artistic Snapshots with Human Thought-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Jiaxuan and Qi, Yu and Wang, Yueming and Pan, Gang}, title = {Mind Artist: Creating Artistic Snapshots with Human Thought}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27207-27217} }
ViTamin: Designing Scalable Vision Models in the Vision-Language Era-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Jieneng and Yu, Qihang and Shen, Xiaohui and Yuille, Alan and Chen, Liang-Chieh}, title = {ViTamin: Designing Scalable Vision Models in the Vision-Language Era}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12954-12966} }
Accept the Modality Gap: An Exploration in the Hyperbolic Space-
[pdf]
[supp]
[bibtex]@InProceedings{Ramasinghe_2024_CVPR, author = {Ramasinghe, Sameera and Shevchenko, Violetta and Avraham, Gil and Thalaiyasingam, Ajanthan}, title = {Accept the Modality Gap: An Exploration in the Hyperbolic Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27263-27272} }
Unraveling Instance Associations: A Closer Look for Audio-Visual Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Yuanhong and Liu, Yuyuan and Wang, Hu and Liu, Fengbei and Wang, Chong and Frazer, Helen and Carneiro, Gustavo}, title = {Unraveling Instance Associations: A Closer Look for Audio-Visual Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26497-26507} }
Few-Shot Object Detection with Foundation Models-
[pdf]
[supp]
[bibtex]@InProceedings{Han_2024_CVPR, author = {Han, Guangxing and Lim, Ser-Nam}, title = {Few-Shot Object Detection with Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28608-28618} }
FedMef: Towards Memory-efficient Federated Dynamic Pruning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Hong and Zhuang, Weiming and Chen, Chen and Lyu, Lingjuan}, title = {FedMef: Towards Memory-efficient Federated Dynamic Pruning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27548-27557} }
Seeing the Unseen: Visual Common Sense for Semantic Placement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ramrakhya_2024_CVPR, author = {Ramrakhya, Ram and Kembhavi, Aniruddha and Batra, Dhruv and Kira, Zsolt and Zeng, Kuo-Hao and Weihs, Luca}, title = {Seeing the Unseen: Visual Common Sense for Semantic Placement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16273-16283} }
Texture-Preserving Diffusion Models for High-Fidelity Virtual Try-On-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Xu and Ding, Changxing and Hong, Zhibin and Huang, Junhao and Tao, Jin and Xu, Xiangmin}, title = {Texture-Preserving Diffusion Models for High-Fidelity Virtual Try-On}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7017-7026} }
PracticalDG: Perturbation Distillation on Vision-Language Models for Hybrid Domain Generalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Zining and Wang, Weiqiu and Zhao, Zhicheng and Su, Fei and Men, Aidong and Meng, Hongying}, title = {PracticalDG: Perturbation Distillation on Vision-Language Models for Hybrid Domain Generalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23501-23511} }
SODA: Bottleneck Diffusion Models for Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hudson_2024_CVPR, author = {Hudson, Drew A. and Zoran, Daniel and Malinowski, Mateusz and Lampinen, Andrew K. and Jaegle, Andrew and McClelland, James L. and Matthey, Loic and Hill, Felix and Lerchner, Alexander}, title = {SODA: Bottleneck Diffusion Models for Representation Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23115-23127} }
Towards Robust Event-guided Low-Light Image Enhancement: A Large-Scale Real-World Event-Image Dataset and Novel Approach-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2024_CVPR, author = {Liang, Guoqiang and Chen, Kanghao and Li, Hangyu and Lu, Yunfan and Wang, Lin}, title = {Towards Robust Event-guided Low-Light Image Enhancement: A Large-Scale Real-World Event-Image Dataset and Novel Approach}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23-33} }
Zero-Reference Low-Light Enhancement via Physical Quadruple Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Wenjing and Yang, Huan and Fu, Jianlong and Liu, Jiaying}, title = {Zero-Reference Low-Light Enhancement via Physical Quadruple Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26057-26066} }
LLaMA-Excitor: General Instruction Tuning via Indirect Feature Interaction-
[pdf]
[supp]
[bibtex]@InProceedings{Zou_2024_CVPR, author = {Zou, Bo and Yang, Chao and Qiao, Yu and Quan, Chengbin and Zhao, Youjian}, title = {LLaMA-Excitor: General Instruction Tuning via Indirect Feature Interaction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14089-14099} }
NeRFCodec: Neural Feature Compression Meets Neural Radiance Fields for Memory-Efficient Scene Representation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Sicheng and Li, Hao and Liao, Yiyi and Yu, Lu}, title = {NeRFCodec: Neural Feature Compression Meets Neural Radiance Fields for Memory-Efficient Scene Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21274-21283} }
From a Bird's Eye View to See: Joint Camera and Subject Registration without the Camera Calibration-
[pdf]
[supp]
[bibtex]@InProceedings{Qian_2024_CVPR, author = {Qian, Zekun and Han, Ruize and Feng, Wei and Wang, Song}, title = {From a Bird's Eye View to See: Joint Camera and Subject Registration without the Camera Calibration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {863-873} }
Steerers: A Framework for Rotation Equivariant Keypoint Descriptors-
[pdf]
[supp]
[bibtex]@InProceedings{Bokman_2024_CVPR, author = {B{\"o}kman, Georg and Edstedt, Johan and Felsberg, Michael and Kahl, Fredrik}, title = {Steerers: A Framework for Rotation Equivariant Keypoint Descriptors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4885-4895} }
Efficient Dataset Distillation via Minimax Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gu_2024_CVPR, author = {Gu, Jianyang and Vahidian, Saeed and Kungurtsev, Vyacheslav and Wang, Haonan and Jiang, Wei and You, Yang and Chen, Yiran}, title = {Efficient Dataset Distillation via Minimax Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15793-15803} }
Posterior Distillation Sampling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Koo_2024_CVPR, author = {Koo, Juil and Park, Chanho and Sung, Minhyuk}, title = {Posterior Distillation Sampling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13352-13361} }
HOISDF: Constraining 3D Hand-Object Pose Estimation with Global Signed Distance Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qi_2024_CVPR, author = {Qi, Haozhe and Zhao, Chen and Salzmann, Mathieu and Mathis, Alexander}, title = {HOISDF: Constraining 3D Hand-Object Pose Estimation with Global Signed Distance Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10392-10402} }
Enhancing Video Super-Resolution via Implicit Resampling-based Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Kai and Yu, Ziwei and Wang, Xin and Mi, Michael Bi and Yao, Angela}, title = {Enhancing Video Super-Resolution via Implicit Resampling-based Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2546-2555} }
DiffPortrait3D: Controllable Diffusion for Zero-Shot Portrait View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gu_2024_CVPR, author = {Gu, Yuming and Xu, Hongyi and Xie, You and Song, Guoxian and Shi, Yichun and Chang, Di and Yang, Jing and Luo, Linjie}, title = {DiffPortrait3D: Controllable Diffusion for Zero-Shot Portrait View Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10456-10465} }
Rethinking Transformers Pre-training for Multi-Spectral Satellite Imagery-
[pdf]
[arXiv]
[bibtex]@InProceedings{Noman_2024_CVPR, author = {Noman, Mubashir and Naseer, Muzammal and Cholakkal, Hisham and Anwer, Rao Muhammad and Khan, Salman and Khan, Fahad Shahbaz}, title = {Rethinking Transformers Pre-training for Multi-Spectral Satellite Imagery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27811-27819} }
LLM4SGG: Large Language Models for Weakly Supervised Scene Graph Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Kibum and Yoon, Kanghoon and Jeon, Jaehyeong and In, Yeonjun and Moon, Jinyoung and Kim, Donghyun and Park, Chanyoung}, title = {LLM4SGG: Large Language Models for Weakly Supervised Scene Graph Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28306-28316} }
Parameter Efficient Fine-tuning via Cross Block Orchestration for Segment Anything Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2024_CVPR, author = {Peng, Zelin and Xu, Zhengqin and Zeng, Zhilin and Xie, Lingxi and Tian, Qi and Shen, Wei}, title = {Parameter Efficient Fine-tuning via Cross Block Orchestration for Segment Anything Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3743-3752} }
Neural Directional Encoding for Efficient and Accurate View-Dependent Appearance Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Liwen and Bi, Sai and Xu, Zexiang and Luan, Fujun and Zhang, Kai and Georgiev, Iliyan and Sunkavalli, Kalyan and Ramamoorthi, Ravi}, title = {Neural Directional Encoding for Efficient and Accurate View-Dependent Appearance Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21157-21166} }
Masked and Shuffled Blind Spot Denoising for Real-World Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chihaoui_2024_CVPR, author = {Chihaoui, Hamadi and Favaro, Paolo}, title = {Masked and Shuffled Blind Spot Denoising for Real-World Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3025-3034} }
Label Propagation for Zero-shot Classification with Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Stojnic_2024_CVPR, author = {Stojni{\'c}, Vladan and Kalantidis, Yannis and Tolias, Giorgos}, title = {Label Propagation for Zero-shot Classification with Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23209-23218} }
DiffusionAvatars: Deferred Diffusion for High-fidelity 3D Head Avatars-
[pdf]
[supp]
[bibtex]@InProceedings{Kirschstein_2024_CVPR, author = {Kirschstein, Tobias and Giebenhain, Simon and Nie{\ss}ner, Matthias}, title = {DiffusionAvatars: Deferred Diffusion for High-fidelity 3D Head Avatars}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5481-5492} }
Data-Free Quantization via Pseudo-label Filtering-
[pdf]
[bibtex]@InProceedings{Fan_2024_CVPR, author = {Fan, Chunxiao and Wang, Ziqi and Guo, Dan and Wang, Meng}, title = {Data-Free Quantization via Pseudo-label Filtering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5589-5598} }
Revisiting Global Translation Estimation with Feature Tracks-
[pdf]
[supp]
[bibtex]@InProceedings{Tao_2024_CVPR, author = {Tao, Peilin and Cui, Hainan and Rong, Mengqi and Shen, Shuhan}, title = {Revisiting Global Translation Estimation with Feature Tracks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20686-20696} }
Open-Set Domain Adaptation for Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choe_2024_CVPR, author = {Choe, Seun-An and Shin, Ah-Hyung and Park, Keon-Hee and Choi, Jinwoo and Park, Gyeong-Moon}, title = {Open-Set Domain Adaptation for Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23943-23953} }
Generative Powers of Ten-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Xiaojuan and Kontkanen, Janne and Curless, Brian and Seitz, Steven M. and Kemelmacher-Shlizerman, Ira and Mildenhall, Ben and Srinivasan, Pratul and Verbin, Dor and Holynski, Aleksander}, title = {Generative Powers of Ten}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7173-7182} }
H-ViT: A Hierarchical Vision Transformer for Deformable Image Registration-
[pdf]
[supp]
[bibtex]@InProceedings{Ghahremani_2024_CVPR, author = {Ghahremani, Morteza and Khateri, Mohammad and Jian, Bailiang and Wiestler, Benedikt and Adeli, Ehsan and Wachinger, Christian}, title = {H-ViT: A Hierarchical Vision Transformer for Deformable Image Registration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11513-11523} }
Sculpting Holistic 3D Representation in Contrastive Language-Image-3D Pre-training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2024_CVPR, author = {Gao, Yipeng and Wang, Zeyu and Zheng, Wei-Shi and Xie, Cihang and Zhou, Yuyin}, title = {Sculpting Holistic 3D Representation in Contrastive Language-Image-3D Pre-training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22998-23008} }
Probing Synergistic High-Order Interaction in Infrared and Visible Image Fusion-
[pdf]
[bibtex]@InProceedings{Zheng_2024_CVPR, author = {Zheng, Naishan and Zhou, Man and Huang, Jie and Hou, Junming and Li, Haoying and Xu, Yuan and Zhao, Feng}, title = {Probing Synergistic High-Order Interaction in Infrared and Visible Image Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26384-26395} }
VideoLLM-online: Online Video Large Language Model for Streaming Video-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Joya and Lv, Zhaoyang and Wu, Shiwei and Lin, Kevin Qinghong and Song, Chenan and Gao, Difei and Liu, Jia-Wei and Gao, Ziteng and Mao, Dongxing and Shou, Mike Zheng}, title = {VideoLLM-online: Online Video Large Language Model for Streaming Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18407-18418} }
Text-conditional Attribute Alignment across Latent Spaces for 3D Controllable Face Image Synthesis-
[pdf]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Feifan and Li, Rui and Wu, Si and Xu, Yong and Wong, Hau San}, title = {Text-conditional Attribute Alignment across Latent Spaces for 3D Controllable Face Image Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9172-9181} }
ESCAPE: Encoding Super-keypoints for Category-Agnostic Pose Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Nguyen_2024_CVPR, author = {Nguyen, Khoi Duc and Li, Chen and Lee, Gim Hee}, title = {ESCAPE: Encoding Super-keypoints for Category-Agnostic Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23491-23500} }
Correcting Diffusion Generation through Resampling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Yujian and Zhang, Yang and Jaakkola, Tommi and Chang, Shiyu}, title = {Correcting Diffusion Generation through Resampling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8713-8723} }
Towards Better Vision-Inspired Vision-Language Models-
[pdf]
[bibtex]@InProceedings{Cao_2024_CVPR, author = {Cao, Yun-Hao and Ji, Kaixiang and Huang, Ziyuan and Zheng, Chuanyang and Liu, Jiajia and Wang, Jian and Chen, Jingdong and Yang, Ming}, title = {Towards Better Vision-Inspired Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13537-13547} }
VSRD: Instance-Aware Volumetric Silhouette Rendering for Weakly Supervised 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Zihua and Sakuma, Hiroki and Okutomi, Masatoshi}, title = {VSRD: Instance-Aware Volumetric Silhouette Rendering for Weakly Supervised 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17354-17363} }
RILA: Reflective and Imaginative Language Agent for Zero-Shot Semantic Audio-Visual Navigation-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Zeyuan and Liu, Jiageng and Chen, Peihao and Cherian, Anoop and Marks, Tim K. and Le Roux, Jonathan and Gan, Chuang}, title = {RILA: Reflective and Imaginative Language Agent for Zero-Shot Semantic Audio-Visual Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16251-16261} }
Endow SAM with Keen Eyes: Temporal-spatial Prompt Learning for Video Camouflaged Object Detection-
[pdf]
[bibtex]@InProceedings{Hui_2024_CVPR, author = {Hui, Wenjun and Zhu, Zhenfeng and Zheng, Shuai and Zhao, Yao}, title = {Endow SAM with Keen Eyes: Temporal-spatial Prompt Learning for Video Camouflaged Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19058-19067} }
TULIP: Multi-camera 3D Precision Assessment of Parkinson's Disease-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Kyungdo and Lyu, Sihan and Mantri, Sneha and Dunn, Timothy W.}, title = {TULIP: Multi-camera 3D Precision Assessment of Parkinson's Disease}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22551-22562} }
HybridNeRF: Efficient Neural Rendering via Adaptive Volumetric Surfaces-
[pdf]
[supp]
[bibtex]@InProceedings{Turki_2024_CVPR, author = {Turki, Haithem and Agrawal, Vasu and Bul{\`o}, Samuel Rota and Porzi, Lorenzo and Kontschieder, Peter and Ramanan, Deva and Zollh{\"o}fer, Michael and Richardt, Christian}, title = {HybridNeRF: Efficient Neural Rendering via Adaptive Volumetric Surfaces}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19647-19656} }
AirPlanes: Accurate Plane Estimation via 3D-Consistent Embeddings-
[pdf]
[arXiv]
[bibtex]@InProceedings{Watson_2024_CVPR, author = {Watson, Jamie and Aleotti, Filippo and Sayed, Mohamed and Qureshi, Zawar and Mac Aodha, Oisin and Brostow, Gabriel and Firman, Michael and Vicente, Sara}, title = {AirPlanes: Accurate Plane Estimation via 3D-Consistent Embeddings}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5270-5280} }
Forgery-aware Adaptive Transformer for Generalizable Synthetic Image Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Huan and Tan, Zichang and Tan, Chuangchuang and Wei, Yunchao and Wang, Jingdong and Zhao, Yao}, title = {Forgery-aware Adaptive Transformer for Generalizable Synthetic Image Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10770-10780} }
PostureHMR: Posture Transformation for 3D Human Mesh Recovery-
[pdf]
[supp]
[bibtex]@InProceedings{Song_2024_CVPR, author = {Song, Yu-Pei and Wu, Xiao and Yuan, Zhaoquan and Qiao, Jian-Jun and Peng, Qiang}, title = {PostureHMR: Posture Transformation for 3D Human Mesh Recovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9732-9741} }
Blur2Blur: Blur Conversion for Unsupervised Image Deblurring on Unknown Domains-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pham_2024_CVPR, author = {Pham, Bang-Dang and Tran, Phong and Tran, Anh and Pham, Cuong and Nguyen, Rang and Hoai, Minh}, title = {Blur2Blur: Blur Conversion for Unsupervised Image Deblurring on Unknown Domains}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2804-2813} }
Dynamic Adapter Meets Prompt Tuning: Parameter-Efficient Transfer Learning for Point Cloud Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR, author = {Zhou, Xin and Liang, Dingkang and Xu, Wei and Zhu, Xingkui and Xu, Yihan and Zou, Zhikang and Bai, Xiang}, title = {Dynamic Adapter Meets Prompt Tuning: Parameter-Efficient Transfer Learning for Point Cloud Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14707-14717} }
Exploring Vision Transformers for 3D Human Motion-Language Models with Motion Patches-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2024_CVPR, author = {Yu, Qing and Tanaka, Mikihiro and Fujiwara, Kent}, title = {Exploring Vision Transformers for 3D Human Motion-Language Models with Motion Patches}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {937--946} }
Motion-adaptive Separable Collaborative Filters for Blind Motion Deblurring-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Chengxu and Wang, Xuan and Xu, Xiangyu and Tian, Ruhao and Li, Shuai and Qian, Xueming and Yang, Ming-Hsuan}, title = {Motion-adaptive Separable Collaborative Filters for Blind Motion Deblurring}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {25595--25605} }
DART: Implicit Doppler Tomography for Radar Novel View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Tianshu and Miller, John and Prabhakara, Akarsh and Jin, Tao and Laroia, Tarana and Kolter, Zico and Rowe, Anthony}, title = {DART: Implicit Doppler Tomography for Radar Novel View Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {24118--24129} }
Wonder3D: Single Image to 3D using Cross-Domain Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Long_2024_CVPR, author = {Long, Xiaoxiao and Guo, Yuan-Chen and Lin, Cheng and Liu, Yuan and Dou, Zhiyang and Liu, Lingjie and Ma, Yuexin and Zhang, Song-Hai and Habermann, Marc and Theobalt, Christian and Wang, Wenping}, title = {Wonder3D: Single Image to 3D using Cross-Domain Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {9970--9980} }
Genuine Knowledge from Practice: Diffusion Test-Time Adaptation for Video Adverse Weather Removal-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Yijun and Wu, Hongtao and Aviles-Rivero, Angelica I. and Zhang, Yulun and Qin, Jing and Zhu, Lei}, title = {Genuine Knowledge from Practice: Diffusion Test-Time Adaptation for Video Adverse Weather Removal}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {25606--25616} }
Gradient-based Parameter Selection for Efficient Fine-Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Zhi and Zhang, Qizhe and Gao, Zijun and Zhang, Renrui and Shutova, Ekaterina and Zhou, Shiji and Zhang, Shanghang}, title = {Gradient-based Parameter Selection for Efficient Fine-Tuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {28566--28577} }
Clustering for Protein Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Quan_2024_CVPR, author = {Quan, Ruijie and Wang, Wenguan and Ma, Fan and Fan, Hehe and Yang, Yi}, title = {Clustering for Protein Representation Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {319--329} }
CorrMatch: Label Propagation via Correlation Matching for Semi-Supervised Semantic Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Sun_2024_CVPR, author = {Sun, Boyuan and Yang, Yuqi and Zhang, Le and Cheng, Ming-Ming and Hou, Qibin}, title = {CorrMatch: Label Propagation via Correlation Matching for Semi-Supervised Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {3097--3107} }
Estimating Extreme 3D Image Rotations using Cascaded Attention-
[pdf]
[supp]
[bibtex]@InProceedings{Dekel_2024_CVPR, author = {Dekel, Shay and Keller, Yosi and Cadik, Martin}, title = {Estimating Extreme 3D Image Rotations using Cascaded Attention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {2588--2598} }
RichDreamer: A Generalizable Normal-Depth Diffusion Model for Detail Richness in Text-to-3D-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qiu_2024_CVPR, author = {Qiu, Lingteng and Chen, Guanying and Gu, Xiaodong and Zuo, Qi and Xu, Mutian and Wu, Yushuang and Yuan, Weihao and Dong, Zilong and Bo, Liefeng and Han, Xiaoguang}, title = {RichDreamer: A Generalizable Normal-Depth Diffusion Model for Detail Richness in Text-to-3D}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {9914--9925} }
Adapt or Perish: Adaptive Sparse Transformer with Attentive Feature Refinement for Image Restoration-
[pdf]
[bibtex]@InProceedings{Zhou_2024_CVPR, author = {Zhou, Shihao and Chen, Duosheng and Pan, Jinshan and Shi, Jinglei and Yang, Jufeng}, title = {Adapt or Perish: Adaptive Sparse Transformer with Attentive Feature Refinement for Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {2952--2963} }
VINECS: Video-based Neural Character Skinning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liao_2024_CVPR, author = {Liao, Zhouyingcheng and Golyanik, Vladislav and Habermann, Marc and Theobalt, Christian}, title = {VINECS: Video-based Neural Character Skinning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {1377--1387} }
Zero-shot Referring Expression Comprehension via Structural Similarity Between Images and Captions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2024_CVPR, author = {Han, Zeyu and Zhu, Fangrui and Lao, Qianru and Jiang, Huaizu}, title = {Zero-shot Referring Expression Comprehension via Structural Similarity Between Images and Captions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {14364--14374} }
Domain Prompt Learning with Quaternion Networks-
[pdf]
[arXiv]
[bibtex]@InProceedings{Cao_2024_CVPR, author = {Cao, Qinglong and Xu, Zhengqin and Chen, Yuntian and Ma, Chao and Yang, Xiaokang}, title = {Domain Prompt Learning with Quaternion Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {26637--26646} }
BEHAVIOR Vision Suite: Customizable Dataset Generation via Simulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ge_2024_CVPR, author = {Ge, Yunhao and Tang, Yihe and Xu, Jiashu and Gokmen, Cem and Li, Chengshu and Ai, Wensi and Martinez, Benjamin Jose and Aydin, Arman and Anvari, Mona and Chakravarthy, Ayush K and Yu, Hong-Xing and Wong, Josiah and Srivastava, Sanjana and Lee, Sharon and Zha, Shengxin and Itti, Laurent and Li, Yunzhu and Mart{\'\i}n-Mart{\'\i}n, Roberto and Liu, Miao and Zhang, Pengchuan and Zhang, Ruohan and Fei-Fei, Li and Wu, Jiajun}, title = {BEHAVIOR Vision Suite: Customizable Dataset Generation via Simulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {22401--22412} }
Triplane Meets Gaussian Splatting: Fast and Generalizable Single-View 3D Reconstruction with Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zou_2024_CVPR, author = {Zou, Zi-Xin and Yu, Zhipeng and Guo, Yuan-Chen and Li, Yangguang and Liang, Ding and Cao, Yan-Pei and Zhang, Song-Hai}, title = {Triplane Meets Gaussian Splatting: Fast and Generalizable Single-View 3D Reconstruction with Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {10324--10335} }
WateRF: Robust Watermarks in Radiance Fields for Protection of Copyrights-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jang_2024_CVPR, author = {Jang, Youngdong and Lee, Dong In and Jang, MinHyuk and Kim, Jong Wook and Yang, Feng and Kim, Sangpil}, title = {WateRF: Robust Watermarks in Radiance Fields for Protection of Copyrights}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {12087--12097} }
Gaussian-Flow: 4D Reconstruction with Dynamic 3D Gaussian Particle-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2024_CVPR, author = {Lin, Youtian and Dai, Zuozhuo and Zhu, Siyu and Yao, Yao}, title = {Gaussian-Flow: 4D Reconstruction with Dynamic 3D Gaussian Particle}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {21136--21145} }
Your Student is Better Than Expected: Adaptive Teacher-Student Collaboration for Text-Conditional Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Starodubcev_2024_CVPR, author = {Starodubcev, Nikita and Baranchuk, Dmitry and Fedorov, Artem and Babenko, Artem}, title = {Your Student is Better Than Expected: Adaptive Teacher-Student Collaboration for Text-Conditional Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {9275--9285} }
DiVAS: Video and Audio Synchronization with Dynamic Frame Rates-
[pdf]
[bibtex]@InProceedings{Fernandez-Labrador_2024_CVPR, author = {Fernandez-Labrador, Clara and Ak{\c{c}}ay, Mertcan and Abecassis, Eitan and Massich, Joan and Schroers, Christopher}, title = {DiVAS: Video and Audio Synchronization with Dynamic Frame Rates}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {26846--26854} }
SHViT: Single-Head Vision Transformer with Memory Efficient Macro Design-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yun_2024_CVPR, author = {Yun, Seokju and Ro, Youngmin}, title = {SHViT: Single-Head Vision Transformer with Memory Efficient Macro Design}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {5756--5767} }
HDRFlow: Real-Time HDR Video Reconstruction with Large Motions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Gangwei and Wang, Yujin and Gu, Jinwei and Xue, Tianfan and Yang, Xin}, title = {HDRFlow: Real-Time HDR Video Reconstruction with Large Motions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {24851--24860} }
SPIDeRS: Structured Polarization for Invisible Depth and Reflectance Sensing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ichikawa_2024_CVPR, author = {Ichikawa, Tomoki and Nobuhara, Shohei and Nishino, Ko}, title = {SPIDeRS: Structured Polarization for Invisible Depth and Reflectance Sensing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {25077--25085} }
SuperNormal: Neural Surface Reconstruction via Multi-View Normal Integration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2024_CVPR, author = {Cao, Xu and Taketomi, Takafumi}, title = {SuperNormal: Neural Surface Reconstruction via Multi-View Normal Integration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {20581--20590} }
Instance-aware Contrastive Learning for Occluded Human Mesh Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Gwon_2024_CVPR, author = {Gwon, Mi-Gyeong and Um, Gi-Mun and Cheong, Won-Sik and Kim, Wonjun}, title = {Instance-aware Contrastive Learning for Occluded Human Mesh Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {10553--10562} }
ADFactory: An Effective Framework for Generalizing Optical Flow with NeRF-
[pdf]
[supp]
[bibtex]@InProceedings{Ling_2024_CVPR, author = {Ling, Han and Sun, Quansen and Sun, Yinghui and Xu, Xian and Li, Xinfeng}, title = {ADFactory: An Effective Framework for Generalizing Optical Flow with NeRF}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {20591--20600} }
Robust Noisy Correspondence Learning with Equivariant Similarity Consistency-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Yuchen and Wang, Likai and Yang, Erkun and Deng, Cheng}, title = {Robust Noisy Correspondence Learning with Equivariant Similarity Consistency}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {17700--17709} }
CommonCanvas: Open Diffusion Models Trained on Creative-Commons Images-
[pdf]
[supp]
[bibtex]@InProceedings{Gokaslan_2024_CVPR, author = {Gokaslan, Aaron and Cooper, A. Feder and Collins, Jasmine and Seguin, Landan and Jacobson, Austin and Patel, Mihir and Frankle, Jonathan and Stephenson, Cory and Kuleshov, Volodymyr}, title = {CommonCanvas: Open Diffusion Models Trained on Creative-Commons Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {8250--8260} }
Prompt-Driven Referring Image Segmentation with Instance Contrasting-
[pdf]
[bibtex]@InProceedings{Shang_2024_CVPR, author = {Shang, Chao and Song, Zichen and Qiu, Heqian and Wang, Lanxiao and Meng, Fanman and Li, Hongliang}, title = {Prompt-Driven Referring Image Segmentation with Instance Contrasting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {4124--4134} }
Image Sculpting: Precise Object Editing with 3D Geometry Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yenphraphai_2024_CVPR, author = {Yenphraphai, Jiraphon and Pan, Xichen and Liu, Sainan and Panozzo, Daniele and Xie, Saining}, title = {Image Sculpting: Precise Object Editing with 3D Geometry Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {4241--4251} }
Compositional Video Understanding with Spatiotemporal Structure-based Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Yun_2024_CVPR, author = {Yun, Hoyeoung and Ahn, Jinwoo and Kim, Minseo and Kim, Eun-Sol}, title = {Compositional Video Understanding with Spatiotemporal Structure-based Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {18751--18760} }
3D LiDAR Mapping in Dynamic Environments using a 4D Implicit Neural Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhong_2024_CVPR, author = {Zhong, Xingguang and Pan, Yue and Stachniss, Cyrill and Behley, Jens}, title = {3D LiDAR Mapping in Dynamic Environments using a 4D Implicit Neural Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {15417--15427} }
What When and Where? Self-Supervised Spatio-Temporal Grounding in Untrimmed Multi-Action Videos from Narrated Instructions-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Brian and Shvetsova, Nina and Rouditchenko, Andrew and Kondermann, Daniel and Thomas, Samuel and Chang, Shih-Fu and Feris, Rogerio and Glass, James and Kuehne, Hilde}, title = {What When and Where? Self-Supervised Spatio-Temporal Grounding in Untrimmed Multi-Action Videos from Narrated Instructions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {18419--18429} }
FoundationPose: Unified 6D Pose Estimation and Tracking of Novel Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wen_2024_CVPR, author = {Wen, Bowen and Yang, Wei and Kautz, Jan and Birchfield, Stan}, title = {FoundationPose: Unified 6D Pose Estimation and Tracking of Novel Objects}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {17868--17879} }
How Far Can We Compress Instant-NGP-Based NeRF?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Yihang and Wu, Qianyi and Harandi, Mehrtash and Cai, Jianfei}, title = {How Far Can We Compress Instant-NGP-Based NeRF?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {20321--20330} }
PFStorer: Personalized Face Restoration and Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Varanka_2024_CVPR, author = {Varanka, Tuomas and Toivonen, Tapani and Tripathy, Soumya and Zhao, Guoying and Acar, Erman}, title = {PFStorer: Personalized Face Restoration and Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {2372--2381} }
TextureDreamer: Image-Guided Texture Synthesis Through Geometry-Aware Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yeh_2024_CVPR, author = {Yeh, Yu-Ying and Huang, Jia-Bin and Kim, Changil and Xiao, Lei and Nguyen-Phuoc, Thu and Khan, Numair and Zhang, Cheng and Chandraker, Manmohan and Marshall, Carl S and Dong, Zhao and Li, Zhengqin}, title = {TextureDreamer: Image-Guided Texture Synthesis Through Geometry-Aware Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {4304--4314} }
Boosting Image Quality Assessment through Efficient Transformer Adaptation with Local Feature Enhancement-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Kangmin and Liao, Liang and Xiao, Jing and Chen, Chaofeng and Wu, Haoning and Yan, Qiong and Lin, Weisi}, title = {Boosting Image Quality Assessment through Efficient Transformer Adaptation with Local Feature Enhancement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {2662--2672} }
Hyperbolic Anomaly Detection-
[pdf]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Huimin and Chen, Zhentao and Xu, Yunhao and Hu, Junlin}, title = {Hyperbolic Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {17511--17520} }
VLP: Vision Language Planning for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2024_CVPR, author = {Pan, Chenbin and Yaman, Burhaneddin and Nesti, Tommaso and Mallik, Abhirup and Allievi, Alessandro G and Velipasalar, Senem and Ren, Liu}, title = {VLP: Vision Language Planning for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {14760--14769} }
Attention Calibration for Disentangled Text-to-Image Personalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Yanbing and Yang, Mengping and Zhou, Qin and Wang, Zhe}, title = {Attention Calibration for Disentangled Text-to-Image Personalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {4764--4774} }
ProMark: Proactive Diffusion Watermarking for Causal Attribution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Asnani_2024_CVPR, author = {Asnani, Vishal and Collomosse, John and Bui, Tu and Liu, Xiaoming and Agarwal, Shruti}, title = {ProMark: Proactive Diffusion Watermarking for Causal Attribution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {10802--10811} }
One-Shot Structure-Aware Stylized Image Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cho_2024_CVPR, author = {Cho, Hansam and Lee, Jonghyun and Chang, Seunggyu and Jeong, Yonghyun}, title = {One-Shot Structure-Aware Stylized Image Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {8302--8311} }
GPT4Point: A Unified Framework for Point-Language Understanding and Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qi_2024_CVPR, author = {Qi, Zhangyang and Fang, Ye and Sun, Zeyi and Wu, Xiaoyang and Wu, Tong and Wang, Jiaqi and Lin, Dahua and Zhao, Hengshuang}, title = {GPT4Point: A Unified Framework for Point-Language Understanding and Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {26417--26427} }
SemCity: Semantic Scene Generation with Triplane Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2024_CVPR, author = {Lee, Jumin and Lee, Sebin and Jo, Changho and Im, Woobin and Seon, Juhyeong and Yoon, Sung-Eui}, title = {SemCity: Semantic Scene Generation with Triplane Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {28337--28347} }
Improving Semantic Correspondence with Viewpoint-Guided Spherical Maps-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mariotti_2024_CVPR, author = {Mariotti, Octave and Mac Aodha, Oisin and Bilen, Hakan}, title = {Improving Semantic Correspondence with Viewpoint-Guided Spherical Maps}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {19521--19530} }
MR-VNet: Media Restoration using Volterra Networks-
[pdf]
[bibtex]@InProceedings{Roheda_2024_CVPR, author = {Roheda, Siddharth and Unde, Amit and Rashid, Loay}, title = {MR-VNet: Media Restoration using Volterra Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {6098--6107} }
Dual Memory Networks: A Versatile Adaptation Approach for Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Yabin and Zhu, Wenjie and Tang, Hui and Ma, Zhiyuan and Zhou, Kaiyang and Zhang, Lei}, title = {Dual Memory Networks: A Versatile Adaptation Approach for Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {28718--28728} }
Single Mesh Diffusion Models with Field Latents for Texture Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mitchel_2024_CVPR, author = {Mitchel, Thomas W. and Esteves, Carlos and Makadia, Ameesh}, title = {Single Mesh Diffusion Models with Field Latents for Texture Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {7953--7963} }
LION: Empowering Multimodal Large Language Model with Dual-Level Visual Knowledge-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Gongwei and Shen, Leyang and Shao, Rui and Deng, Xiang and Nie, Liqiang}, title = {LION: Empowering Multimodal Large Language Model with Dual-Level Visual Knowledge}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {26540--26550} }
Learning to Select Views for Efficient Multi-View Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Hou_2024_CVPR, author = {Hou, Yunzhong and Gould, Stephen and Zheng, Liang}, title = {Learning to Select Views for Efficient Multi-View Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {20135--20144} }
Consistency and Uncertainty: Identifying Unreliable Responses From Black-Box Vision-Language Models for Selective Visual Question Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Khan_2024_CVPR, author = {Khan, Zaid and Fu, Yun}, title = {Consistency and Uncertainty: Identifying Unreliable Responses From Black-Box Vision-Language Models for Selective Visual Question Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {10854--10863} }
SAI3D: Segment Any Instance in 3D Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2024_CVPR, author = {Yin, Yingda and Liu, Yuzheng and Xiao, Yang and Cohen-Or, Daniel and Huang, Jingwei and Chen, Baoquan}, title = {SAI3D: Segment Any Instance in 3D Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {3292--3302} }
Implicit Motion Function-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2024_CVPR, author = {Gao, Yue and Li, Jiahao and Chu, Lei and Lu, Yan}, title = {Implicit Motion Function}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {19278--19289} }
Unified Entropy Optimization for Open-Set Test-Time Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2024_CVPR, author = {Gao, Zhengqing and Zhang, Xu-Yao and Liu, Cheng-Lin}, title = {Unified Entropy Optimization for Open-Set Test-Time Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {23975--23984} }
TexOct: Generating Textures of 3D Models with Octree-based Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Jialun and Wu, Chenming and Liu, Xinqi and Liu, Xing and Wu, Jinbo and Peng, Haotian and Zhao, Chen and Feng, Haocheng and Liu, Jingtuo and Ding, Errui}, title = {TexOct: Generating Textures of 3D Models with Octree-based Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {4284--4293} }
Anatomically Constrained Implicit Face Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chandran_2024_CVPR, author = {Chandran, Prashanth and Zoss, Gaspard}, title = {Anatomically Constrained Implicit Face Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {2220--2229} }
Expandable Subspace Ensemble for Pre-Trained Model-Based Class-Incremental Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR, author = {Zhou, Da-Wei and Sun, Hai-Long and Ye, Han-Jia and Zhan, De-Chuan}, title = {Expandable Subspace Ensemble for Pre-Trained Model-Based Class-Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {23554--23564} }
Capturing Closely Interacted Two-Person Motions with Reaction Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Fang_2024_CVPR, author = {Fang, Qi and Fan, Yinghui and Li, Yanjun and Dong, Junting and Wu, Dingwei and Zhang, Weidong and Chen, Kang}, title = {Capturing Closely Interacted Two-Person Motions with Reaction Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {655--665} }
RobustSAM: Segment Anything Robustly on Degraded Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Wei-Ting and Vong, Yu-Jiet and Kuo, Sy-Yen and Ma, Sizhou and Wang, Jian}, title = {RobustSAM: Segment Anything Robustly on Degraded Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {4081--4091} }
MultiDiff: Consistent Novel View Synthesis from a Single Image-
[pdf]
[supp]
[bibtex]@InProceedings{Muller_2024_CVPR, author = {M{\"u}ller, Norman and Schwarz, Katja and R{\"o}ssle, Barbara and Porzi, Lorenzo and Bul{\`o}, Samuel Rota and Nie{\ss}ner, Matthias and Kontschieder, Peter}, title = {MultiDiff: Consistent Novel View Synthesis from a Single Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {10258--10268} }
In-N-Out: Faithful 3D GAN Inversion with Volumetric Decomposition for Face Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Yiran and Shu, Zhixin and Smith, Cameron and Oh, Seoung Wug and Huang, Jia-Bin}, title = {In-N-Out: Faithful 3D GAN Inversion with Volumetric Decomposition for Face Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {7225--7235} }
Atom-Level Optical Chemical Structure Recognition with Limited Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Oldenhof_2024_CVPR, author = {Oldenhof, Martijn and De Brouwer, Edward and Arany, Adam and Moreau, Yves}, title = {Atom-Level Optical Chemical Structure Recognition with Limited Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {17669--17678} }
L4D-Track: Language-to-4D Modeling Towards 6-DoF Tracking and Shape Reconstruction in 3D Point Cloud Stream-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2024_CVPR, author = {Sun, Jingtao and Wang, Yaonan and Feng, Mingtao and Guo, Yulan and Mian, Ajmal and Shou, Mike Zheng}, title = {L4D-Track: Language-to-4D Modeling Towards 6-DoF Tracking and Shape Reconstruction in 3D Point Cloud Stream}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {21146--21156} }
General Point Model Pretraining with Autoencoding and Autoregressive-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Zhe and Gao, Zhangyang and Tan, Cheng and Ren, Bocheng and Yang, Laurence T. and Li, Stan Z.}, title = {General Point Model Pretraining with Autoencoding and Autoregressive}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {20954--20964} }
Combining Frame and GOP Embeddings for Neural Video Representation-
[pdf]
[supp]
[bibtex]@InProceedings{Saethre_2024_CVPR, author = {Saethre, Jens Eirik and Azevedo, Roberto and Schroers, Christopher}, title = {Combining Frame and GOP Embeddings for Neural Video Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {9253--9263} }
LiDAR-based Person Re-identification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2024_CVPR, author = {Guo, Wenxuan and Pan, Zhiyu and Liang, Yingping and Xi, Ziheng and Zhong, Zhicheng and Feng, Jianjiang and Zhou, Jie}, title = {LiDAR-based Person Re-identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {17437--17447} }
Fantastic Animals and Where to Find Them: Segment Any Marine Animal with Dual SAM-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Pingping and Yan, Tianyu and Liu, Yang and Lu, Huchuan}, title = {Fantastic Animals and Where to Find Them: Segment Any Marine Animal with Dual SAM}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2578-2587} }
Seeing and Hearing: Open-domain Visual-Audio Generation with Diffusion Latent Aligners-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xing_2024_CVPR, author = {Xing, Yazhou and He, Yingqing and Tian, Zeyue and Wang, Xintao and Chen, Qifeng}, title = {Seeing and Hearing: Open-domain Visual-Audio Generation with Diffusion Latent Aligners}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7151-7161} }
Model Adaptation for Time Constrained Embodied Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2024_CVPR, author = {Song, Jaehyun and Yoo, Minjong and Woo, Honguk}, title = {Model Adaptation for Time Constrained Embodied Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16499-16508} }
Objects as Volumes: A Stochastic Geometry View of Opaque Solids-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Miller_2024_CVPR, author = {Miller, Bailey and Chen, Hanyu and Lai, Alice and Gkioulekas, Ioannis}, title = {Objects as Volumes: A Stochastic Geometry View of Opaque Solids}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {87-97} }
ActiveDC: Distribution Calibration for Active Finetuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Wenshuai and Hu, Zhenghui and Lu, Yu and Meng, Jinzhou and Liu, Qingjie and Wang, Yunhong}, title = {ActiveDC: Distribution Calibration for Active Finetuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16996-17005} }
Seeing Unseen: Discover Novel Biomedical Concepts via Geometry-Constrained Probabilistic Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2024_CVPR, author = {Fan, Jianan and Liu, Dongnan and Chang, Hang and Huang, Heng and Chen, Mei and Cai, Weidong}, title = {Seeing Unseen: Discover Novel Biomedical Concepts via Geometry-Constrained Probabilistic Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11524-11534} }
MVHumanNet: A Large-scale Dataset of Multi-view Daily Dressing Human Captures-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiong_2024_CVPR, author = {Xiong, Zhangyang and Li, Chenghong and Liu, Kenkun and Liao, Hongjie and Hu, Jianqiao and Zhu, Junyi and Ning, Shuliang and Qiu, Lingteng and Wang, Chongjie and Wang, Shijie and Cui, Shuguang and Han, Xiaoguang}, title = {MVHumanNet: A Large-scale Dataset of Multi-view Daily Dressing Human Captures}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19801-19811} }
Communication-Efficient Federated Learning with Accelerated Client Gradient-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Geeho and Kim, Jinkyu and Han, Bohyung}, title = {Communication-Efficient Federated Learning with Accelerated Client Gradient}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12385-12394} }
LLMs are Good Action Recognizers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qu_2024_CVPR, author = {Qu, Haoxuan and Cai, Yujun and Liu, Jun}, title = {LLMs are Good Action Recognizers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18395-18406} }
NoiseCLR: A Contrastive Learning Approach for Unsupervised Discovery of Interpretable Directions in Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dalva_2024_CVPR, author = {Dalva, Yusuf and Yanardag, Pinar}, title = {NoiseCLR: A Contrastive Learning Approach for Unsupervised Discovery of Interpretable Directions in Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24209-24218} }
SpecNeRF: Gaussian Directional Encoding for Specular Reflections-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2024_CVPR, author = {Ma, Li and Agrawal, Vasu and Turki, Haithem and Kim, Changil and Gao, Chen and Sander, Pedro and Zollh{\"o}fer, Michael and Richardt, Christian}, title = {SpecNeRF: Gaussian Directional Encoding for Specular Reflections}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21188-21198} }
Improving Subject-Driven Image Synthesis with Subject-Agnostic Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chan_2024_CVPR, author = {Chan, Kelvin C. K. and Zhao, Yang and Jia, Xuhui and Yang, Ming-Hsuan and Wang, Huisheng}, title = {Improving Subject-Driven Image Synthesis with Subject-Agnostic Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6733-6742} }
Diffusion Model Alignment Using Direct Preference Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wallace_2024_CVPR, author = {Wallace, Bram and Dang, Meihua and Rafailov, Rafael and Zhou, Linqi and Lou, Aaron and Purushwalkam, Senthil and Ermon, Stefano and Xiong, Caiming and Joty, Shafiq and Naik, Nikhil}, title = {Diffusion Model Alignment Using Direct Preference Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8228-8238} }
Interactive Continual Learning: Fast and Slow Thinking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qi_2024_CVPR, author = {Qi, Biqing and Chen, Xinquan and Gao, Junqi and Li, Dong and Liu, Jianxing and Wu, Ligang and Zhou, Bowen}, title = {Interactive Continual Learning: Fast and Slow Thinking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12882-12892} }
ZeroNVS: Zero-Shot 360-Degree View Synthesis from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sargent_2024_CVPR, author = {Sargent, Kyle and Li, Zizhang and Shah, Tanmay and Herrmann, Charles and Yu, Hong-Xing and Zhang, Yunzhi and Chan, Eric Ryan and Lagun, Dmitry and Fei-Fei, Li and Sun, Deqing and Wu, Jiajun}, title = {ZeroNVS: Zero-Shot 360-Degree View Synthesis from a Single Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9420-9429} }
Restoration by Generation with Constrained Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ding_2024_CVPR, author = {Ding, Zheng and Zhang, Xuaner and Tu, Zhuowen and Xia, Zhihao}, title = {Restoration by Generation with Constrained Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2567-2577} }
Snapshot Lidar: Fourier Embedding of Amplitude and Phase for Single-Image Depth Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Friday_2024_CVPR, author = {Friday, Sarah and Shi, Yunzi and Cherivirala, Yaswanth and Saragadam, Vishwanath and Pediredla, Adithya}, title = {Snapshot Lidar: Fourier Embedding of Amplitude and Phase for Single-Image Depth Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25203-25212} }
Convolutional Prompting meets Language Models for Continual Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Roy_2024_CVPR, author = {Roy, Anurag and Moulick, Riddhiman and Verma, Vinay K. and Ghosh, Saptarshi and Das, Abir}, title = {Convolutional Prompting meets Language Models for Continual Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23616-23626} }
Blur-aware Spatio-temporal Sparse Transformer for Video Deblurring-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Huicong and Xie, Haozhe and Yao, Hongxun}, title = {Blur-aware Spatio-temporal Sparse Transformer for Video Deblurring}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2673-2681} }
Towards Learning a Generalist Model for Embodied Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2024_CVPR, author = {Zheng, Duo and Huang, Shijia and Zhao, Lin and Zhong, Yiwu and Wang, Liwei}, title = {Towards Learning a Generalist Model for Embodied Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13624-13634} }
DiffusionPoser: Real-time Human Motion Reconstruction From Arbitrary Sparse Sensors Using Autoregressive Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Van_Wouwe_2024_CVPR, author = {Van Wouwe, Tom and Lee, Seunghwan and Falisse, Antoine and Delp, Scott and Liu, C. Karen}, title = {DiffusionPoser: Real-time Human Motion Reconstruction From Arbitrary Sparse Sensors Using Autoregressive Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2513-2523} }
MANUS: Markerless Grasp Capture using Articulated 3D Gaussians-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pokhariya_2024_CVPR, author = {Pokhariya, Chandradeep and Shah, Ishaan Nikhil and Xing, Angela and Li, Zekun and Chen, Kefan and Sharma, Avinash and Sridhar, Srinath}, title = {MANUS: Markerless Grasp Capture using Articulated 3D Gaussians}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2197-2208} }
Distilling Semantic Priors from SAM to Efficient Image Restoration Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Quan and Liu, Xiaoyu and Li, Wei and Chen, Hanting and Liu, Junchao and Hu, Jie and Xiong, Zhiwei and Yuan, Chun and Wang, Yunhe}, title = {Distilling Semantic Priors from SAM to Efficient Image Restoration Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25409-25419} }
Learning Intra-view and Cross-view Geometric Knowledge for Stereo Matching-
[pdf]
[arXiv]
[bibtex]@InProceedings{Gong_2024_CVPR, author = {Gong, Rui and Liu, Weide and Gu, Zaiwang and Yang, Xulei and Cheng, Jun}, title = {Learning Intra-view and Cross-view Geometric Knowledge for Stereo Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20752-20762} }
Rethinking the Evaluation Protocol of Domain Generalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2024_CVPR, author = {Yu, Han and Zhang, Xingxuan and Xu, Renzhe and Liu, Jiashuo and He, Yue and Cui, Peng}, title = {Rethinking the Evaluation Protocol of Domain Generalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21897-21908} }
Aligning Logits Generatively for Principled Black-Box Knowledge Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2024_CVPR, author = {Ma, Jing and Xiang, Xiang and Wang, Ke and Wu, Yuchuan and Li, Yongbin}, title = {Aligning Logits Generatively for Principled Black-Box Knowledge Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23148-23157} }
BerfScene: Bev-conditioned Equivariant Radiance Fields for Infinite 3D Scene Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Qihang and Xu, Yinghao and Shen, Yujun and Dai, Bo and Zhou, Bolei and Yang, Ceyuan}, title = {BerfScene: Bev-conditioned Equivariant Radiance Fields for Infinite 3D Scene Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6839-6849} }
3D Facial Expressions through Analysis-by-Neural-Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Retsinas_2024_CVPR, author = {Retsinas, George and Filntisis, Panagiotis P. and Danecek, Radek and Abrevaya, Victoria F. and Roussos, Anastasios and Bolkart, Timo and Maragos, Petros}, title = {3D Facial Expressions through Analysis-by-Neural-Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2490-2501} }
HoloVIC: Large-scale Dataset and Benchmark for Multi-Sensor Holographic Intersection and Vehicle-Infrastructure Cooperative-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2024_CVPR, author = {Ma, Cong and Qiao, Lei and Zhu, Chengkai and Liu, Kai and Kong, Zelong and Li, Qing and Zhou, Xueqi and Kan, Yuheng and Wu, Wei}, title = {HoloVIC: Large-scale Dataset and Benchmark for Multi-Sensor Holographic Intersection and Vehicle-Infrastructure Cooperative}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22129-22138} }
Unleashing the Potential of SAM for Medical Adaptation via Hierarchical Decoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2024_CVPR, author = {Cheng, Zhiheng and Wei, Qingyue and Zhu, Hongru and Wang, Yan and Qu, Liangqiong and Shao, Wei and Zhou, Yuyin}, title = {Unleashing the Potential of SAM for Medical Adaptation via Hierarchical Decoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3511-3522} }
Puff-Net: Efficient Style Transfer with Pure Content and Style Feature Fusion Network-
[pdf]
[supp]
[bibtex]@InProceedings{Zheng_2024_CVPR, author = {Zheng, Sizhe and Gao, Pan and Zhou, Peng and Qin, Jie}, title = {Puff-Net: Efficient Style Transfer with Pure Content and Style Feature Fusion Network}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8059-8068} }
Towards Progressive Multi-Frequency Representation for Image Warping-
[pdf]
[bibtex]@InProceedings{Xiao_2024_CVPR, author = {Xiao, Jun and Lyu, Zihang and Zhang, Cong and Ju, Yakun and Shui, Changjian and Lam, Kin-Man}, title = {Towards Progressive Multi-Frequency Representation for Image Warping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2995-3004} }
Learning to Control Camera Exposure via Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2024_CVPR, author = {Lee, Kyunghyun and Shin, Ukcheol and Lee, Byeong-Uk}, title = {Learning to Control Camera Exposure via Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2975-2983} }
Splatter Image: Ultra-Fast Single-View 3D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Szymanowicz_2024_CVPR, author = {Szymanowicz, Stanislaw and Rupprecht, Christian and Vedaldi, Andrea}, title = {Splatter Image: Ultra-Fast Single-View 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10208-10217} }
Modeling Collaborator: Enabling Subjective Vision Classification With Minimal Human Effort via LLM Tool-Use-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Toubal_2024_CVPR, author = {Toubal, Imad Eddine and Avinash, Aditya and Alldrin, Neil Gordon and Dlabal, Jan and Zhou, Wenlei and Luo, Enming and Stretcu, Otilia and Xiong, Hao and Lu, Chun-Ta and Zhou, Howard and Krishna, Ranjay and Fuxman, Ariel and Duerig, Tom}, title = {Modeling Collaborator: Enabling Subjective Vision Classification With Minimal Human Effort via LLM Tool-Use}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17553-17563} }
RNb-NeuS: Reflectance and Normal-based Multi-View 3D Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Brument_2024_CVPR, author = {Brument, Baptiste and Bruneau, Robin and Qu{\'e}au, Yvain and M{\'e}lou, Jean and Lauze, Fran{\c{c}}ois Bernard and Durou, Jean-Denis and Calvet, Lilian}, title = {RNb-NeuS: Reflectance and Normal-based Multi-View 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5230-5239} }
LOTUS: Evasive and Resilient Backdoor Attacks through Sub-Partitioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2024_CVPR, author = {Cheng, Siyuan and Tao, Guanhong and Liu, Yingqi and Shen, Guangyu and An, Shengwei and Feng, Shiwei and Xu, Xiangzhe and Zhang, Kaiyuan and Ma, Shiqing and Zhang, Xiangyu}, title = {LOTUS: Evasive and Resilient Backdoor Attacks through Sub-Partitioning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24798-24809} }
GeoReF: Geometric Alignment Across Shape Variation for Category-level Object Pose Refinement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2024_CVPR, author = {Zheng, Linfang and Tse, Tze Ho Elden and Wang, Chen and Sun, Yinghan and Chen, Hua and Leonardis, Ales and Zhang, Wei and Chang, Hyung Jin}, title = {GeoReF: Geometric Alignment Across Shape Variation for Category-level Object Pose Refinement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10693-10703} }
LAN: Learning to Adapt Noise for Image Denoising-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Changjin and Kim, Tae Hyun and Baik, Sungyong}, title = {LAN: Learning to Adapt Noise for Image Denoising}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25193-25202} }
Scaling Up Dynamic Human-Scene Interaction Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2024_CVPR, author = {Jiang, Nan and Zhang, Zhiyuan and Li, Hongjie and Ma, Xiaoxuan and Wang, Zan and Chen, Yixin and Liu, Tengyu and Zhu, Yixin and Huang, Siyuan}, title = {Scaling Up Dynamic Human-Scene Interaction Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1737-1747} }
Semantic-aware SAM for Point-Prompted Instance Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2024_CVPR, author = {Wei, Zhaoyang and Chen, Pengfei and Yu, Xuehui and Li, Guorong and Jiao, Jianbin and Han, Zhenjun}, title = {Semantic-aware SAM for Point-Prompted Instance Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3585-3594} }
Learning Group Activity Features Through Person Attribute Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nakatani_2024_CVPR, author = {Nakatani, Chihiro and Kawashima, Hiroaki and Ukita, Norimichi}, title = {Learning Group Activity Features Through Person Attribute Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18233-18242} }
HUNTER: Unsupervised Human-centric 3D Detection via Transferring Knowledge from Synthetic Instances to Real Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yao_2024_CVPR, author = {Yao, Yichen and Jiang, Zimo and Sun, Yujing and Zhu, Zhencai and Zhu, Xinge and Chen, Runnan and Ma, Yuexin}, title = {HUNTER: Unsupervised Human-centric 3D Detection via Transferring Knowledge from Synthetic Instances to Real Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28120-28129} }
Improving Transferable Targeted Adversarial Attacks with Model Self-Enhancement-
[pdf]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Han and Ou, Guanyan and Wu, Weibin and Zheng, Zibin}, title = {Improving Transferable Targeted Adversarial Attacks with Model Self-Enhancement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24615-24624} }
Unsupervised Learning of Category-Level 3D Pose from Object-Centric Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sommer_2024_CVPR, author = {Sommer, Leonhard and Jesslen, Artur and Ilg, Eddy and Kortylewski, Adam}, title = {Unsupervised Learning of Category-Level 3D Pose from Object-Centric Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22787-22796} }
Plug-and-Play Diffusion Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hsiao_2024_CVPR, author = {Hsiao, Yi-Ting and Khodadadeh, Siavash and Duarte, Kevin and Lin, Wei-An and Qu, Hui and Kwon, Mingi and Kalarot, Ratheesh}, title = {Plug-and-Play Diffusion Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13743-13752} }
MindBridge: A Cross-Subject Brain Decoding Framework-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Shizun and Liu, Songhua and Tan, Zhenxiong and Wang, Xinchao}, title = {MindBridge: A Cross-Subject Brain Decoding Framework}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11333-11342} }
Make Pixels Dance: High-Dynamic Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2024_CVPR, author = {Zeng, Yan and Wei, Guoqiang and Zheng, Jiani and Zou, Jiaxin and Wei, Yang and Zhang, Yuchen and Li, Hang}, title = {Make Pixels Dance: High-Dynamic Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8850-8860} }
MM-Narrator: Narrating Long-form Videos with Multimodal In-Context Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Chaoyi and Lin, Kevin and Yang, Zhengyuan and Wang, Jianfeng and Li, Linjie and Lin, Chung-Ching and Liu, Zicheng and Wang, Lijuan}, title = {MM-Narrator: Narrating Long-form Videos with Multimodal In-Context Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13647-13657} }
Morphable Diffusion: 3D-Consistent Diffusion for Single-image Avatar Creation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Xiyi and Mihajlovic, Marko and Wang, Shaofei and Prokudin, Sergey and Tang, Siyu}, title = {Morphable Diffusion: 3D-Consistent Diffusion for Single-image Avatar Creation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10359-10370} }
Fully Convolutional Slice-to-Volume Reconstruction for Single-Stack MRI-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Young_2024_CVPR, author = {Young, Sean I. and Balbastre, Yael and Fischl, Bruce and Golland, Polina and Iglesias, Juan Eugenio}, title = {Fully Convolutional Slice-to-Volume Reconstruction for Single-Stack MRI}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11535-11545} }
Enhance Image Classification via Inter-Class Image Mixup with Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Zhicai and Wei, Longhui and Wang, Tan and Chen, Heyu and Hao, Yanbin and Wang, Xiang and He, Xiangnan and Tian, Qi}, title = {Enhance Image Classification via Inter-Class Image Mixup with Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17223-17233} }
A&B BNN: Add&Bit-Operation-Only Hardware-Friendly Binary Neural Network-
[pdf]
[bibtex]@InProceedings{Ma_2024_CVPR, author = {Ma, Ruichen and Qiao, Guanchao and Liu, Yian and Meng, Liwei and Ning, Ning and Liu, Yang and Hu, Shaogang}, title = {A\&B BNN: Add\&Bit-Operation-Only Hardware-Friendly Binary Neural Network}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5704-5713} }
Alpha-CLIP: A CLIP Model Focusing on Wherever You Want-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2024_CVPR, author = {Sun, Zeyi and Fang, Ye and Wu, Tong and Zhang, Pan and Zang, Yuhang and Kong, Shu and Xiong, Yuanjun and Lin, Dahua and Wang, Jiaqi}, title = {Alpha-CLIP: A CLIP Model Focusing on Wherever You Want}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13019-13029} }
FutureHuman3D: Forecasting Complex Long-Term 3D Human Behavior from Video Observations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Diller_2024_CVPR, author = {Diller, Christian and Funkhouser, Thomas and Dai, Angela}, title = {FutureHuman3D: Forecasting Complex Long-Term 3D Human Behavior from Video Observations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19902-19914} }
NightCC: Nighttime Color Constancy via Adaptive Channel Masking-
[pdf]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Shuwei and Tan, Robby T.}, title = {NightCC: Nighttime Color Constancy via Adaptive Channel Masking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25522-25531} }
Task-aligned Part-aware Panoptic Segmentation through Joint Object-Part Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{de_Geus_2024_CVPR, author = {de Geus, Daan and Dubbelman, Gijs}, title = {Task-aligned Part-aware Panoptic Segmentation through Joint Object-Part Representations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3174-3183} }
From Activation to Initialization: Scaling Insights for Optimizing Neural Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Saratchandran_2024_CVPR, author = {Saratchandran, Hemanth and Ramasinghe, Sameera and Lucey, Simon}, title = {From Activation to Initialization: Scaling Insights for Optimizing Neural Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {413-422} }
UnScene3D: Unsupervised 3D Instance Segmentation for Indoor Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rozenberszki_2024_CVPR, author = {Rozenberszki, David and Litany, Or and Dai, Angela}, title = {UnScene3D: Unsupervised 3D Instance Segmentation for Indoor Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19957-19967} }
Nearest is Not Dearest: Towards Practical Defense against Quantization-conditioned Backdoor Attacks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Boheng and Cai, Yishuo and Li, Haowei and Xue, Feng and Li, Zhifeng and Li, Yiming}, title = {Nearest is Not Dearest: Towards Practical Defense against Quantization-conditioned Backdoor Attacks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24523-24533} }
DiffAvatar: Simulation-Ready Garment Optimization with Differentiable Simulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Yifei and Chen, Hsiao-yu and Larionov, Egor and Sarafianos, Nikolaos and Matusik, Wojciech and Stuyck, Tuur}, title = {DiffAvatar: Simulation-Ready Garment Optimization with Differentiable Simulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4368-4378} }
AlignSAM: Aligning Segment Anything Model to Open Context via Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Duojun and Xiong, Xinyu and Ma, Jie and Li, Jichang and Jie, Zequn and Ma, Lin and Li, Guanbin}, title = {AlignSAM: Aligning Segment Anything Model to Open Context via Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3205-3215} }
A Simple Recipe for Language-guided Domain Generalized Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fahes_2024_CVPR, author = {Fahes, Mohammad and Vu, Tuan-Hung and Bursuc, Andrei and P{\'e}rez, Patrick and de Charette, Raoul}, title = {A Simple Recipe for Language-guided Domain Generalized Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23428-23437} }
Learning Spatial Adaptation and Temporal Coherence in Diffusion Models for Video Super-Resolution-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Zhikai and Long, Fuchen and Qiu, Zhaofan and Yao, Ting and Zhou, Wengang and Luo, Jiebo and Mei, Tao}, title = {Learning Spatial Adaptation and Temporal Coherence in Diffusion Models for Video Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9232-9241} }
Multiagent Multitraversal Multimodal Self-Driving: Open MARS Dataset-
[pdf]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Yiming and Li, Zhiheng and Chen, Nuo and Gong, Moonjun and Lyu, Zonglin and Wang, Zehong and Jiang, Peili and Feng, Chen}, title = {Multiagent Multitraversal Multimodal Self-Driving: Open MARS Dataset}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22041-22051} }
From Variance to Veracity: Unbundling and Mitigating Gradient Variance in Differentiable Bundle Adjustment Layers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gurumurthy_2024_CVPR, author = {Gurumurthy, Swaminathan and Ram, Karnik and Chen, Bingqing and Manchester, Zachary and Kolter, Zico}, title = {From Variance to Veracity: Unbundling and Mitigating Gradient Variance in Differentiable Bundle Adjustment Layers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27507-27516} }
Denoising Point Clouds in Latent Space via Graph Convolution and Invertible Neural Network-
[pdf]
[supp]
[bibtex]@InProceedings{Mao_2024_CVPR, author = {Mao, Aihua and Yan, Biao and Ma, Zijing and He, Ying}, title = {Denoising Point Clouds in Latent Space via Graph Convolution and Invertible Neural Network}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5768-5777} }
ADA-Track: End-to-End Multi-Camera 3D Multi-Object Tracking with Alternating Detection and Association-
[pdf]
[supp]
[bibtex]@InProceedings{Ding_2024_CVPR, author = {Ding, Shuxiao and Schneider, Lukas and Cordts, Marius and Gall, Juergen}, title = {ADA-Track: End-to-End Multi-Camera 3D Multi-Object Tracking with Alternating Detection and Association}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15184-15194} }
HIR-Diff: Unsupervised Hyperspectral Image Restoration Via Improved Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Pang_2024_CVPR, author = {Pang, Li and Rui, Xiangyu and Cui, Long and Wang, Hongzhong and Meng, Deyu and Cao, Xiangyong}, title = {HIR-Diff: Unsupervised Hyperspectral Image Restoration Via Improved Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3005-3014} }
Mind The Edge: Refining Depth Edges in Sparsely-Supervised Monocular Depth Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Talker_2024_CVPR, author = {Talker, Lior and Cohen, Aviad and Yosef, Erez and Dana, Alexandra and Dinerstein, Michael}, title = {Mind The Edge: Refining Depth Edges in Sparsely-Supervised Monocular Depth Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10606-10616} }
Attention-Driven Training-Free Efficiency Enhancement of Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Hongjie and Liu, Difan and Kang, Yan and Li, Yijun and Lin, Zhe and Jha, Niraj K. and Liu, Yuchen}, title = {Attention-Driven Training-Free Efficiency Enhancement of Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16080-16089} }
CPR: Retrieval Augmented Generation for Copyright Protection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Golatkar_2024_CVPR, author = {Golatkar, Aditya and Achille, Alessandro and Zancato, Luca and Wang, Yu-Xiang and Swaminathan, Ashwin and Soatto, Stefano}, title = {CPR: Retrieval Augmented Generation for Copyright Protection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12374-12384} }
FreeDrag: Feature Dragging for Reliable Point-based Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ling_2024_CVPR, author = {Ling, Pengyang and Chen, Lin and Zhang, Pan and Chen, Huaian and Jin, Yi and Zheng, Jinjin}, title = {FreeDrag: Feature Dragging for Reliable Point-based Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6860-6870} }
Image-Text Co-Decomposition for Text-Supervised Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Ji-Jia and Chang, Andy Chia-Hao and Chuang, Chieh-Yu and Chen, Chun-Pei and Liu, Yu-Lun and Chen, Min-Hung and Hu, Hou-Ning and Chuang, Yung-Yu and Lin, Yen-Yu}, title = {Image-Text Co-Decomposition for Text-Supervised Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26794-26803} }
Orchestrate Latent Expertise: Advancing Online Continual Learning with Multi-Level Supervision and Reverse Self-Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2024_CVPR, author = {Yan, Hongwei and Wang, Liyuan and Ma, Kaisheng and Zhong, Yi}, title = {Orchestrate Latent Expertise: Advancing Online Continual Learning with Multi-Level Supervision and Reverse Self-Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23670-23680} }
Vision-and-Language Navigation via Causal Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Liuyi and He, Zongtao and Dang, Ronghao and Shen, Mengjiao and Liu, Chengju and Chen, Qijun}, title = {Vision-and-Language Navigation via Causal Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13139-13150} }
Mitigating Object Dependencies: Improving Point Cloud Self-Supervised Learning through Object Exchange-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Yanhao and Zhang, Tong and Ke, Wei and Qiu, Congpei and S{\"u}sstrunk, Sabine and Salzmann, Mathieu}, title = {Mitigating Object Dependencies: Improving Point Cloud Self-Supervised Learning through Object Exchange}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23052-23061} }
Confronting Ambiguity in 6D Object Pose Estimation via Score-Based Diffusion on SE(3)-
[pdf]
[arXiv]
[bibtex]@InProceedings{Hsiao_2024_CVPR, author = {Hsiao, Tsu-Ching and Chen, Hao-Wei and Yang, Hsuan-Kung and Lee, Chun-Yi}, title = {Confronting Ambiguity in 6D Object Pose Estimation via Score-Based Diffusion on SE(3)}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {352-362} }
Visual Anagrams: Generating Multi-View Optical Illusions with Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Geng_2024_CVPR, author = {Geng, Daniel and Park, Inbum and Owens, Andrew}, title = {Visual Anagrams: Generating Multi-View Optical Illusions with Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24154-24163} }
Unveiling Parts Beyond Objects: Towards Finer-Granularity Referring Expression Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Wenxuan and Yue, Tongtian and Zhang, Yisi and Guo, Longteng and He, Xingjian and Wang, Xinlong and Liu, Jing}, title = {Unveiling Parts Beyond Objects: Towards Finer-Granularity Referring Expression Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12998-13008} }
DiffInDScene: Diffusion-based High-Quality 3D Indoor Scene Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ju_2024_CVPR, author = {Ju, Xiaoliang and Huang, Zhaoyang and Li, Yijin and Zhang, Guofeng and Qiao, Yu and Li, Hongsheng}, title = {DiffInDScene: Diffusion-based High-Quality 3D Indoor Scene Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4526-4535} }
MAPSeg: Unified Unsupervised Domain Adaptation for Heterogeneous Medical Image Segmentation Based on 3D Masked Autoencoding and Pseudo-Labeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Xuzhe and Wu, Yuhao and Angelini, Elsa and Li, Ang and Guo, Jia and Rasmussen, Jerod M. and O'Connor, Thomas G. and Wadhwa, Pathik D. and Jackowski, Andrea Parolin and Li, Hai and Posner, Jonathan and Laine, Andrew F. and Wang, Yun}, title = {MAPSeg: Unified Unsupervised Domain Adaptation for Heterogeneous Medical Image Segmentation Based on 3D Masked Autoencoding and Pseudo-Labeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5851-5862} }
Leveraging Predicate and Triplet Learning for Scene Graph Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Jiankai and Wang, Yunhong and Guo, Xiefan and Yang, Ruijie and Li, Weixin}, title = {Leveraging Predicate and Triplet Learning for Scene Graph Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28369-28379} }
DaReNeRF: Direction-aware Representation for Dynamic Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lou_2024_CVPR, author = {Lou, Ange and Planche, Benjamin and Gao, Zhongpai and Li, Yamin and Luan, Tianyu and Ding, Hao and Chen, Terrence and Noble, Jack and Wu, Ziyan}, title = {DaReNeRF: Direction-aware Representation for Dynamic Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5031-5042} }
SfmCAD: Unsupervised CAD Reconstruction by Learning Sketch-based Feature Modeling Operations-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Pu and Guo, Jianwei and Li, Huibin and Benes, Bedrich and Yan, Dong-Ming}, title = {SfmCAD: Unsupervised CAD Reconstruction by Learning Sketch-based Feature Modeling Operations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4671-4680} }
CoDi-2: In-Context Interleaved and Interactive Any-to-Any Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Tang_2024_CVPR, author = {Tang, Zineng and Yang, Ziyi and Khademi, Mahmoud and Liu, Yang and Zhu, Chenguang and Bansal, Mohit}, title = {CoDi-2: In-Context Interleaved and Interactive Any-to-Any Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27425-27434} }
Tuning Stable Rank Shrinkage: Aiming at the Overlooked Structural Risk in Fine-tuning-
[pdf]
[supp]
[bibtex]@InProceedings{Shen_2024_CVPR, author = {Shen, Sicong and Zhou, Yang and Wei, Bingzheng and Chang, Eric I-Chao and Xu, Yan}, title = {Tuning Stable Rank Shrinkage: Aiming at the Overlooked Structural Risk in Fine-tuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28474-28484} }
Differentiable Display Photometric Stereo-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2024_CVPR, author = {Choi, Seokjun and Yoon, Seungwoo and Nam, Giljoo and Lee, Seungyong and Baek, Seung-Hwan}, title = {Differentiable Display Photometric Stereo}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11831-11840} }
In-distribution Public Data Synthesis with Diffusion Models for Differentially Private Image Classification-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2024_CVPR, author = {Park, Jinseong and Choi, Yujin and Lee, Jaewook}, title = {In-distribution Public Data Synthesis with Diffusion Models for Differentially Private Image Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12236-12246} }
Learning Degradation-unaware Representation with Prior-based Latent Transformations for Blind Face Restoration-
[pdf]
[bibtex]@InProceedings{Xie_2024_CVPR, author = {Xie, Lianxin and Zheng, Csbingbing and Xue, Wen and Jiang, Le and Liu, Cheng and Wu, Si and Wong, Hau San}, title = {Learning Degradation-unaware Representation with Prior-based Latent Transformations for Blind Face Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9120-9129} }
LSK3DNet: Towards Effective and Efficient 3D Perception with Large Sparse Kernels-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2024_CVPR, author = {Feng, Tuo and Wang, Wenguan and Ma, Fan and Yang, Yi}, title = {LSK3DNet: Towards Effective and Efficient 3D Perception with Large Sparse Kernels}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14916-14927} }
Faces that Speak: Jointly Synthesising Talking Face and Speech from Text-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jang_2024_CVPR, author = {Jang, Youngjoon and Kim, Ji-Hoon and Ahn, Junseok and Kwak, Doyeop and Yang, Hong-Sun and Ju, Yoon-Cheol and Kim, Il-Hwan and Kim, Byeong-Yeol and Chung, Joon Son}, title = {Faces that Speak: Jointly Synthesising Talking Face and Speech from Text}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8818-8828} }
Diversified and Personalized Multi-rater Medical Image Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Yicheng and Luo, Xiangde and Xu, Zhe and Guo, Xiaoqing and Ju, Lie and Ge, Zongyuan and Liao, Wenjun and Cai, Jianfei}, title = {Diversified and Personalized Multi-rater Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11470-11479} }
Towards Automatic Power Battery Detection: New Challenge Benchmark Dataset and Baseline-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Xiaoqi and Pang, Youwei and Chen, Zhenyu and Yu, Qian and Zhang, Lihe and Liu, Hanqi and Zuo, Jiaming and Lu, Huchuan}, title = {Towards Automatic Power Battery Detection: New Challenge Benchmark Dataset and Baseline}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22020-22029} }
AVFF: Audio-Visual Feature Fusion for Video Deepfake Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Oorloff_2024_CVPR, author = {Oorloff, Trevine and Koppisetti, Surya and Bonettini, Nicol{\`o} and Solanki, Divyaraj and Colman, Ben and Yacoob, Yaser and Shahriyari, Ali and Bharaj, Gaurav}, title = {AVFF: Audio-Visual Feature Fusion for Video Deepfake Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27102-27112} }
Discover and Mitigate Multiple Biased Subgroups in Image Classifiers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Zeliang and Feng, Mingqian and Li, Zhiheng and Xu, Chenliang}, title = {Discover and Mitigate Multiple Biased Subgroups in Image Classifiers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10906-10915} }
DiffusionRegPose: Enhancing Multi-Person Pose Estimation using a Diffusion-Based End-to-End Regression Approach-
[pdf]
[supp]
[bibtex]@InProceedings{Tan_2024_CVPR, author = {Tan, Dayi and Chen, Hansheng and Tian, Wei and Xiong, Lu}, title = {DiffusionRegPose: Enhancing Multi-Person Pose Estimation using a Diffusion-Based End-to-End Regression Approach}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2230-2239} }
Memory-Scalable and Simplified Functional Map Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Magnet_2024_CVPR, author = {Magnet, Robin and Ovsjanikov, Maks}, title = {Memory-Scalable and Simplified Functional Map Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4041-4050} }
X-MIC: Cross-Modal Instance Conditioning for Egocentric Action Generalization-
[pdf]
[supp]
[bibtex]@InProceedings{Kukleva_2024_CVPR, author = {Kukleva, Anna and Sener, Fadime and Remelli, Edoardo and Tekin, Bugra and Sauser, Eric and Schiele, Bernt and Ma, Shugao}, title = {X-MIC: Cross-Modal Instance Conditioning for Egocentric Action Generalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26364-26373} }
ExMap: Leveraging Explainability Heatmaps for Unsupervised Group Robustness to Spurious Correlations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chakraborty_2024_CVPR, author = {Chakraborty, Rwiddhi and Sletten, Adrian and Kampffmeyer, Michael C.}, title = {ExMap: Leveraging Explainability Heatmaps for Unsupervised Group Robustness to Spurious Correlations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12017-12026} }
Gaussian Head Avatar: Ultra High-fidelity Head Avatar via Dynamic Gaussians-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Yuelang and Chen, Benwang and Li, Zhe and Zhang, Hongwen and Wang, Lizhen and Zheng, Zerong and Liu, Yebin}, title = {Gaussian Head Avatar: Ultra High-fidelity Head Avatar via Dynamic Gaussians}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1931-1941} }
Stratified Avatar Generation from Sparse Observations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2024_CVPR, author = {Feng, Han and Ma, Wenchao and Gao, Quankai and Zheng, Xianwei and Xue, Nan and Xu, Huijuan}, title = {Stratified Avatar Generation from Sparse Observations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {153-163} }
Learning to Segment Referred Objects from Narrated Egocentric Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Shen_2024_CVPR, author = {Shen, Yuhan and Wang, Huiyu and Yang, Xitong and Feiszli, Matt and Elhamifar, Ehsan and Torresani, Lorenzo and Mavroudi, Effrosyni}, title = {Learning to Segment Referred Objects from Narrated Egocentric Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14510-14520} }
Rewrite the Stars-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2024_CVPR, author = {Ma, Xu and Dai, Xiyang and Bai, Yue and Wang, Yizhou and Fu, Yun}, title = {Rewrite the Stars}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5694-5703} }
Adapting Visual-Language Models for Generalizable Anomaly Detection in Medical Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Chaoqin and Jiang, Aofan and Feng, Jinghao and Zhang, Ya and Wang, Xinchao and Wang, Yanfeng}, title = {Adapting Visual-Language Models for Generalizable Anomaly Detection in Medical Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11375-11385} }
AV-RIR: Audio-Visual Room Impulse Response Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Ratnarajah_2024_CVPR, author = {Ratnarajah, Anton and Ghosh, Sreyan and Kumar, Sonal and Chiniya, Purva and Manocha, Dinesh}, title = {AV-RIR: Audio-Visual Room Impulse Response Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27164-27175} }
Depth-aware Test-Time Training for Zero-shot Video Object Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Weihuang and Shen, Xi and Li, Haolun and Bi, Xiuli and Liu, Bo and Pun, Chi-Man and Cun, Xiaodong}, title = {Depth-aware Test-Time Training for Zero-shot Video Object Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19218-19227} }
Dual-Consistency Model Inversion for Non-Exemplar Class Incremental Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Qiu_2024_CVPR, author = {Qiu, Zihuan and Xu, Yi and Meng, Fanman and Li, Hongliang and Xu, Linfeng and Wu, Qingbo}, title = {Dual-Consistency Model Inversion for Non-Exemplar Class Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24025-24035} }
RMem: Restricted Memory Banks Improve Video Object Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR, author = {Zhou, Junbao and Pang, Ziqi and Wang, Yu-Xiong}, title = {RMem: Restricted Memory Banks Improve Video Object Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18602-18611} }
Not All Prompts Are Secure: A Switchable Backdoor Attack Against Pre-trained Vision Transfomers-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Sheng and Bai, Jiawang and Gao, Kuofeng and Yang, Yong and Li, Yiming and Xia, Shu-Tao}, title = {Not All Prompts Are Secure: A Switchable Backdoor Attack Against Pre-trained Vision Transfomers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24431-24441} }
PairDETR : Joint Detection and Association of Human Bodies and Faces-
[pdf]
[supp]
[bibtex]@InProceedings{Ali_2024_CVPR, author = {Ali, Ammar and Gaikov, Georgii and Rybalchenko, Denis and Chigorin, Alexander and Laptev, Ivan and Zagoruyko, Sergey}, title = {PairDETR : Joint Detection and Association of Human Bodies and Faces}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {423-432} }
PortraitBooth: A Versatile Portrait Model for Fast Identity-preserved Personalization-
[pdf]
[arXiv]
[bibtex]@InProceedings{Peng_2024_CVPR, author = {Peng, Xu and Zhu, Junwei and Jiang, Boyuan and Tai, Ying and Luo, Donghao and Zhang, Jiangning and Lin, Wei and Jin, Taisong and Wang, Chengjie and Ji, Rongrong}, title = {PortraitBooth: A Versatile Portrait Model for Fast Identity-preserved Personalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27080-27090} }
Learn from View Correlation: An Anchor Enhancement Strategy for Multi-view Clustering-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Suyuan and Liang, Ke and Dong, Zhibin and Wang, Siwei and Yang, Xihong and Zhou, Sihang and Zhu, En and Liu, Xinwang}, title = {Learn from View Correlation: An Anchor Enhancement Strategy for Multi-view Clustering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26151-26161} }
SportsSloMo: A New Benchmark and Baselines for Human-centric Video Frame Interpolation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Jiaben and Jiang, Huaizu}, title = {SportsSloMo: A New Benchmark and Baselines for Human-centric Video Frame Interpolation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6475-6486} }
APSeg: Auto-Prompt Network for Cross-Domain Few-Shot Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2024_CVPR, author = {He, Weizhao and Zhang, Yang and Zhuo, Wei and Shen, Linlin and Yang, Jiaqi and Deng, Songhe and Sun, Liang}, title = {APSeg: Auto-Prompt Network for Cross-Domain Few-Shot Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23762-23772} }
Text2HOI: Text-guided 3D Motion Generation for Hand-Object Interaction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cha_2024_CVPR, author = {Cha, Junuk and Kim, Jihyeon and Yoon, Jae Shin and Baek, Seungryul}, title = {Text2HOI: Text-guided 3D Motion Generation for Hand-Object Interaction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1577-1585} }
Zero-TPrune: Zero-Shot Token Pruning through Leveraging of the Attention Graph in Pre-Trained Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Hongjie and Dedhia, Bhishma and Jha, Niraj K.}, title = {Zero-TPrune: Zero-Shot Token Pruning through Leveraging of the Attention Graph in Pre-Trained Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16070-16079} }
Enhancing Visual Continual Learning with Language-Guided Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ni_2024_CVPR, author = {Ni, Bolin and Zhao, Hongbo and Zhang, Chenghao and Hu, Ke and Meng, Gaofeng and Zhang, Zhaoxiang and Xiang, Shiming}, title = {Enhancing Visual Continual Learning with Language-Guided Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24068-24077} }
MACE: Mass Concept Erasure in Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2024_CVPR, author = {Lu, Shilin and Wang, Zilan and Li, Leyang and Liu, Yanzhu and Kong, Adams Wai-Kin}, title = {MACE: Mass Concept Erasure in Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6430-6440} }
DIBS: Enhancing Dense Video Captioning with Unlabeled Videos via Pseudo Boundary Enrichment and Online Refinement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Hao and Liu, Huabin and Qiao, Yu and Sun, Xiao}, title = {DIBS: Enhancing Dense Video Captioning with Unlabeled Videos via Pseudo Boundary Enrichment and Online Refinement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18699-18708} }
PeLK: Parameter-efficient Large Kernel ConvNets with Peripheral Convolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Honghao and Chu, Xiangxiang and Ren, Yongjian and Zhao, Xin and Huang, Kaiqi}, title = {PeLK: Parameter-efficient Large Kernel ConvNets with Peripheral Convolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5557-5567} }
AiOS: All-in-One-Stage Expressive Human Pose and Shape Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2024_CVPR, author = {Sun, Qingping and Wang, Yanjun and Zeng, Ailing and Yin, Wanqi and Wei, Chen and Wang, Wenjia and Mei, Haiyi and Leung, Chi-Sing and Liu, Ziwei and Yang, Lei and Cai, Zhongang}, title = {AiOS: All-in-One-Stage Expressive Human Pose and Shape Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1834-1843} }
SOK-Bench: A Situated Video Reasoning Benchmark with Aligned Open-World Knowledge-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Andong and Wu, Bo and Chen, Sunli and Chen, Zhenfang and Guan, Haotian and Lee, Wei-Ning and Li, Li Erran and Gan, Chuang}, title = {SOK-Bench: A Situated Video Reasoning Benchmark with Aligned Open-World Knowledge}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13384-13394} }
LORS: Low-rank Residual Structure for Parameter-Efficient Network Stacking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Jialin and Nie, Qiang and Fu, Weifu and Lin, Yuhuan and Tao, Guangpin and Liu, Yong and Wang, Chengjie}, title = {LORS: Low-rank Residual Structure for Parameter-Efficient Network Stacking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15866-15876} }
Design2Cloth: 3D Cloth Generation from 2D Masks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2024_CVPR, author = {Zheng, Jiali and Potamias, Rolandos Alexandros and Zafeiriou, Stefanos}, title = {Design2Cloth: 3D Cloth Generation from 2D Masks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1748-1758} }
Multi-modal In-Context Learning Makes an Ego-evolving Scene Text Recognizer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Zhen and Tang, Jingqun and Lin, Chunhui and Wu, Binghong and Huang, Can and Liu, Hao and Tan, Xin and Zhang, Zhizhong and Xie, Yuan}, title = {Multi-modal In-Context Learning Makes an Ego-evolving Scene Text Recognizer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15567-15576} }
Amodal Completion via Progressive Mixed Context Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Katherine and Zhang, Lingzhi and Shi, Jianbo}, title = {Amodal Completion via Progressive Mixed Context Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9099-9109} }
Training Diffusion Models Towards Diverse Image Generation with Reinforcement Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Miao_2024_CVPR, author = {Miao, Zichen and Wang, Jiang and Wang, Ze and Yang, Zhengyuan and Wang, Lijuan and Qiu, Qiang and Liu, Zicheng}, title = {Training Diffusion Models Towards Diverse Image Generation with Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10844-10853} }
Diffusion 3D Features (Diff3F): Decorating Untextured Shapes with Distilled Semantic Features-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dutt_2024_CVPR, author = {Dutt, Niladri Shekhar and Muralikrishnan, Sanjeev and Mitra, Niloy J.}, title = {Diffusion 3D Features (Diff3F): Decorating Untextured Shapes with Distilled Semantic Features}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4494-4504} }
LASIL: Learner-Aware Supervised Imitation Learning For Long-term Microscopic Traffic Simulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2024_CVPR, author = {Guo, Ke and Miao, Zhenwei and Jing, Wei and Liu, Weiwei and Li, Weizi and Hao, Dayang and Pan, Jia}, title = {LASIL: Learner-Aware Supervised Imitation Learning For Long-term Microscopic Traffic Simulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15386-15395} }
Revamping Federated Learning Security from a Defender's Perspective: A Unified Defense with Homomorphic Encrypted Data Space-
[pdf]
[supp]
[bibtex]@InProceedings{Kumar_2024_CVPR, author = {Kumar, K Naveen and Mitra, Reshmi and Mohan, C Krishna}, title = {Revamping Federated Learning Security from a Defender's Perspective: A Unified Defense with Homomorphic Encrypted Data Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24387-24397} }
A Dynamic Kernel Prior Model for Unsupervised Blind Image Super-Resolution-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Zhixiong and Xia, Jingyuan and Li, Shengxi and Huang, Xinghua and Zhang, Shuanghui and Liu, Zhen and Fu, Yaowen and Liu, Yongxiang}, title = {A Dynamic Kernel Prior Model for Unsupervised Blind Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26046-26056} }
Cinematic Behavior Transfer via NeRF-based Differentiable Filming-
[pdf]
[arXiv]
[bibtex]@InProceedings{Jiang_2024_CVPR, author = {Jiang, Xuekun and Rao, Anyi and Wang, Jingbo and Lin, Dahua and Dai, Bo}, title = {Cinematic Behavior Transfer via NeRF-based Differentiable Filming}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6723-6732} }
SeaBird: Segmentation in Bird's View with Dice Loss Improves Monocular 3D Detection of Large Objects-
[pdf]
[supp]
[bibtex]@InProceedings{Kumar_2024_CVPR, author = {Kumar, Abhinav and Guo, Yuliang and Huang, Xinyu and Ren, Liu and Liu, Xiaoming}, title = {SeaBird: Segmentation in Bird's View with Dice Loss Improves Monocular 3D Detection of Large Objects}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10269-10280} }
Text-Driven Image Editing via Learnable Regions-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lin_2024_CVPR, author = {Lin, Yuanze and Chen, Yi-Wen and Tsai, Yi-Hsuan and Jiang, Lu and Yang, Ming-Hsuan}, title = {Text-Driven Image Editing via Learnable Regions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7059-7068} }
Relation Rectification in Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Yinwei and Yang, Xingyi and Wang, Xinchao}, title = {Relation Rectification in Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7685-7694} }
NOPE: Novel Object Pose Estimation from a Single Image-
[pdf]
[supp]
[bibtex]@InProceedings{Nguyen_2024_CVPR, author = {Nguyen, Van Nguyen and Groueix, Thibault and Ponimatkin, Georgy and Hu, Yinlin and Marlet, Renaud and Salzmann, Mathieu and Lepetit, Vincent}, title = {NOPE: Novel Object Pose Estimation from a Single Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17923-17932} }
Mocap Everyone Everywhere: Lightweight Motion Capture With Smartwatches and a Head-Mounted Camera-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2024_CVPR, author = {Lee, Jiye and Joo, Hanbyul}, title = {Mocap Everyone Everywhere: Lightweight Motion Capture With Smartwatches and a Head-Mounted Camera}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1091-1100} }
Fast ODE-based Sampling for Diffusion Models in Around 5 Steps-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR, author = {Zhou, Zhenyu and Chen, Defang and Wang, Can and Chen, Chun}, title = {Fast ODE-based Sampling for Diffusion Models in Around 5 Steps}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7777-7786} }
Dual-View Visual Contextualization for Web Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kil_2024_CVPR, author = {Kil, Jihyung and Song, Chan Hee and Zheng, Boyuan and Deng, Xiang and Su, Yu and Chao, Wei-Lun}, title = {Dual-View Visual Contextualization for Web Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14445-14454} }
Language-driven Grasp Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Vuong_2024_CVPR, author = {Vuong, An Dinh and Vu, Minh Nhat and Huang, Baoru and Nguyen, Nghia and Le, Hieu and Vo, Thieu and Nguyen, Anh}, title = {Language-driven Grasp Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17902-17912} }
Towards Modern Image Manipulation Localization: A Large-Scale Dataset and Novel Methods-
[pdf]
[supp]
[bibtex]@InProceedings{Qu_2024_CVPR, author = {Qu, Chenfan and Zhong, Yiwu and Liu, Chongyu and Xu, Guitao and Peng, Dezhi and Guo, Fengjun and Jin, Lianwen}, title = {Towards Modern Image Manipulation Localization: A Large-Scale Dataset and Novel Methods}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10781-10790} }
Mitigating Noisy Correspondence by Geometrical Structure Consistency Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Zihua and Chen, Mengxi and Dai, Tianjie and Yao, Jiangchao and Han, Bo and Zhang, Ya and Wang, Yanfeng}, title = {Mitigating Noisy Correspondence by Geometrical Structure Consistency Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27381-27390} }
CLiC: Concept Learning in Context-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Safaee_2024_CVPR, author = {Safaee, Mehdi and Mikaeili, Aryan and Patashnik, Or and Cohen-Or, Daniel and Mahdavi-Amiri, Ali}, title = {CLiC: Concept Learning in Context}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6924-6933} }
CAD-SIGNet: CAD Language Inference from Point Clouds using Layer-wise Sketch Instance Guided Attention-
[pdf]
[supp]
[bibtex]@InProceedings{Khan_2024_CVPR, author = {Khan, Mohammad Sadil and Dupont, Elona and Ali, Sk Aziz and Cherenkova, Kseniya and Kacem, Anis and Aouada, Djamila}, title = {CAD-SIGNet: CAD Language Inference from Point Clouds using Layer-wise Sketch Instance Guided Attention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4713-4722} }
Object Recognition as Next Token Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yue_2024_CVPR, author = {Yue, Kaiyu and Chen, Bor-Chun and Geiping, Jonas and Li, Hengduo and Goldstein, Tom and Lim, Ser-Nam}, title = {Object Recognition as Next Token Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16645-16656} }
CLIB-FIQA: Face Image Quality Assessment with Confidence Calibration-
[pdf]
[bibtex]@InProceedings{Ou_2024_CVPR, author = {Ou, Fu-Zhao and Li, Chongyi and Wang, Shiqi and Kwong, Sam}, title = {CLIB-FIQA: Face Image Quality Assessment with Confidence Calibration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1694-1704} }
DVMNet: Computing Relative Pose for Unseen Objects Beyond Hypotheses-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Chen and Zhang, Tong and Dang, Zheng and Salzmann, Mathieu}, title = {DVMNet: Computing Relative Pose for Unseen Objects Beyond Hypotheses}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20485-20495} }
Transcriptomics-guided Slide Representation Learning in Computational Pathology-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jaume_2024_CVPR, author = {Jaume, Guillaume and Oldenburg, Lukas and Vaidya, Anurag and Chen, Richard J. and Williamson, Drew F.K. and Peeters, Thomas and Song, Andrew H. and Mahmood, Faisal}, title = {Transcriptomics-guided Slide Representation Learning in Computational Pathology}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9632-9644} }
Predicated Diffusion: Predicate Logic-Based Attention Guidance for Text-to-Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sueyoshi_2024_CVPR, author = {Sueyoshi, Kota and Matsubara, Takashi}, title = {Predicated Diffusion: Predicate Logic-Based Attention Guidance for Text-to-Image Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8651-8660} }
MuRF: Multi-Baseline Radiance Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Haofei and Chen, Anpei and Chen, Yuedong and Sakaridis, Christos and Zhang, Yulun and Pollefeys, Marc and Geiger, Andreas and Yu, Fisher}, title = {MuRF: Multi-Baseline Radiance Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20041-20050} }
CLIP-BEVFormer: Enhancing Multi-View Image-Based BEV Detector with Ground Truth Flow-
[pdf]
[supp]
[bibtex]@InProceedings{Pan_2024_CVPR, author = {Pan, Chenbin and Yaman, Burhaneddin and Velipasalar, Senem and Ren, Liu}, title = {CLIP-BEVFormer: Enhancing Multi-View Image-Based BEV Detector with Ground Truth Flow}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15216-15225} }
CLOVA: A Closed-LOop Visual Assistant with Tool Usage and Update-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2024_CVPR, author = {Gao, Zhi and Du, Yuntao and Zhang, Xintong and Ma, Xiaojian and Han, Wenjuan and Zhu, Song-Chun and Li, Qing}, title = {CLOVA: A Closed-LOop Visual Assistant with Tool Usage and Update}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13258-13268} }
Depth Prompting for Sensor-Agnostic Depth Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2024_CVPR, author = {Park, Jin-Hwi and Jeong, Chanhwi and Lee, Junoh and Jeon, Hae-Gon}, title = {Depth Prompting for Sensor-Agnostic Depth Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9859-9869} }
G3DR: Generative 3D Reconstruction in ImageNet-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Reddy_2024_CVPR, author = {Reddy, Pradyumna and Elezi, Ismail and Deng, Jiankang}, title = {G3DR: Generative 3D Reconstruction in ImageNet}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9655-9665} }
MoML: Online Meta Adaptation for 3D Human Motion Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2024_CVPR, author = {Sun, Xiaoning and Sun, Huaijiang and Li, Bin and Wei, Dong and Li, Weiqing and Lu, Jianfeng}, title = {MoML: Online Meta Adaptation for 3D Human Motion Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1042-1051} }
CAT-DM: Controllable Accelerated Virtual Try-on with Diffusion Model-
[pdf]
[supp]
[bibtex]@InProceedings{Zeng_2024_CVPR, author = {Zeng, Jianhao and Song, Dan and Nie, Weizhi and Tian, Hongshuo and Wang, Tongtong and Liu, An-An}, title = {CAT-DM: Controllable Accelerated Virtual Try-on with Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8372-8382} }
Hyperspherical Classification with Dynamic Label-to-Prototype Assignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Saadabadi_2024_CVPR, author = {Saadabadi, Mohammad Saeed Ebrahimi and Dabouei, Ali and Malakshan, Sahar Rahimi and Nasrabadi, Nasser M.}, title = {Hyperspherical Classification with Dynamic Label-to-Prototype Assignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17333-17342} }
VTimeLLM: Empower LLM to Grasp Video Moments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Bin and Wang, Xin and Chen, Hong and Song, Zihan and Zhu, Wenwu}, title = {VTimeLLM: Empower LLM to Grasp Video Moments}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14271-14280} }
FLHetBench: Benchmarking Device and State Heterogeneity in Federated Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Junyuan and Zeng, Shuang and Zhang, Miao and Wang, Runxi and Wang, Feifei and Zhou, Yuyin and Liang, Paul Pu and Qu, Liangqiong}, title = {FLHetBench: Benchmarking Device and State Heterogeneity in Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12098-12108} }
Flattening the Parent Bias: Hierarchical Semantic Segmentation in the Poincare Ball-
[pdf]
[supp]
[bibtex]@InProceedings{Weber_2024_CVPR, author = {Weber, Simon and Z\"ong\"ur, Bar{\i}\c{s} and Araslanov, Nikita and Cremers, Daniel}, title = {Flattening the Parent Bias: Hierarchical Semantic Segmentation in the Poincare Ball}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28223-28232} }
Privacy-Preserving Optics for Enhancing Protection in Face De-Identification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lopez_2024_CVPR, author = {Lopez, Jhon and Hinojosa, Carlos and Arguello, Henry and Ghanem, Bernard}, title = {Privacy-Preserving Optics for Enhancing Protection in Face De-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12120-12129} }
SmartRefine: A Scenario-Adaptive Refinement Framework for Efficient Motion Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR, author = {Zhou, Yang and Shao, Hao and Wang, Letian and Waslander, Steven L. and Li, Hongsheng and Liu, Yu}, title = {SmartRefine: A Scenario-Adaptive Refinement Framework for Efficient Motion Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15281-15290} }
MVBench: A Comprehensive Multi-modal Video Understanding Benchmark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Kunchang and Wang, Yali and He, Yinan and Li, Yizhuo and Wang, Yi and Liu, Yi and Wang, Zun and Xu, Jilan and Chen, Guo and Luo, Ping and Wang, Limin and Qiao, Yu}, title = {MVBench: A Comprehensive Multi-modal Video Understanding Benchmark}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22195-22206} }
Multi-Scale Video Anomaly Detection by Multi-Grained Spatio-Temporal Representation Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Menghao and Wang, Jingyu and Qi, Qi and Sun, Haifeng and Zhuang, Zirui and Ren, Pengfei and Ma, Ruilong and Liao, Jianxin}, title = {Multi-Scale Video Anomaly Detection by Multi-Grained Spatio-Temporal Representation Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17385-17394} }
An Aggregation-Free Federated Learning for Tackling Data Heterogeneity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Yuan and Fu, Huazhu and Kanagavelu, Renuga and Wei, Qingsong and Liu, Yong and Goh, Rick Siow Mong}, title = {An Aggregation-Free Federated Learning for Tackling Data Heterogeneity}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26233-26242} }
Generative Multimodal Models are In-Context Learners-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2024_CVPR, author = {Sun, Quan and Cui, Yufeng and Zhang, Xiaosong and Zhang, Fan and Yu, Qiying and Wang, Yueze and Rao, Yongming and Liu, Jingjing and Huang, Tiejun and Wang, Xinlong}, title = {Generative Multimodal Models are In-Context Learners}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14398-14409} }
Synergistic Global-space Camera and Human Reconstruction from Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Yizhou and Wang, Tuanfeng Yang and Raj, Bhiksha and Xu, Min and Yang, Jimei and Huang, Chun-Hao Paul}, title = {Synergistic Global-space Camera and Human Reconstruction from Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1216-1226} }
Hierarchical Intra-modal Correlation Learning for Label-free 3D Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Kang_2024_CVPR, author = {Kang, Xin and Chu, Lei and Li, Jiahao and Chen, Xuejin and Lu, Yan}, title = {Hierarchical Intra-modal Correlation Learning for Label-free 3D Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28244-28253} }
Feature Re-Embedding: Towards Foundation Model-Level Performance in Computational Pathology-
[pdf]
[supp]
[bibtex]@InProceedings{Tang_2024_CVPR, author = {Tang, Wenhao and Zhou, Fengtao and Huang, Sheng and Zhu, Xiang and Zhang, Yi and Liu, Bo}, title = {Feature Re-Embedding: Towards Foundation Model-Level Performance in Computational Pathology}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11343-11352} }
DiffSal: Joint Audio and Video Learning for Diffusion Saliency Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiong_2024_CVPR, author = {Xiong, Junwen and Zhang, Peng and You, Tao and Li, Chuanyue and Huang, Wei and Zha, Yufei}, title = {DiffSal: Joint Audio and Video Learning for Diffusion Saliency Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27273-27283} }
Revisiting Single Image Reflection Removal In the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2024_CVPR, author = {Zhu, Yurui and Fu, Xueyang and Jiang, Peng-Tao and Zhang, Hao and Sun, Qibin and Chen, Jinwei and Zha, Zheng-Jun and Li, Bo}, title = {Revisiting Single Image Reflection Removal In the Wild}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25468-25478} }
3D Face Reconstruction with the Geometric Guidance of Facial Part Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Zidu and Zhu, Xiangyu and Zhang, Tianshuo and Wang, Baiqin and Lei, Zhen}, title = {3D Face Reconstruction with the Geometric Guidance of Facial Part Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1672-1682} }
FreeU: Free Lunch in Diffusion U-Net-
[pdf]
[supp]
[bibtex]@InProceedings{Si_2024_CVPR, author = {Si, Chenyang and Huang, Ziqi and Jiang, Yuming and Liu, Ziwei}, title = {FreeU: Free Lunch in Diffusion U-Net}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4733-4743} }
Text Prompt with Normality Guidance for Weakly Supervised Video Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Zhiwei and Liu, Jing and Wu, Peng}, title = {Text Prompt with Normality Guidance for Weakly Supervised Video Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18899-18908} }
SparseOcc: Rethinking Sparse Latent Representation for Vision-Based Semantic Occupancy Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2024_CVPR, author = {Tang, Pin and Wang, Zhongdao and Wang, Guoqing and Zheng, Jilai and Ren, Xiangxuan and Feng, Bailan and Ma, Chao}, title = {SparseOcc: Rethinking Sparse Latent Representation for Vision-Based Semantic Occupancy Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15035-15044} }
SinSR: Diffusion-Based Image Super-Resolution in a Single Step-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Yufei and Yang, Wenhan and Chen, Xinyuan and Wang, Yaohui and Guo, Lanqing and Chau, Lap-Pui and Liu, Ziwei and Qiao, Yu and Kot, Alex C. and Wen, Bihan}, title = {SinSR: Diffusion-Based Image Super-Resolution in a Single Step}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25796-25805} }
Frequency Decoupling for Motion Magnification via Multi-Level Isomorphic Architecture-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Fei and Guo, Dan and Li, Kun and Zhong, Zhun and Wang, Meng}, title = {Frequency Decoupling for Motion Magnification via Multi-Level Isomorphic Architecture}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18984-18994} }
Systematic Comparison of Semi-supervised and Self-supervised Learning for Medical Image Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Zhe and Jiang, Ruijie and Aeron, Shuchin and Hughes, Michael C.}, title = {Systematic Comparison of Semi-supervised and Self-supervised Learning for Medical Image Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22282-22293} }
ViewDiff: 3D-Consistent Image Generation with Text-to-Image Models-
[pdf]
[supp]
[bibtex]@InProceedings{Hollein_2024_CVPR, author = {H\"ollein, Lukas and Bo\v{z}i\v{c}, Alja\v{z} and M\"uller, Norman and Novotny, David and Tseng, Hung-Yu and Richardt, Christian and Zollh\"ofer, Michael and Nie{\ss}ner, Matthias}, title = {ViewDiff: 3D-Consistent Image Generation with Text-to-Image Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5043-5052} }
Hyperbolic Learning with Synthetic Captions for Open-World Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kong_2024_CVPR, author = {Kong, Fanjie and Chen, Yanbei and Cai, Jiarui and Modolo, Davide}, title = {Hyperbolic Learning with Synthetic Captions for Open-World Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16762-16771} }
Diffusion Models Without Attention-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yan_2024_CVPR, author = {Yan, Jing Nathan and Gu, Jiatao and Rush, Alexander M.}, title = {Diffusion Models Without Attention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8239-8249} }
Interpretable Measures of Conceptual Similarity by Complexity-Constrained Descriptive Auto-Encoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Achille_2024_CVPR, author = {Achille, Alessandro and Ver Steeg, Greg and Liu, Tian Yu and Trager, Matthew and Klingenberg, Carson and Soatto, Stefano}, title = {Interpretable Measures of Conceptual Similarity by Complexity-Constrained Descriptive Auto-Encoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11062-11071} }
Emotional Speech-driven 3D Body Animation via Disentangled Latent Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Chhatre_2024_CVPR, author = {Chhatre, Kiran and Dan\v{e}\v{c}ek, Radek and Athanasiou, Nikos and Becherini, Giorgio and Peters, Christopher and Black, Michael J. and Bolkart, Timo}, title = {Emotional Speech-driven 3D Body Animation via Disentangled Latent Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1942-1953} }
3D Feature Tracking via Event Camera-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Siqi and Zhou, Zhikuan and Xue, Zhou and Li, Yipeng and Du, Shaoyi and Gao, Yue}, title = {3D Feature Tracking via Event Camera}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18974-18983} }
Retrieval-Augmented Layout Transformer for Content-Aware Layout Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Horita_2024_CVPR, author = {Horita, Daichi and Inoue, Naoto and Kikuchi, Kotaro and Yamaguchi, Kota and Aizawa, Kiyoharu}, title = {Retrieval-Augmented Layout Transformer for Content-Aware Layout Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {67-76} }
MSU-4S - The Michigan State University Four Seasons Dataset-
[pdf]
[bibtex]@InProceedings{Kent_2024_CVPR, author = {Kent, Daniel and Alyaqoub, Mohammed and Lu, Xiaohu and Khatounabadi, Hamed and Sung, Kookjin and Scheller, Cole and Dalat, Alexander and bin Thabit, Asma and Whitley, Roberto and Radha, Hayder}, title = {MSU-4S - The Michigan State University Four Seasons Dataset}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22658-22667} }
Improving Plasticity in Online Continual Learning via Collaborative Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Maorong and Michel, Nicolas and Xiao, Ling and Yamasaki, Toshihiko}, title = {Improving Plasticity in Online Continual Learning via Collaborative Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23460-23469} }
InstantBooth: Personalized Text-to-Image Generation without Test-Time Finetuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2024_CVPR, author = {Shi, Jing and Xiong, Wei and Lin, Zhe and Jung, Hyun Joon}, title = {InstantBooth: Personalized Text-to-Image Generation without Test-Time Finetuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8543-8552} }
MaxQ: Multi-Axis Query for N:M Sparsity Network-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiang_2024_CVPR, author = {Xiang, Jingyang and Li, Siqi and Chen, Junhao and Chen, Zhuangzhi and Huang, Tianxin and Peng, Linpeng and Liu, Yong}, title = {MaxQ: Multi-Axis Query for N:M Sparsity Network}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15845-15854} }
Part-aware Unified Representation of Language and Skeleton for Zero-shot Action Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2024_CVPR, author = {Zhu, Anqi and Ke, Qiuhong and Gong, Mingming and Bailey, James}, title = {Part-aware Unified Representation of Language and Skeleton for Zero-shot Action Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18761-18770} }
SD2Event: Self-supervised Learning of Dynamic Detectors and Contextual Descriptors for Event Cameras-
[pdf]
[bibtex]@InProceedings{Gao_2024_CVPR, author = {Gao, Yuan and Zhu, Yuqing and Li, Xinjun and Du, Yimin and Zhang, Tianzhu}, title = {SD2Event: Self-supervised Learning of Dynamic Detectors and Contextual Descriptors for Event Cameras}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3055-3064} }
Composing Object Relations and Attributes for Image-Text Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pham_2024_CVPR, author = {Pham, Khoi and Huynh, Chuong and Lim, Ser-Nam and Shrivastava, Abhinav}, title = {Composing Object Relations and Attributes for Image-Text Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14354-14363} }
Previously on ... From Recaps to Story Summarization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Singh_2024_CVPR, author = {Singh, Aditya Kumar and Srivastava, Dhruv and Tapaswi, Makarand}, title = {Previously on ... From Recaps to Story Summarization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13635-13646} }
PaReNeRF: Toward Fast Large-scale Dynamic NeRF with Patch-based Reference-
[pdf]
[supp]
[bibtex]@InProceedings{Tang_2024_CVPR, author = {Tang, Xiao and Yang, Min and Sun, Penghui and Li, Hui and Dai, Yuchao and Zhu, Feng and Lee, Hojae}, title = {PaReNeRF: Toward Fast Large-scale Dynamic NeRF with Patch-based Reference}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5428-5438} }
mPLUG-Owl2: Revolutionizing Multi-modal Large Language Model with Modality Collaboration-
[pdf]
[supp]
[bibtex]@InProceedings{Ye_2024_CVPR, author = {Ye, Qinghao and Xu, Haiyang and Ye, Jiabo and Yan, Ming and Hu, Anwen and Liu, Haowei and Qian, Qi and Zhang, Ji and Huang, Fei}, title = {mPLUG-Owl2: Revolutionizing Multi-modal Large Language Model with Modality Collaboration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13040-13051} }
Spectral and Polarization Vision: Spectro-polarimetric Real-world Dataset-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeon_2024_CVPR, author = {Jeon, Yujin and Choi, Eunsue and Kim, Youngchan and Moon, Yunseong and Omer, Khalid and Heide, Felix and Baek, Seung-Hwan}, title = {Spectral and Polarization Vision: Spectro-polarimetric Real-world Dataset}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22098-22108} }
Learning by Correction: Efficient Tuning Task for Zero-Shot Generative Vision-Language Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Rongjie and Wu, Yu and He, Xuming}, title = {Learning by Correction: Efficient Tuning Task for Zero-Shot Generative Vision-Language Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13428-13437} }
Supervised Anomaly Detection for Complex Industrial Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Baitieva_2024_CVPR, author = {Baitieva, Aimira and Hurych, David and Besnier, Victor and Bernard, Olivier}, title = {Supervised Anomaly Detection for Complex Industrial Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17754-17762} }
Open3DSG: Open-Vocabulary 3D Scene Graphs from Point Clouds with Queryable Objects and Open-Set Relationships-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Koch_2024_CVPR, author = {Koch, Sebastian and Vaskevicius, Narunas and Colosi, Mirco and Hermosilla, Pedro and Ropinski, Timo}, title = {{Open3DSG}: Open-Vocabulary {3D} Scene Graphs from Point Clouds with Queryable Objects and Open-Set Relationships}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {14183--14193} }
SURE: SUrvey REcipes for building reliable and robust deep networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Yuting and Chen, Yingyi and Yu, Xuanlong and Chen, Dexiong and Shen, Xi}, title = {{SURE}: {SUrvey} {REcipes} for building reliable and robust deep networks}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {17500--17510} }
PolarRec: Improving Radio Interferometric Data Reconstruction Using Polar Coordinates-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Ruoqi and Chen, Zhuoyang and Zhu, Jiayi and Luo, Qiong and Wang, Feng}, title = {{PolarRec}: Improving Radio Interferometric Data Reconstruction Using Polar Coordinates}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {12841--12850} }
Affine Equivariant Networks Based on Differential Invariants-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Yikang and Qiu, Yeqing and Chen, Yuxuan and He, Lingshen and Lin, Zhouchen}, title = {Affine Equivariant Networks Based on Differential Invariants}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {5546--5556} }
Selectively Informative Description can Reduce Undesired Embedding Entanglements in Text-to-Image Personalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Jimyeong and Park, Jungwon and Rhee, Wonjong}, title = {Selectively Informative Description can Reduce Undesired Embedding Entanglements in Text-to-Image Personalization}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {8312--8322} }
Summarize the Past to Predict the Future: Natural Language Descriptions of Context Boost Multimodal Object Interaction Anticipation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pasca_2024_CVPR, author = {Pasca, Razvan-George and Gavryushin, Alexey and Hamza, Muhammad and Kuo, Yen-Ling and Mo, Kaichun and Van Gool, Luc and Hilliges, Otmar and Wang, Xi}, title = {Summarize the Past to Predict the Future: Natural Language Descriptions of Context Boost Multimodal Object Interaction Anticipation}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {18286--18296} }
Transfer CLIP for Generalizable Image Denoising-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2024_CVPR, author = {Cheng, Jun and Liang, Dong and Tan, Shan}, title = {Transfer {CLIP} for Generalizable Image Denoising}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {25974--25984} }
Smooth Diffusion: Crafting Smooth Latent Spaces in Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2024_CVPR, author = {Guo, Jiayi and Xu, Xingqian and Pu, Yifan and Ni, Zanlin and Wang, Chaofei and Vasu, Manushree and Song, Shiji and Huang, Gao and Shi, Humphrey}, title = {Smooth Diffusion: Crafting Smooth Latent Spaces in Diffusion Models}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {7548--7558} }
Towards CLIP-driven Language-free 3D Visual Grounding via 2D-3D Relational Enhancement and Consistency-
[pdf]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Yuqi and Luo, Han and Lei, Yinjie}, title = {Towards {CLIP}-driven Language-free {3D} Visual Grounding via {2D}-{3D} Relational Enhancement and Consistency}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {13063--13072} }
Optimal Transport Aggregation for Visual Place Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Izquierdo_2024_CVPR, author = {Izquierdo, Sergio and Civera, Javier}, title = {Optimal Transport Aggregation for Visual Place Recognition}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {17658--17668} }
FlowIE: Efficient Image Enhancement via Rectified Flow-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2024_CVPR, author = {Zhu, Yixuan and Zhao, Wenliang and Li, Ao and Tang, Yansong and Zhou, Jie and Lu, Jiwen}, title = {{FlowIE}: Efficient Image Enhancement via Rectified Flow}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {13--22} }
Aligning and Prompting Everything All at Once for Universal Visual Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shen_2024_CVPR, author = {Shen, Yunhang and Fu, Chaoyou and Chen, Peixian and Zhang, Mengdan and Li, Ke and Sun, Xing and Wu, Yunsheng and Lin, Shaohui and Ji, Rongrong}, title = {Aligning and Prompting Everything All at Once for Universal Visual Perception}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {13193--13203} }
Correlation-Decoupled Knowledge Distillation for Multimodal Sentiment Analysis with Incomplete Modalities-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Mingcheng and Yang, Dingkang and Zhao, Xiao and Wang, Shuaibing and Wang, Yan and Yang, Kun and Sun, Mingyang and Kou, Dongliang and Qian, Ziyun and Zhang, Lihua}, title = {Correlation-Decoupled Knowledge Distillation for Multimodal Sentiment Analysis with Incomplete Modalities}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {12458--12468} }
Revisiting Adversarial Training at Scale-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Zeyu and Li, Xianhang and Zhu, Hongru and Xie, Cihang}, title = {Revisiting Adversarial Training at Scale}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {24675--24685} }
Towards Fairness-Aware Adversarial Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Yanghao and Zhang, Tianle and Mu, Ronghui and Huang, Xiaowei and Ruan, Wenjie}, title = {Towards Fairness-Aware Adversarial Learning}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {24746--24755} }
LoSh: Long-Short Text Joint Prediction Network for Referring Video Object Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2024_CVPR, author = {Yuan, Linfeng and Shi, Miaojing and Yue, Zijie and Chen, Qijun}, title = {{LoSh}: Long-Short Text Joint Prediction Network for Referring Video Object Segmentation}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {14001--14010} }
MirageRoom: 3D Scene Segmentation with 2D Pre-trained Models by Mirage Projection-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2024_CVPR, author = {Sun, Haowen and Duan, Yueqi and Yan, Juncheng and Liu, Yifan and Lu, Jiwen}, title = {{MirageRoom}: {3D} Scene Segmentation with {2D} Pre-trained Models by Mirage Projection}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {20237--20246} }
In2SET: Intra-Inter Similarity Exploiting Transformer for Dual-Camera Compressive Hyperspectral Imaging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Xin and Wang, Lizhi and Ma, Xiangtian and Zhang, Maoqing and Zhu, Lin and Huang, Hua}, title = {{In2SET}: Intra-Inter Similarity Exploiting Transformer for Dual-Camera Compressive Hyperspectral Imaging}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {24881--24891} }
Dual Prototype Attention for Unsupervised Video Object Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Cho_2024_CVPR, author = {Cho, Suhwan and Lee, Minhyeok and Lee, Seunghoon and Lee, Dogyoon and Choi, Heeseung and Kim, Ig-Jae and Lee, Sangyoun}, title = {Dual Prototype Attention for Unsupervised Video Object Segmentation}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {19238--19247} }
Look-Up Table Compression for Efficient Image Restoration-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Yinglong and Li, Jiacheng and Xiong, Zhiwei}, title = {Look-Up Table Compression for Efficient Image Restoration}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {26016--26025} }
TextNeRF: A Novel Scene-Text Image Synthesis Method based on Neural Radiance Fields-
[pdf]
[supp]
[bibtex]@InProceedings{Cui_2024_CVPR, author = {Cui, Jialei and Du, Jianwei and Liu, Wenzhuo and Lian, Zhouhui}, title = {{TextNeRF}: A Novel Scene-Text Image Synthesis Method based on Neural Radiance Fields}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {22272--22281} }
Dr.Hair: Reconstructing Scalp-Connected Hair Strands without Pre-Training via Differentiable Rendering of Line Segments-
[pdf]
[supp]
[bibtex]@InProceedings{Takimoto_2024_CVPR, author = {Takimoto, Yusuke and Takehara, Hikari and Sato, Hiroyuki and Zhu, Zihao and Zheng, Bo}, title = {{Dr.Hair}: Reconstructing Scalp-Connected Hair Strands without Pre-Training via Differentiable Rendering of Line Segments}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {20601--20611} }
Improving Training Efficiency of Diffusion Models via Multi-Stage Framework and Tailored Multi-Decoder Architecture-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Huijie and Lu, Yifu and Alkhouri, Ismail and Ravishankar, Saiprasad and Song, Dogyoon and Qu, Qing}, title = {Improving Training Efficiency of Diffusion Models via Multi-Stage Framework and Tailored Multi-Decoder Architecture}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {7372--7381} }
In-Context Matting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2024_CVPR, author = {Guo, He and Ye, Zixuan and Cao, Zhiguo and Lu, Hao}, title = {In-Context Matting}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {3711--3720} }
Navigate Beyond Shortcuts: Debiased Learning Through the Lens of Neural Collapse-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Yining and Sun, Junjie and Wang, Chenyue and Zhang, Mi and Yang, Min}, title = {Navigate Beyond Shortcuts: Debiased Learning Through the Lens of Neural Collapse}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {12322--12331} }
DiVa-360: The Dynamic Visual Dataset for Immersive Neural Fields-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2024_CVPR, author = {Lu, Cheng-You and Zhou, Peisen and Xing, Angela and Pokhariya, Chandradeep and Dey, Arnab and Shah, Ishaan Nikhil and Mavidipalli, Rugved and Hu, Dylan and Comport, Andrew I. and Chen, Kefan and Sridhar, Srinath}, title = {{DiVa-360}: The Dynamic Visual Dataset for Immersive Neural Fields}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {22466--22476} }
A Subspace-Constrained Tyler's Estimator and its Applications to Structure from Motion-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2024_CVPR, author = {Yu, Feng and Zhang, Teng and Lerman, Gilad}, title = {A Subspace-Constrained {Tyler}'s Estimator and its Applications to Structure from Motion}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {14575--14584} }
FSC: Few-point Shape Completion-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Xianzu and Wu, Xianfeng and Luan, Tianyu and Bai, Yajing and Lai, Zhongyuan and Yuan, Junsong}, title = {{FSC}: Few-point Shape Completion}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {26077--26087} }
CAD: Photorealistic 3D Generation via Adversarial Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wan_2024_CVPR, author = {Wan, Ziyu and Paschalidou, Despoina and Huang, Ian and Liu, Hongyu and Shen, Bokui and Xiang, Xiaoyu and Liao, Jing and Guibas, Leonidas}, title = {{CAD}: Photorealistic {3D} Generation via Adversarial Distillation}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {10194--10207} }
Enhancing Vision-Language Pre-training with Rich Supervisions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2024_CVPR, author = {Gao, Yuan and Shi, Kunyu and Zhu, Pengkai and Belval, Edouard and Nuriel, Oren and Appalaraju, Srikar and Ghadar, Shabnam and Tu, Zhuowen and Mahadevan, Vijay and Soatto, Stefano}, title = {Enhancing Vision-Language Pre-training with Rich Supervisions}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {13480--13491} }
T-VSL: Text-Guided Visual Sound Source Localization in Mixtures-
[pdf]
[supp]
[bibtex]@InProceedings{Mahmud_2024_CVPR, author = {Mahmud, Tanvir and Tian, Yapeng and Marculescu, Diana}, title = {{T-VSL}: Text-Guided Visual Sound Source Localization in Mixtures}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {26742--26751} }
DemoCaricature: Democratising Caricature Generation with a Rough Sketch-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Dar-Yen and Bhunia, Ayan Kumar and Koley, Subhadeep and Sain, Aneeshan and Chowdhury, Pinaki Nath and Song, Yi-Zhe}, title = {{DemoCaricature}: Democratising Caricature Generation with a Rough Sketch}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {8629--8639} }
CapHuman: Capture Your Moments in Parallel Universes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2024_CVPR, author = {Liang, Chao and Ma, Fan and Zhu, Linchao and Deng, Yingying and Yang, Yi}, title = {{CapHuman}: Capture Your Moments in Parallel Universes}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {6400--6409} }
SDPose: Tokenized Pose Estimation via Circulation-Guide Self-Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Sichen and Zhang, Yingyi and Huang, Siming and Yi, Ran and Fan, Ke and Zhang, Ruixin and Chen, Peixian and Wang, Jun and Ding, Shouhong and Ma, Lizhuang}, title = {{SDPose}: Tokenized Pose Estimation via Circulation-Guide Self-Distillation}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {1082--1090} }
Authentic Hand Avatar from a Phone Scan via Universal Hand Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Moon_2024_CVPR, author = {Moon, Gyeongsik and Xu, Weipeng and Joshi, Rohan and Wu, Chenglei and Shiratori, Takaaki}, title = {Authentic Hand Avatar from a Phone Scan via Universal Hand Model}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {2029--2038} }
VCoder: Versatile Vision Encoders for Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jain_2024_CVPR, author = {Jain, Jitesh and Yang, Jianwei and Shi, Humphrey}, title = {{VCoder}: Versatile Vision Encoders for Multimodal Large Language Models}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {27992--28002} }
Event-based Visible and Infrared Fusion via Multi-task Collaboration-
[pdf]
[supp]
[bibtex]@InProceedings{Geng_2024_CVPR, author = {Geng, Mengyue and Zhu, Lin and Wang, Lizhi and Zhang, Wei and Xiong, Ruiqin and Tian, Yonghong}, title = {Event-based Visible and Infrared Fusion via Multi-task Collaboration}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {26929--26939} }
Open-World Semantic Segmentation Including Class Similarity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sodano_2024_CVPR, author = {Sodano, Matteo and Magistri, Federico and Nunes, Lucas and Behley, Jens and Stachniss, Cyrill}, title = {Open-World Semantic Segmentation Including Class Similarity}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {3184--3194} }
RegionPLC: Regional Point-Language Contrastive Learning for Open-World 3D Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Jihan and Ding, Runyu and Deng, Weipeng and Wang, Zhe and Qi, Xiaojuan}, title = {{RegionPLC}: Regional Point-Language Contrastive Learning for Open-World {3D} Scene Understanding}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {19823--19832} }
Adaptive VIO: Deep Visual-Inertial Odometry with Online Continual Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2024_CVPR, author = {Pan, Youqi and Zhou, Wugen and Cao, Yingdian and Zha, Hongbin}, title = {Adaptive {VIO}: Deep Visual-Inertial Odometry with Online Continual Learning}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {18019--18028} }
Towards Memorization-Free Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Chen and Liu, Daochang and Xu, Chang}, title = {Towards Memorization-Free Diffusion Models}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {8425--8434} }
Generalized Large-Scale Data Condensation via Various Backbone and Statistical Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shao_2024_CVPR, author = {Shao, Shitong and Yin, Zeyuan and Zhou, Muxin and Zhang, Xindong and Shen, Zhiqiang}, title = {Generalized Large-Scale Data Condensation via Various Backbone and Statistical Matching}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {16709--16718} }
Three Pillars Improving Vision Foundation Model Distillation for Lidar-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Puy_2024_CVPR, author = {Puy, Gilles and Gidaris, Spyros and Boulch, Alexandre and Sim{\'e}oni, Oriane and Sautier, Corentin and P{\'e}rez, Patrick and Bursuc, Andrei and Marlet, Renaud}, title = {Three Pillars Improving Vision Foundation Model Distillation for Lidar}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {21519--21529} }
On Train-Test Class Overlap and Detection for Image Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2024_CVPR, author = {Song, Chull Hwan and Yoon, Jooyoung and Hwang, Taebaek and Choi, Shunghyun and Gu, Yeong Hyeon and Avrithis, Yannis}, title = {On Train-Test Class Overlap and Detection for Image Retrieval}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {17375--17384} }
AttriHuman-3D: Editable 3D Human Avatar Generation with Attribute Decomposition and Indexing-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Fan and Chen, Tianyi and He, Xiaosheng and Cai, Zhongang and Yang, Lei and Wu, Si and Lin, Guosheng}, title = {{AttriHuman-3D}: Editable {3D} Human Avatar Generation with Attribute Decomposition and Indexing}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {10596--10605} }
IQ-VFI: Implicit Quadratic Motion Estimation for Video Frame Interpolation-
[pdf]
[bibtex]@InProceedings{Hu_2024_CVPR, author = {Hu, Mengshun and Jiang, Kui and Zhong, Zhihang and Wang, Zheng and Zheng, Yinqiang}, title = {{IQ-VFI}: Implicit Quadratic Motion Estimation for Video Frame Interpolation}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {6410--6419} }
KeyPoint Relative Position Encoding for Face Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Minchul and Su, Yiyang and Liu, Feng and Jain, Anil and Liu, Xiaoming}, title = {{KeyPoint} Relative Position Encoding for Face Recognition}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {244--255} }
Hyper-MD: Mesh Denoising with Customized Parameters Aware of Noise Intensity and Geometric Characteristics-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Xingtao and Wei, Hongliang and Fan, Xiaopeng and Zhao, Debin}, title = {{Hyper-MD}: Mesh Denoising with Customized Parameters Aware of Noise Intensity and Geometric Characteristics}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {4651--4660} }
Learning Object State Changes in Videos: An Open-World Perspective-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xue_2024_CVPR, author = {Xue, Zihui and Ashutosh, Kumar and Grauman, Kristen}, title = {Learning Object State Changes in Videos: An Open-World Perspective}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {18493--18503} }
Beyond First-Order Tweedie: Solving Inverse Problems using Latent Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rout_2024_CVPR, author = {Rout, Litu and Chen, Yujia and Kumar, Abhishek and Caramanis, Constantine and Shakkottai, Sanjay and Chu, Wen-Sheng}, title = {Beyond First-Order {Tweedie}: Solving Inverse Problems using Latent Diffusion}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {9472--9481} }
Rethinking the Objectives of Vector-Quantized Tokenizers for Image Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gu_2024_CVPR, author = {Gu, Yuchao and Wang, Xintao and Ge, Yixiao and Shan, Ying and Shou, Mike Zheng}, title = {Rethinking the Objectives of Vector-Quantized Tokenizers for Image Synthesis}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {7631--7640} }
ShapeWalk: Compositional Shape Editing Through Language-Guided Chains-
[pdf]
[supp]
[bibtex]@InProceedings{Slim_2024_CVPR, author = {Slim, Habib and Elhoseiny, Mohamed}, title = {{ShapeWalk}: Compositional Shape Editing Through Language-Guided Chains}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {22574--22583} }
MESA: Matching Everything by Segmenting Anything-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Yesheng and Zhao, Xu}, title = {{MESA}: Matching Everything by Segmenting Anything}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {20217--20226} }
Learning Degradation-Independent Representations for Camera ISP Pipelines-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2024_CVPR, author = {Guo, Yanhui and Luo, Fangzhou and Wu, Xiaolin}, title = {Learning Degradation-Independent Representations for Camera {ISP} Pipelines}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {25774--25783} }
SCoFT: Self-Contrastive Fine-Tuning for Equitable Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Zhixuan and Schaldenbrand, Peter and Okogwu, Beverley-Claire and Peng, Wenxuan and Yun, Youngsik and Hundt, Andrew and Kim, Jihie and Oh, Jean}, title = {{SCoFT}: Self-Contrastive Fine-Tuning for Equitable Image Generation}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {10822--10832} }
Continuous Pose for Monocular Cameras in Neural Implicit Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2024_CVPR, author = {Ma, Qi and Paudel, Danda Pani and Chhatkuli, Ajad and Van Gool, Luc}, title = {Continuous Pose for Monocular Cameras in Neural Implicit Representation}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {5291--5301} }
OmniGlue: Generalizable Feature Matching with Foundation Model Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2024_CVPR, author = {Jiang, Hanwen and Karpur, Arjun and Cao, Bingyi and Huang, Qixing and Araujo, Andr{\'e}}, title = {{OmniGlue}: Generalizable Feature Matching with Foundation Model Guidance}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {19865--19875} }
D^4: Dataset Distillation via Disentangled Diffusion Model-
[pdf]
[supp]
[bibtex]@InProceedings{Su_2024_CVPR, author = {Su, Duo and Hou, Junjie and Gao, Weizhi and Tian, Yingjie and Tang, Bowen}, title = {{$D^4$}: Dataset Distillation via Disentangled Diffusion Model}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {5809--5818} }
OmniSDF: Scene Reconstruction using Omnidirectional Signed Distance Functions and Adaptive Binoctrees-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Hakyeong and Meuleman, Andreas and Jang, Hyeonjoong and Tompkin, James and Kim, Min H.}, title = {{OmniSDF}: Scene Reconstruction using Omnidirectional Signed Distance Functions and Adaptive Binoctrees}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {20227--20236} }
Generating Content for HDR Deghosting from Frequency View-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2024_CVPR, author = {Hu, Tao and Yan, Qingsen and Qi, Yuankai and Zhang, Yanning}, title = {Generating Content for {HDR} Deghosting from Frequency View}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {25732--25741} }
Iterated Learning Improves Compositionality in Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2024_CVPR, author = {Zheng, Chenhao and Zhang, Jieyu and Kembhavi, Aniruddha and Krishna, Ranjay}, title = {Iterated Learning Improves Compositionality in Large Vision-Language Models}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {13785--13795} }
Event Stream-based Visual Object Tracking: A High-Resolution Benchmark Dataset and A Novel Baseline-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Xiao and Wang, Shiao and Tang, Chuanming and Zhu, Lin and Jiang, Bo and Tian, Yonghong and Tang, Jin}, title = {Event Stream-based Visual Object Tracking: A High-Resolution Benchmark Dataset and A Novel Baseline}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {19248--19257} }
LiDAR-Net: A Real-scanned 3D Point Cloud Dataset for Indoor Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2024_CVPR, author = {Guo, Yanwen and Li, Yuanqi and Ren, Dayong and Zhang, Xiaohong and Li, Jiawei and Pu, Liang and Ma, Changfeng and Zhan, Xiaoyu and Guo, Jie and Wei, Mingqiang and Zhang, Yan and Yu, Piaopiao and Yang, Shuangyu and Ji, Donghao and Ye, Huisheng and Sun, Hao and Liu, Yansong and Chen, Yinuo and Zhu, Jiaqi and Liu, Hongyu}, title = {{LiDAR-Net}: A Real-scanned {3D} Point Cloud Dataset for Indoor Scenes}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {21989--21999} }
Dual DETRs for Multi-Label Temporal Action Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2024_CVPR, author = {Zhu, Yuhan and Zhang, Guozhen and Tan, Jing and Wu, Gangshan and Wang, Limin}, title = {Dual {DETRs} for Multi-Label Temporal Action Detection}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {18559--18569} }
Rich Human Feedback for Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2024_CVPR, author = {Liang, Youwei and He, Junfeng and Li, Gang and Li, Peizhao and Klimovskiy, Arseniy and Carolan, Nicholas and Sun, Jiao and Pont-Tuset, Jordi and Young, Sarah and Yang, Feng and Ke, Junjie and Dvijotham, Krishnamurthy Dj and Collins, Katherine M. and Luo, Yiwen and Li, Yang and Kohlhoff, Kai J. and Ramachandran, Deepak and Navalpakkam, Vidhya}, title = {Rich Human Feedback for Text-to-Image Generation}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {19401--19411} }
360DVD: Controllable Panorama Video Generation with 360-Degree Video Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Qian and Li, Weiqi and Mou, Chong and Cheng, Xinhua and Zhang, Jian}, title = {{360DVD}: Controllable Panorama Video Generation with 360-Degree Video Diffusion Model}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {6913--6923} }
Map-Relative Pose Regression for Visual Re-Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Shuai and Cavallari, Tommaso and Prisacariu, Victor Adrian and Brachmann, Eric}, title = {Map-Relative Pose Regression for Visual Re-Localization}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2024}, pages = {20665--20674} }
Implicit Event-RGBD Neural SLAM-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qu_2024_CVPR, author = {Qu, Delin and Yan, Chi and Wang, Dong and Yin, Jie and Chen, Qizhi and Xu, Dan and Zhang, Yiting and Zhao, Bin and Li, Xuelong}, title = {Implicit Event-RGBD Neural SLAM}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19584-19594} }
Virtual Immunohistochemistry Staining for Histological Images Assisted by Weakly-supervised Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Jiahan and Dong, Jiuyang and Huang, Shenjin and Li, Xi and Jiang, Junjun and Fan, Xiaopeng and Zhang, Yongbing}, title = {Virtual Immunohistochemistry Staining for Histological Images Assisted by Weakly-supervised Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11259-11268} }
DeCoTR: Enhancing Depth Completion with 2D and 3D Attentions-
[pdf]
[arXiv]
[bibtex]@InProceedings{Shi_2024_CVPR, author = {Shi, Yunxiao and Singh, Manish Kumar and Cai, Hong and Porikli, Fatih}, title = {DeCoTR: Enhancing Depth Completion with 2D and 3D Attentions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10736-10746} }
Utility-Fairness Trade-Offs and How to Find Them-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dehdashtian_2024_CVPR, author = {Dehdashtian, Sepehr and Sadeghi, Bashir and Boddeti, Vishnu Naresh}, title = {Utility-Fairness Trade-Offs and How to Find Them}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12037-12046} }
Domain-Specific Block Selection and Paired-View Pseudo-Labeling for Online Test-Time Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2024_CVPR, author = {Yu, Yeonguk and Shin, Sungho and Back, Seunghyeok and Ko, Mihwan and Noh, Sangjun and Lee, Kyoobin}, title = {Domain-Specific Block Selection and Paired-View Pseudo-Labeling for Online Test-Time Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22723-22732} }
Aerial Lifting: Neural Urban Semantic and Building Instance Lifting from Aerial Imagery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Yuqi and Chen, Guanying and Chen, Jiaxing and Cui, Shuguang}, title = {Aerial Lifting: Neural Urban Semantic and Building Instance Lifting from Aerial Imagery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21092-21103} }
SAOR: Single-View Articulated Object Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Aygun_2024_CVPR, author = {Aygun, Mehmet and Mac Aodha, Oisin}, title = {SAOR: Single-View Articulated Object Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10382-10391} }
A Theory of Joint Light and Heat Transport for Lambertian Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Ramanagopal_2024_CVPR, author = {Ramanagopal, Mani and Narayanan, Sriram and Sankaranarayanan, Aswin C. and Narasimhan, Srinivasa G.}, title = {A Theory of Joint Light and Heat Transport for Lambertian Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11924-11933} }
iKUN: Speak to Trackers without Retraining-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2024_CVPR, author = {Du, Yunhao and Lei, Cheng and Zhao, Zhicheng and Su, Fei}, title = {iKUN: Speak to Trackers without Retraining}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19135-19144} }
RankMatch: Exploring the Better Consistency Regularization for Semi-supervised Semantic Segmentation-
[pdf]
[bibtex]@InProceedings{Mai_2024_CVPR, author = {Mai, Huayu and Sun, Rui and Zhang, Tianzhu and Wu, Feng}, title = {RankMatch: Exploring the Better Consistency Regularization for Semi-supervised Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3391-3401} }
Facial Identity Anonymization via Intrinsic and Extrinsic Attention Distraction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kuang_2024_CVPR, author = {Kuang, Zhenzhong and Yang, Xiaochen and Shen, Yingjie and Hu, Chao and Yu, Jun}, title = {Facial Identity Anonymization via Intrinsic and Extrinsic Attention Distraction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12406-12415} }
3D-SceneDreamer: Text-Driven 3D-Consistent Scene Generation-
[pdf]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Songchun and Zhang, Yibo and Zheng, Quan and Ma, Rui and Hua, Wei and Bao, Hujun and Xu, Weiwei and Zou, Changqing}, title = {3D-SceneDreamer: Text-Driven 3D-Consistent Scene Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10170-10180} }
VMINer: Versatile Multi-view Inverse Rendering with Near- and Far-field Light Sources-
[pdf]
[supp]
[bibtex]@InProceedings{Fei_2024_CVPR, author = {Fei, Fan and Tang, Jiajun and Tan, Ping and Shi, Boxin}, title = {VMINer: Versatile Multi-view Inverse Rendering with Near- and Far-field Light Sources}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11800-11809} }
RoHM: Robust Human Motion Reconstruction via Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Siwei and Bhatnagar, Bharat Lal and Xu, Yuanlu and Winkler, Alexander and Kadlecek, Petr and Tang, Siyu and Bogo, Federica}, title = {RoHM: Robust Human Motion Reconstruction via Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14606-14617} }
Do You Remember? Dense Video Captioning with Cross-Modal Memory Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Minkuk and Kim, Hyeon Bae and Moon, Jinyoung and Choi, Jinwoo and Kim, Seong Tae}, title = {Do You Remember? Dense Video Captioning with Cross-Modal Memory Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13894-13904} }
DuPL: Dual Student with Trustworthy Progressive Learning for Robust Weakly Supervised Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Yuanchen and Ye, Xichen and Yang, Kequan and Li, Jide and Li, Xiaoqiang}, title = {DuPL: Dual Student with Trustworthy Progressive Learning for Robust Weakly Supervised Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3534-3543} }
Learning with Structural Labels for Learning with Noisy Labels-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Noo-ri and Lee, Jin-Seop and Lee, Jee-Hyong}, title = {Learning with Structural Labels for Learning with Noisy Labels}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27610-27620} }
SurMo: Surface-based 4D Motion Modeling for Dynamic Human Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2024_CVPR, author = {Hu, Tao and Hong, Fangzhou and Liu, Ziwei}, title = {SurMo: Surface-based 4D Motion Modeling for Dynamic Human Rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6550-6560} }
SPAD: Spatially Aware Multi-View Diffusers-
[pdf]
[supp]
[bibtex]@InProceedings{Kant_2024_CVPR, author = {Kant, Yash and Siarohin, Aliaksandr and Wu, Ziyi and Vasilkovsky, Michael and Qian, Guocheng and Ren, Jian and Guler, Riza Alp and Ghanem, Bernard and Tulyakov, Sergey and Gilitschenski, Igor}, title = {SPAD: Spatially Aware Multi-View Diffusers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10026-10038} }
Gradient Reweighting: Towards Imbalanced Class-Incremental Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2024_CVPR, author = {He, Jiangpeng}, title = {Gradient Reweighting: Towards Imbalanced Class-Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16668-16677} }
Hierarchical Spatio-temporal Decoupling for Text-to-Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qing_2024_CVPR, author = {Qing, Zhiwu and Zhang, Shiwei and Wang, Jiayu and Wang, Xiang and Wei, Yujie and Zhang, Yingya and Gao, Changxin and Sang, Nong}, title = {Hierarchical Spatio-temporal Decoupling for Text-to-Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6635-6645} }
PLACE: Adaptive Layout-Semantic Fusion for Semantic Image Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lv_2024_CVPR, author = {Lv, Zhengyao and Wei, Yuxiang and Zuo, Wangmeng and Wong, Kwan-Yee K.}, title = {PLACE: Adaptive Layout-Semantic Fusion for Semantic Image Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9264-9274} }
Exploring Efficient Asymmetric Blind-Spots for Self-Supervised Denoising in Real-World Scenarios-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Shiyan and Zhang, Jiyuan and Yu, Zhaofei and Huang, Tiejun}, title = {Exploring Efficient Asymmetric Blind-Spots for Self-Supervised Denoising in Real-World Scenarios}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2814-2823} }
Gaussian Splatting SLAM-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Matsuki_2024_CVPR, author = {Matsuki, Hidenobu and Murai, Riku and Kelly, Paul H.J. and Davison, Andrew J.}, title = {Gaussian Splatting SLAM}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18039-18048} }
Not All Classes Stand on Same Embeddings: Calibrating a Semantic Distance with Metric Tensor-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2024_CVPR, author = {Park, Jae Hyeon and Lee, Gyoomin and Park, Seunggi and Cho, Sung In}, title = {Not All Classes Stand on Same Embeddings: Calibrating a Semantic Distance with Metric Tensor}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17722-17731} }
A Simple Recipe for Contrastively Pre-training Video-First Encoders Beyond 16 Frames-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Papalampidi_2024_CVPR, author = {Papalampidi, Pinelopi and Koppula, Skanda and Pathak, Shreya and Chiu, Justin and Heyward, Joe and Patraucean, Viorica and Shen, Jiajun and Miech, Antoine and Zisserman, Andrew and Nematzadeh, Aida}, title = {A Simple Recipe for Contrastively Pre-training Video-First Encoders Beyond 16 Frames}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14386-14397} }
DeMatch: Deep Decomposition of Motion Field for Two-View Correspondence Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Shihua and Li, Zizhuo and Gao, Yuan and Ma, Jiayi}, title = {DeMatch: Deep Decomposition of Motion Field for Two-View Correspondence Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20278-20287} }
Hierarchical Diffusion Policy for Kinematics-Aware Multi-Task Robotic Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2024_CVPR, author = {Ma, Xiao and Patidar, Sumit and Haughton, Iain and James, Stephen}, title = {Hierarchical Diffusion Policy for Kinematics-Aware Multi-Task Robotic Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18081-18090} }
Efficient Multi-scale Network with Learnable Discrete Wavelet Transform for Blind Motion Deblurring-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2024_CVPR, author = {Gao, Xin and Qiu, Tianheng and Zhang, Xinyu and Bai, Hanlin and Liu, Kang and Huang, Xuan and Wei, Hu and Zhang, Guoying and Liu, Huaping}, title = {Efficient Multi-scale Network with Learnable Discrete Wavelet Transform for Blind Motion Deblurring}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2733-2742} }
MaskPLAN: Masked Generative Layout Planning from Partial Input-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Hang and Savov, Anton and Dillenburger, Benjamin}, title = {MaskPLAN: Masked Generative Layout Planning from Partial Input}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8964-8973} }
Benchmarking the Robustness of Temporal Action Detection Models Against Temporal Corruptions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2024_CVPR, author = {Zeng, Runhao and Chen, Xiaoyong and Liang, Jiaming and Wu, Huisi and Cao, Guangzhong and Guo, Yong}, title = {Benchmarking the Robustness of Temporal Action Detection Models Against Temporal Corruptions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18263-18274} }
Open-World Human-Object Interaction Detection via Multi-modal Prompts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Jie and Li, Bingliang and Zeng, Ailing and Zhang, Lei and Zhang, Ruimao}, title = {Open-World Human-Object Interaction Detection via Multi-modal Prompts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16954-16964} }
HMD-Poser: On-Device Real-time Human Motion Tracking from Scalable Sparse Observations-
[pdf]
[supp]
[bibtex]@InProceedings{Dai_2024_CVPR, author = {Dai, Peng and Zhang, Yang and Liu, Tao and Fan, Zhen and Du, Tianyuan and Su, Zhuo and Zheng, Xiaozheng and Li, Zeming}, title = {HMD-Poser: On-Device Real-time Human Motion Tracking from Scalable Sparse Observations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {874-884} }
UniMODE: Unified Monocular 3D Object Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Zhuoling and Xu, Xiaogang and Lim, SerNam and Zhao, Hengshuang}, title = {UniMODE: Unified Monocular 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16561-16570} }
Sherpa3D: Boosting High-Fidelity Text-to-3D Generation via Coarse 3D Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Fangfu and Wu, Diankun and Wei, Yi and Rao, Yongming and Duan, Yueqi}, title = {Sherpa3D: Boosting High-Fidelity Text-to-3D Generation via Coarse 3D Prior}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20763-20774} }
Flexible Biometrics Recognition: Bridging the Multimodality Gap through Attention Alignment and Prompt Tuning-
[pdf]
[supp]
[bibtex]@InProceedings{Tiong_2024_CVPR, author = {Tiong, Leslie Ching Ow and Sigmund, Dick and Chan, Chen-Hui and Teoh, Andrew Beng Jin}, title = {Flexible Biometrics Recognition: Bridging the Multimodality Gap through Attention Alignment and Prompt Tuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {267-276} }
Multi-agent Collaborative Perception via Motion-aware Robust Communication Network-
[pdf]
[bibtex]@InProceedings{Hong_2024_CVPR, author = {Hong, Shixin and Liu, Yu and Li, Zhi and Li, Shaohui and He, You}, title = {Multi-agent Collaborative Perception via Motion-aware Robust Communication Network}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15301-15310} }
The Manga Whisperer: Automatically Generating Transcriptions for Comics-
[pdf]
[arXiv]
[bibtex]@InProceedings{Sachdeva_2024_CVPR, author = {Sachdeva, Ragav and Zisserman, Andrew}, title = {The Manga Whisperer: Automatically Generating Transcriptions for Comics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12967-12976} }
Exploring Region-Word Alignment in Built-in Detector for Open-Vocabulary Object Detection-
[pdf]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Heng and Zhao, Qiuyu and Zheng, Linyu and Zeng, Hao and Ge, Zhiwei and Li, Tianhao and Xu, Sulong}, title = {Exploring Region-Word Alignment in Built-in Detector for Open-Vocabulary Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16975-16984} }
MovieChat: From Dense Token to Sparse Memory for Long Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2024_CVPR, author = {Song, Enxin and Chai, Wenhao and Wang, Guanhong and Zhang, Yucheng and Zhou, Haoyang and Wu, Feiyang and Chi, Haozhe and Guo, Xun and Ye, Tian and Zhang, Yanting and Lu, Yan and Hwang, Jenq-Neng and Wang, Gaoang}, title = {MovieChat: From Dense Token to Sparse Memory for Long Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18221-18232} }
Comparing the Decision-Making Mechanisms by Transformers and CNNs via Explanation Methods-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2024_CVPR, author = {Jiang, Mingqi and Khorram, Saeed and Fuxin, Li}, title = {Comparing the Decision-Making Mechanisms by Transformers and CNNs via Explanation Methods}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9546-9555} }
A Unified Diffusion Framework for Scene-aware Human Motion Estimation from Sparse Signals-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2024_CVPR, author = {Tang, Jiangnan and Wang, Jingya and Ji, Kaiyang and Xu, Lan and Yu, Jingyi and Shi, Ye}, title = {A Unified Diffusion Framework for Scene-aware Human Motion Estimation from Sparse Signals}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21251-21262} }
Single Domain Generalization for Crowd Counting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2024_CVPR, author = {Peng, Zhuoxuan and Chan, S.-H. Gary}, title = {Single Domain Generalization for Crowd Counting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28025-28034} }
Atlantis: Enabling Underwater Depth Estimation with Stable Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Fan and You, Shaodi and Li, Yu and Fu, Ying}, title = {Atlantis: Enabling Underwater Depth Estimation with Stable Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11852-11861} }
Matching Anything by Segmenting Anything-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Siyuan and Ke, Lei and Danelljan, Martin and Piccinelli, Luigi and Segu, Mattia and Van Gool, Luc and Yu, Fisher}, title = {Matching Anything by Segmenting Anything}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18963-18973} }
Task-Aware Encoder Control for Deep Video Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ge_2024_CVPR, author = {Ge, Xingtong and Luo, Jixiang and Zhang, Xinjie and Xu, Tongda and Lu, Guo and He, Dailan and Geng, Jing and Wang, Yan and Zhang, Jun and Qin, Hongwei}, title = {Task-Aware Encoder Control for Deep Video Compression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26036-26045} }
Multi-scale Dynamic and Hierarchical Relationship Modeling for Facial Action Units Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Zihan and Song, Siyang and Luo, Cheng and Deng, Songhe and Xie, Weicheng and Shen, Linlin}, title = {Multi-scale Dynamic and Hierarchical Relationship Modeling for Facial Action Units Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1270-1280} }
Decoupled Pseudo-labeling for Semi-Supervised Monocular 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Jiacheng and Li, Jiaming and Lin, Xiangru and Zhang, Wei and Tan, Xiao and Han, Junyu and Ding, Errui and Wang, Jingdong and Li, Guanbin}, title = {Decoupled Pseudo-labeling for Semi-Supervised Monocular 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16923-16932} }
Temporally Consistent Unbalanced Optimal Transport for Unsupervised Action Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Ming and Gould, Stephen}, title = {Temporally Consistent Unbalanced Optimal Transport for Unsupervised Action Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14618-14627} }
Learning Transferable Negative Prompts for Out-of-Distribution Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Tianqi and Pang, Guansong and Bai, Xiao and Miao, Wenjun and Zheng, Jin}, title = {Learning Transferable Negative Prompts for Out-of-Distribution Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17584-17594} }
Long-Tail Class Incremental Learning via Independent Sub-prototype Construction-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Xi and Yang, Xu and Yin, Jie and Wei, Kun and Deng, Cheng}, title = {Long-Tail Class Incremental Learning via Independent Sub-prototype Construction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28598-28607} }
Learning with Unreliability: Fast Few-shot Voxel Radiance Fields with Relative Geometric Consistency-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Yingjie and Liu, Bangzhen and Tang, Hao and Deng, Bailin and He, Shengfeng}, title = {Learning with Unreliability: Fast Few-shot Voxel Radiance Fields with Relative Geometric Consistency}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20342-20351} }
Towards Understanding and Improving Adversarial Robustness of Vision Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Jain_2024_CVPR, author = {Jain, Samyak and Dutta, Tanima}, title = {Towards Understanding and Improving Adversarial Robustness of Vision Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24736-24745} }
EventEgo3D: 3D Human Motion Capture from Egocentric Event Streams-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Millerdurai_2024_CVPR, author = {Millerdurai, Christen and Akada, Hiroyasu and Wang, Jian and Luvizon, Diogo and Theobalt, Christian and Golyanik, Vladislav}, title = {EventEgo3D: 3D Human Motion Capture from Egocentric Event Streams}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1186-1195} }
Holistic Features are almost Sufficient for Text-to-Video Retrieval-
[pdf]
[bibtex]@InProceedings{Tian_2024_CVPR, author = {Tian, Kaibin and Zhao, Ruixiang and Xin, Zijie and Lan, Bangxiang and Li, Xirong}, title = {Holistic Features are almost Sufficient for Text-to-Video Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17138-17147} }
A Call to Reflect on Evaluation Practices for Age Estimation: Comparative Analysis of the State-of-the-Art and a Unified Benchmark-
[pdf]
[supp]
[bibtex]@InProceedings{Paplham_2024_CVPR, author = {Paplh{\'a}m, Jakub and Franc, Vojt{\v{e}}ch}, title = {A Call to Reflect on Evaluation Practices for Age Estimation: Comparative Analysis of the State-of-the-Art and a Unified Benchmark}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1196-1205} }
CosalPure: Learning Concept from Group Images for Robust Co-Saliency Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhu_2024_CVPR, author = {Zhu, Jiayi and Guo, Qing and Juefei-Xu, Felix and Huang, Yihao and Liu, Yang and Pu, Geguang}, title = {CosalPure: Learning Concept from Group Images for Robust Co-Saliency Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3669-3678} }
Uncertainty-aware Action Decoupling Transformer for Action Anticipation-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2024_CVPR, author = {Guo, Hongji and Agarwal, Nakul and Lo, Shao-Yuan and Lee, Kwonjoon and Ji, Qiang}, title = {Uncertainty-aware Action Decoupling Transformer for Action Anticipation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18644-18654} }
MRFP: Learning Generalizable Semantic Segmentation from Sim-2-Real with Multi-Resolution Feature Perturbation-
[pdf]
[supp]
[bibtex]@InProceedings{Udupa_2024_CVPR, author = {Udupa, Sumanth and Gurunath, Prajwal and Sikdar, Aniruddh and Sundaram, Suresh}, title = {MRFP: Learning Generalizable Semantic Segmentation from Sim-2-Real with Multi-Resolution Feature Perturbation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5904-5914} }
S-DyRF: Reference-Based Stylized Radiance Fields for Dynamic Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Xingyi and Cao, Zhiguo and Wu, Yizheng and Wang, Kewei and Xian, Ke and Wang, Zhe and Lin, Guosheng}, title = {S-DyRF: Reference-Based Stylized Radiance Fields for Dynamic Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20102-20112} }
MotionEditor: Editing Video Motion via Content-Aware Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tu_2024_CVPR, author = {Tu, Shuyuan and Dai, Qi and Cheng, Zhi-Qi and Hu, Han and Han, Xintong and Wu, Zuxuan and Jiang, Yu-Gang}, title = {MotionEditor: Editing Video Motion via Content-Aware Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7882-7891} }
What, How, and When Should Object Detectors Update in Continually Changing Test Domains?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yoo_2024_CVPR, author = {Yoo, Jayeon and Lee, Dongkwan and Chung, Inseop and Kim, Donghyun and Kwak, Nojun}, title = {What, How, and When Should Object Detectors Update in Continually Changing Test Domains?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23354-23363} }
One-Prompt to Segment All Medical Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Junde and Xu, Min}, title = {One-Prompt to Segment All Medical Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11302-11312} }
Bayesian Exploration of Pre-trained Models for Low-shot Image Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Miao_2024_CVPR, author = {Miao, Yibo and Lei, Yu and Zhou, Feng and Deng, Zhijie}, title = {Bayesian Exploration of Pre-trained Models for Low-shot Image Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23849-23859} }
GROUNDHOG: Grounding Large Language Models to Holistic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Yichi and Ma, Ziqiao and Gao, Xiaofeng and Shakiah, Suhaila and Gao, Qiaozi and Chai, Joyce}, title = {GROUNDHOG: Grounding Large Language Models to Holistic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14227-14238} }
Doubly Abductive Counterfactual Inference for Text-based Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2024_CVPR, author = {Song, Xue and Cui, Jiequan and Zhang, Hanwang and Chen, Jingjing and Hong, Richang and Jiang, Yu-Gang}, title = {Doubly Abductive Counterfactual Inference for Text-based Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9162-9171} }
RoMa: Robust Dense Feature Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Edstedt_2024_CVPR, author = {Edstedt, Johan and Sun, Qiyu and B{\"o}kman, Georg and Wadenb{\"a}ck, M{\r{a}}rten and Felsberg, Michael}, title = {RoMa: Robust Dense Feature Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19790-19800} }
Omni-SMoLA: Boosting Generalist Multimodal Models with Soft Mixture of Low-rank Experts-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Jialin and Hu, Xia and Wang, Yaqing and Pang, Bo and Soricut, Radu}, title = {Omni-SMoLA: Boosting Generalist Multimodal Models with Soft Mixture of Low-rank Experts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14205-14215} }
SeMoLi: What Moves Together Belongs Together-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Seidenschwarz_2024_CVPR,
  author    = {Seidenschwarz, Jenny and Osep, Aljosa and Ferroni, Francesco and Lucey, Simon and Leal-Taixe, Laura},
  title     = {{SeMoLi}: What Moves Together Belongs Together},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14685--14694},
}
Insights from the Use of Previously Unseen Neural Architecture Search Datasets-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Geada_2024_CVPR,
  author    = {Geada, Rob and Towers, David and Forshaw, Matthew and Atapour-Abarghouei, Amir and McGough, A. Stephen},
  title     = {Insights from the Use of Previously Unseen Neural Architecture Search Datasets},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {22541--22550},
}
Adversarially Robust Few-shot Learning via Parameter Co-distillation of Similarity and Class Concept Learners-
[pdf]
[supp]
[bibtex]@InProceedings{Dong_2024_CVPR,
  author    = {Dong, Junhao and Koniusz, Piotr and Chen, Junxi and Xie, Xiaohua and Ong, Yew-Soon},
  title     = {Adversarially Robust Few-shot Learning via Parameter Co-distillation of Similarity and Class Concept Learners},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {28535--28544},
}
Context-Guided Spatio-Temporal Video Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gu_2024_CVPR,
  author    = {Gu, Xin and Fan, Heng and Huang, Yan and Luo, Tiejian and Zhang, Libo},
  title     = {Context-Guided Spatio-Temporal Video Grounding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18330--18339},
}
Explaining the Implicit Neural Canvas: Connecting Pixels to Neurons by Tracing their Contributions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Padmanabhan_2024_CVPR,
  author    = {Padmanabhan, Namitha and Gwilliam, Matthew and Kumar, Pulkit and Maiya, Shishira R and Ehrlich, Max and Shrivastava, Abhinav},
  title     = {Explaining the Implicit Neural Canvas: Connecting Pixels to Neurons by Tracing their Contributions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10957--10967},
}
APISR: Anime Production Inspired Real-World Anime Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Boyang and Yang, Fengyu and Yu, Xihang and Zhang, Chao and Zhao, Hanbin},
  title     = {{APISR}: Anime Production Inspired Real-World Anime Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {25574--25584},
}
MVCPS-NeuS: Multi-view Constrained Photometric Stereo for Neural Surface Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Santo_2024_CVPR,
  author    = {Santo, Hiroaki and Okura, Fumio and Matsushita, Yasuyuki},
  title     = {{MVCPS-NeuS}: Multi-view Constrained Photometric Stereo for Neural Surface Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {20475--20484},
}
ULIP-2: Towards Scalable Multimodal Pre-training for 3D Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Xue_2024_CVPR,
  author    = {Xue, Le and Yu, Ning and Zhang, Shu and Panagopoulou, Artemis and Li, Junnan and Mart{\'\i}n-Mart{\'\i}n, Roberto and Wu, Jiajun and Xiong, Caiming and Xu, Ran and Niebles, Juan Carlos and Savarese, Silvio},
  title     = {{ULIP-2}: Towards Scalable Multimodal Pre-training for {3D} Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {27091--27101},
}
Normalizing Flows on the Product Space of SO(3) Manifolds for Probabilistic Human Pose Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Dunkel_2024_CVPR,
  author    = {D{\"u}nkel, Olaf and Salzmann, Tim and Pfaff, Florian},
  title     = {Normalizing Flows on the Product Space of {SO(3)} Manifolds for Probabilistic Human Pose Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2285--2294},
}
Adapting to Length Shift: FlexiLength Network for Trajectory Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Yi and Fu, Yun},
  title     = {Adapting to Length Shift: {FlexiLength} Network for Trajectory Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15226--15237},
}
WorDepth: Variational Language Prior for Monocular Depth Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2024_CVPR,
  author    = {Zeng, Ziyao and Wang, Daniel and Yang, Fengyu and Park, Hyoungseob and Soatto, Stefano and Lao, Dong and Wong, Alex},
  title     = {{WorDepth}: Variational Language Prior for Monocular Depth Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9708--9719},
}
WaveMo: Learning Wavefront Modulations to See Through Scattering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2024_CVPR,
  author    = {Xie, Mingyang and Guo, Haiyun and Feng, Brandon Y. and Jin, Lingbo and Veeraraghavan, Ashok and Metzler, Christopher A.},
  title     = {{WaveMo}: Learning Wavefront Modulations to See Through Scattering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {25276--25285},
}
ReGenNet: Towards Human Action-Reaction Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Liang and Zhou, Yizhou and Yan, Yichao and Jin, Xin and Zhu, Wenhan and Rao, Fengyun and Yang, Xiaokang and Zeng, Wenjun},
  title     = {{ReGenNet}: Towards Human Action-Reaction Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1759--1769},
}
A Simple Baseline for Efficient Hand Mesh Reconstruction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Zhishan and Zhou, Shihao and Lv, Zhi and Zou, Minqiang and Tang, Yao and Liang, Jiajun},
  title     = {A Simple Baseline for Efficient Hand Mesh Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1367--1376},
}
Integrating Efficient Optimal Transport and Functional Maps For Unsupervised Shape Correspondence Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Le_2024_CVPR,
  author    = {Le, Tung and Nguyen, Khai and Sun, Shanlin and Ho, Nhat and Xie, Xiaohui},
  title     = {Integrating Efficient Optimal Transport and Functional Maps For Unsupervised Shape Correspondence Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {23188--23198},
}
PhotoMaker: Customizing Realistic Human Photos via Stacked ID Embedding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Zhen and Cao, Mingdeng and Wang, Xintao and Qi, Zhongang and Cheng, Ming-Ming and Shan, Ying},
  title     = {{PhotoMaker}: Customizing Realistic Human Photos via Stacked {ID} Embedding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8640--8650},
}
Score-Guided Diffusion for 3D Human Recovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Stathopoulos_2024_CVPR,
  author    = {Stathopoulos, Anastasis and Han, Ligong and Metaxas, Dimitris},
  title     = {Score-Guided Diffusion for {3D} Human Recovery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {906--915},
}
Check Locate Rectify: A Training-Free Layout Calibration System for Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gong_2024_CVPR,
  author    = {Gong, Biao and Huang, Siteng and Feng, Yutong and Zhang, Shiwei and Li, Yuyuan and Liu, Yu},
  title     = {Check Locate Rectify: A Training-Free Layout Calibration System for Text-to-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6624--6634},
}
ODCR: Orthogonal Decoupling Contrastive Regularization for Unpaired Image Dehazing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Zhongze and Zhao, Haitao and Peng, Jingchao and Yao, Lujian and Zhao, Kaijie},
  title     = {{ODCR}: Orthogonal Decoupling Contrastive Regularization for Unpaired Image Dehazing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {25479--25489},
}
Pose-Transformed Equivariant Network for 3D Point Trajectory Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2024_CVPR,
  author    = {Yu, Ruixuan and Sun, Jian},
  title     = {Pose-Transformed Equivariant Network for {3D} Point Trajectory Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5503--5512},
}
OmniSeg3D: Omniversal 3D Segmentation via Hierarchical Contrastive Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ying_2024_CVPR,
  author    = {Ying, Haiyang and Yin, Yixuan and Zhang, Jinzhi and Wang, Fan and Yu, Tao and Huang, Ruqi and Fang, Lu},
  title     = {{OmniSeg3D}: Omniversal {3D} Segmentation via Hierarchical Contrastive Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {20612--20622},
}
Revisiting Sampson Approximations for Geometric Estimation Problems-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rydell_2024_CVPR,
  author    = {Rydell, Felix and Torres, Ang{\'e}lica and Larsson, Viktor},
  title     = {Revisiting {Sampson} Approximations for Geometric Estimation Problems},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4990--4998},
}
Fixed Point Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bai_2024_CVPR,
  author    = {Bai, Xingjian and Melas-Kyriazi, Luke},
  title     = {Fixed Point Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9430--9440},
}
Simple Semantic-Aided Few-Shot Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Hai and Xu, Junzhe and Jiang, Shanlin and He, Zhenan},
  title     = {Simple Semantic-Aided Few-Shot Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {28588--28597},
}
A Unified Framework for Microscopy Defocus Deblur with Multi-Pyramid Transformer and Contrastive Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Yuelin and Zheng, Pengyu and Yan, Wanquan and Fang, Chengyu and Cheng, Shing Shin},
  title     = {A Unified Framework for Microscopy Defocus Deblur with Multi-Pyramid Transformer and Contrastive Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11125--11136},
}
Frozen Feature Augmentation for Few-Shot Image Classification-
[pdf]
[supp]
[bibtex]@InProceedings{Bar_2024_CVPR,
  author    = {B{\"a}r, Andreas and Houlsby, Neil and Dehghani, Mostafa and Kumar, Manoj},
  title     = {Frozen Feature Augmentation for Few-Shot Image Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16046--16057},
}
Residual Learning in Diffusion Models-
[pdf]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Junyu and Liu, Daochang and Park, Eunbyung and Zhang, Shichao and Xu, Chang},
  title     = {Residual Learning in Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7289--7299},
}
Leveraging Cross-Modal Neighbor Representation for Improved CLIP Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yi_2024_CVPR,
  author    = {Yi, Chao and Ren, Lu and Zhan, De-Chuan and Ye, Han-Jia},
  title     = {Leveraging Cross-Modal Neighbor Representation for Improved {CLIP} Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {27402--27411},
}
Beyond Textual Constraints: Learning Novel Diffusion Conditions with Fewer Examples-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2024_CVPR,
  author    = {Yu, Yuyang and Liu, Bangzhen and Zheng, Chenxi and Xu, Xuemiao and Zhang, Huaidong and He, Shengfeng},
  title     = {Beyond Textual Constraints: Learning Novel Diffusion Conditions with Fewer Examples},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7109--7118},
}
Incorporating Geo-Diverse Knowledge into Prompting for Increased Geographical Robustness in Object Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Buettner_2024_CVPR,
  author    = {Buettner, Kyle and Malakouti, Sina and Li, Xiang Lorraine and Kovashka, Adriana},
  title     = {Incorporating Geo-Diverse Knowledge into Prompting for Increased Geographical Robustness in Object Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13515--13524},
}
Revisiting Adversarial Training Under Long-Tailed Distributions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yue_2024_CVPR,
  author    = {Yue, Xinli and Mou, Ningping and Wang, Qian and Zhao, Lingchen},
  title     = {Revisiting Adversarial Training Under Long-Tailed Distributions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {24492--24501},
}
Exploiting Style Latent Flows for Generalizing Deepfake Video Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2024_CVPR,
  author    = {Choi, Jongwook and Kim, Taehoon and Jeong, Yonghyun and Baek, Seungryul and Choi, Jongwon},
  title     = {Exploiting Style Latent Flows for Generalizing Deepfake Video Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1133--1143},
}
PIN: Positional Insert Unlocks Object Localisation Abilities in VLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dorkenwald_2024_CVPR,
  author    = {Dorkenwald, Michael and Barazani, Nimrod and Snoek, Cees G. M. and Asano, Yuki M.},
  title     = {{PIN}: Positional Insert Unlocks Object Localisation Abilities in {VLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13548--13558},
}
UniGarmentManip: A Unified Framework for Category-Level Garment Manipulation via Dense Visual Correspondence-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Ruihai and Lu, Haoran and Wang, Yiyan and Wang, Yubo and Dong, Hao},
  title     = {{UniGarmentManip}: A Unified Framework for Category-Level Garment Manipulation via Dense Visual Correspondence},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16340--16350},
}
Multi-Attribute Interactions Matter for 3D Visual Grounding-
[pdf]
[bibtex]@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Can and Han, Yuehui and Xu, Rui and Hui, Le and Xie, Jin and Yang, Jian},
  title     = {Multi-Attribute Interactions Matter for {3D} Visual Grounding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17253--17262},
}
Video-P2P: Video Editing with Cross-attention Control-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Shaoteng and Zhang, Yuechen and Li, Wenbo and Lin, Zhe and Jia, Jiaya},
  title     = {{Video-P2P}: Video Editing with Cross-attention Control},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8599--8608},
}
Hunting Attributes: Context Prototype-Aware Learning for Weakly Supervised Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2024_CVPR,
  author    = {Tang, Feilong and Xu, Zhongxing and Qu, Zhaojun and Feng, Wei and Jiang, Xingjian and Ge, Zongyuan},
  title     = {Hunting Attributes: Context Prototype-Aware Learning for Weakly Supervised Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3324--3334},
}
SCINeRF: Neural Radiance Fields from a Snapshot Compressive Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Yunhao and Wang, Xiaodong and Wang, Ping and Yuan, Xin and Liu, Peidong},
  title     = {{SCINeRF}: Neural Radiance Fields from a Snapshot Compressive Image},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10542--10552},
}
PIE-NeRF: Physics-based Interactive Elastodynamics with NeRF-
[pdf]
[supp]
[bibtex]@InProceedings{Feng_2024_CVPR,
  author    = {Feng, Yutao and Shang, Yintong and Li, Xuan and Shao, Tianjia and Jiang, Chenfanfu and Yang, Yin},
  title     = {{PIE-NeRF}: Physics-based Interactive Elastodynamics with {NeRF}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4450--4461},
}
Improved Visual Grounding through Self-Consistent Explanations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2024_CVPR,
  author    = {He, Ruozhen and Cascante-Bonilla, Paola and Yang, Ziyan and Berg, Alexander C. and Ordonez, Vicente},
  title     = {Improved Visual Grounding through Self-Consistent Explanations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13095--13105},
}
Monkey: Image Resolution and Text Label Are Important Things for Large Multi-modal Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Zhang and Yang, Biao and Liu, Qiang and Ma, Zhiyin and Zhang, Shuo and Yang, Jingxu and Sun, Yabo and Liu, Yuliang and Bai, Xiang},
  title     = {Monkey: Image Resolution and Text Label Are Important Things for Large Multi-modal Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {26763--26773},
}
FlashAvatar: High-fidelity Head Avatar with Efficient Gaussian Embedding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiang_2024_CVPR,
  author    = {Xiang, Jun and Gao, Xuan and Guo, Yudong and Zhang, Juyong},
  title     = {{FlashAvatar}: High-fidelity Head Avatar with Efficient {Gaussian} Embedding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1802--1812},
}
DifFlow3D: Toward Robust Uncertainty-Aware Scene Flow Estimation with Iterative Diffusion-Based Refinement-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Jiuming and Wang, Guangming and Ye, Weicai and Jiang, Chaokang and Han, Jinru and Liu, Zhe and Zhang, Guofeng and Du, Dalong and Wang, Hesheng},
  title     = {{DifFlow3D}: Toward Robust Uncertainty-Aware Scene Flow Estimation with Iterative Diffusion-Based Refinement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15109--15119},
}
Decompose-and-Compose: A Compositional Approach to Mitigating Spurious Correlation-
[pdf]
[supp]
[bibtex]@InProceedings{Noohdani_2024_CVPR,
  author    = {Noohdani, Fahimeh Hosseini and Hosseini, Parsa and Parast, Aryan Yazdan and Araghi, Hamidreza Yaghoubi and Baghshah, Mahdieh Soleymani},
  title     = {Decompose-and-Compose: A Compositional Approach to Mitigating Spurious Correlation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {27662--27671},
}
FlashEval: Towards Fast and Accurate Evaluation of Text-to-image Diffusion Generative Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Lin and Zhao, Tianchen and Lin, Zinan and Ning, Xuefei and Dai, Guohao and Yang, Huazhong and Wang, Yu},
  title     = {{FlashEval}: Towards Fast and Accurate Evaluation of Text-to-image Diffusion Generative Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16122--16131},
}
ZERO-IG: Zero-Shot Illumination-Guided Joint Denoising and Adaptive Enhancement for Low-Light Images-
[pdf]
[supp]
[bibtex]@InProceedings{Shi_2024_CVPR,
  author    = {Shi, Yiqi and Liu, Duo and Zhang, Liguo and Tian, Ye and Xia, Xuezhi and Fu, Xiaojing},
  title     = {{ZERO-IG}: Zero-Shot Illumination-Guided Joint Denoising and Adaptive Enhancement for Low-Light Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3015--3024},
}
View From Above: Orthogonal-View aware Cross-view Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Shan and Nguyen, Chuong and Liu, Jiawei and Zhang, Yanhao and Muthu, Sundaram and Maken, Fahira Afzal and Zhang, Kaihao and Li, Hongdong},
  title     = {View From Above: Orthogonal-View aware Cross-view Localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14843--14852},
}
FinePOSE: Fine-Grained Prompt-Driven 3D Human Pose Estimation via Diffusion Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Jinglin and Guo, Yijie and Peng, Yuxin},
  title     = {{FinePOSE}: Fine-Grained Prompt-Driven {3D} Human Pose Estimation via Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {561--570},
}
BEM: Balanced and Entropy-based Mix for Long-Tailed Semi-Supervised Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2024_CVPR,
  author    = {Zheng, Hongwei and Zhou, Linyuan and Li, Han and Su, Jinming and Wei, Xiaoming and Xu, Xiaoming},
  title     = {{BEM}: Balanced and Entropy-based Mix for Long-Tailed Semi-Supervised Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {22893--22903},
}
HUGS: Holistic Urban 3D Scene Understanding via Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Hongyu and Shao, Jiahao and Xu, Lu and Bai, Dongfeng and Qiu, Weichao and Liu, Bingbing and Wang, Yue and Geiger, Andreas and Liao, Yiyi},
  title     = {{HUGS}: Holistic Urban {3D} Scene Understanding via {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {21336--21345},
}
DreamPropeller: Supercharge Text-to-3D Generation with Parallel Sampling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Linqi and Shih, Andy and Meng, Chenlin and Ermon, Stefano},
  title     = {{DreamPropeller}: Supercharge {Text-to-3D} Generation with Parallel Sampling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4610--4619},
}
PeVL: Pose-Enhanced Vision-Language Model for Fine-Grained Human Action Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Haosong and Leong, Mei Chee and Li, Liyuan and Lin, Weisi},
  title     = {{PeVL}: Pose-Enhanced Vision-Language Model for Fine-Grained Human Action Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18857--18867},
}
DeepCache: Accelerating Diffusion Models for Free-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2024_CVPR,
  author    = {Ma, Xinyin and Fang, Gongfan and Wang, Xinchao},
  title     = {{DeepCache}: Accelerating Diffusion Models for Free},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15762--15772},
}
GeoAuxNet: Towards Universal 3D Representation Learning for Multi-sensor Point Clouds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Shengjun and Fei, Xin and Duan, Yueqi},
  title     = {{GeoAuxNet}: Towards Universal {3D} Representation Learning for Multi-sensor Point Clouds},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {20019--20028},
}
Unveiling the Power of Audio-Visual Early Fusion Transformers with Dense Interactions through Masked Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mo_2024_CVPR,
  author    = {Mo, Shentong and Morgado, Pedro},
  title     = {Unveiling the Power of Audio-Visual Early Fusion Transformers with Dense Interactions through Masked Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {27186--27196},
}
Learning Correlation Structures for Vision Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR,
  author    = {Kim, Manjin and Seo, Paul Hongsuck and Schmid, Cordelia and Cho, Minsu},
  title     = {Learning Correlation Structures for Vision Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18941--18951},
}
Dysen-VDM: Empowering Dynamics-aware Text-to-Video Diffusion with LLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Fei_2024_CVPR,
  author    = {Fei, Hao and Wu, Shengqiong and Ji, Wei and Zhang, Hanwang and Chua, Tat-Seng},
  title     = {{Dysen-VDM}: Empowering Dynamics-aware Text-to-Video Diffusion with {LLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7641--7653},
}
PrPSeg: Universal Proposition Learning for Panoramic Renal Pathology Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2024_CVPR,
  author    = {Deng, Ruining and Liu, Quan and Cui, Can and Yao, Tianyuan and Yue, Jialin and Xiong, Juming and Yu, Lining and Wu, Yifei and Yin, Mengmeng and Wang, Yu and Zhao, Shilin and Tang, Yucheng and Yang, Haichun and Huo, Yuankai},
  title     = {{PrPSeg}: Universal Proposition Learning for Panoramic Renal Pathology Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11736--11746},
}
RepKPU: Point Cloud Upsampling with Kernel Point Representation and Deformation-
[pdf]
[supp]
[bibtex]@InProceedings{Rong_2024_CVPR,
  author    = {Rong, Yi and Zhou, Haoran and Xia, Kang and Mei, Cheng and Wang, Jiahao and Lu, Tong},
  title     = {{RepKPU}: Point Cloud Upsampling with Kernel Point Representation and Deformation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {21050--21060},
}
ConCon-Chi: Concept-Context Chimera Benchmark for Personalized Vision-Language Tasks-
[pdf]
[supp]
[bibtex]@InProceedings{Rosasco_2024_CVPR,
  author    = {Rosasco, Andrea and Berti, Stefano and Pasquale, Giulia and Malafronte, Damiano and Sato, Shogo and Segawa, Hiroyuki and Inada, Tetsugo and Natale, Lorenzo},
  title     = {{ConCon-Chi}: Concept-Context Chimera Benchmark for Personalized Vision-Language Tasks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {22239--22248},
}
Weakly-Supervised Audio-Visual Video Parsing with Prototype-based Pseudo-Labeling-
[pdf]
[supp]
[bibtex]@InProceedings{Rachavarapu_2024_CVPR,
  author    = {Rachavarapu, Kranthi Kumar and Ramakrishnan, Kalyan and Rajagopalan, A. N.},
  title     = {Weakly-Supervised Audio-Visual Video Parsing with Prototype-based Pseudo-Labeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18952--18962},
}
Intraoperative 2D/3D Image Registration via Differentiable X-ray Rendering-
[pdf]
[supp]
[bibtex]@InProceedings{Gopalakrishnan_2024_CVPR,
  author    = {Gopalakrishnan, Vivek and Dey, Neel and Golland, Polina},
  title     = {Intraoperative {2D/3D} Image Registration via Differentiable {X-ray} Rendering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11662--11672},
}
MICap: A Unified Model for Identity-Aware Movie Descriptions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Raajesh_2024_CVPR,
  author    = {Raajesh, Haran and Desanur, Naveen Reddy and Khan, Zeeshan and Tapaswi, Makarand},
  title     = {{MICap}: A Unified Model for Identity-Aware Movie Descriptions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14011--14021},
}
MonoDiff: Monocular 3D Object Detection and Pose Estimation with Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Ranasinghe_2024_CVPR,
  author    = {Ranasinghe, Yasiru and Hegde, Deepti and Patel, Vishal M.},
  title     = {{MonoDiff}: Monocular {3D} Object Detection and Pose Estimation with Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10659--10670},
}
General Object Foundation Model for Images and Videos at Scale-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Junfeng and Jiang, Yi and Liu, Qihao and Yuan, Zehuan and Bai, Xiang and Bai, Song},
  title     = {General Object Foundation Model for Images and Videos at Scale},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3783--3795},
}
An Upload-Efficient Scheme for Transferring Knowledge From a Server-Side Pre-trained Generator to Clients in Heterogeneous Federated Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Jianqing and Liu, Yang and Hua, Yang and Cao, Jian},
  title     = {An Upload-Efficient Scheme for Transferring Knowledge From a Server-Side Pre-trained Generator to Clients in Heterogeneous Federated Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12109--12119},
}
MeshGPT: Generating Triangle Meshes with Decoder-Only Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Siddiqui_2024_CVPR,
  author    = {Siddiqui, Yawar and Alliegro, Antonio and Artemov, Alexey and Tommasi, Tatiana and Sirigatti, Daniele and Rosov, Vladislav and Dai, Angela and Nie{\ss}ner, Matthias},
  title     = {{MeshGPT}: Generating Triangle Meshes with Decoder-Only Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {19615--19625},
}
Inlier Confidence Calibration for Point Cloud Registration-
[pdf]
[supp]
[bibtex]@InProceedings{Yuan_2024_CVPR, author = {Yuan, Yongzhe and Wu, Yue and Fan, Xiaolong and Gong, Maoguo and Miao, Qiguang and Ma, Wenping}, title = {Inlier Confidence Calibration for Point Cloud Registration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5312-5321} }
Instance-aware Exploration-Verification-Exploitation for Instance ImageGoal Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lei_2024_CVPR, author = {Lei, Xiaohan and Wang, Min and Zhou, Wengang and Li, Li and Li, Houqiang}, title = {Instance-aware Exploration-Verification-Exploitation for Instance ImageGoal Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16329-16339} }
One-2-3-45++: Fast Single Image to 3D Objects with Consistent Multi-View Generation and 3D Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Minghua and Shi, Ruoxi and Chen, Linghao and Zhang, Zhuoyang and Xu, Chao and Wei, Xinyue and Chen, Hansheng and Zeng, Chong and Gu, Jiayuan and Su, Hao}, title = {One-2-3-45++: Fast Single Image to 3D Objects with Consistent Multi-View Generation and 3D Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10072-10083} }
Image Restoration by Denoising Diffusion Models with Iteratively Preconditioned Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Garber_2024_CVPR, author = {Garber, Tomer and Tirer, Tom}, title = {Image Restoration by Denoising Diffusion Models with Iteratively Preconditioned Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25245-25254} }
Let's Think Outside the Box: Exploring Leap-of-Thought in Large Language Models with Creative Humor Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhong_2024_CVPR, author = {Zhong, Shanshan and Huang, Zhongzhan and Gao, Shanghua and Wen, Wushao and Lin, Liang and Zitnik, Marinka and Zhou, Pan}, title = {Let's Think Outside the Box: Exploring Leap-of-Thought in Large Language Models with Creative Humor Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13246-13257} }
SceneFun3D: Fine-Grained Functionality and Affordance Understanding in 3D Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Delitzas_2024_CVPR, author = {Delitzas, Alexandros and Takmaz, Ayca and Tombari, Federico and Sumner, Robert and Pollefeys, Marc and Engelmann, Francis}, title = {SceneFun3D: Fine-Grained Functionality and Affordance Understanding in 3D Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14531-14542} }
Readout Guidance: Learning Control from Diffusion Features-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2024_CVPR, author = {Luo, Grace and Darrell, Trevor and Wang, Oliver and Goldman, Dan B and Holynski, Aleksander}, title = {Readout Guidance: Learning Control from Diffusion Features}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8217-8227} }
A Unified Approach for Text- and Image-guided 4D Scene Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Zheng_2024_CVPR, author = {Zheng, Yufeng and Li, Xueting and Nagano, Koki and Liu, Sifei and Hilliges, Otmar and De Mello, Shalini}, title = {A Unified Approach for Text- and Image-guided 4D Scene Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7300-7309} }
GaussianAvatar: Towards Realistic Human Avatar Modeling from a Single Video via Animatable 3D Gaussians-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2024_CVPR, author = {Hu, Liangxiao and Zhang, Hongwen and Zhang, Yuxiang and Zhou, Boyao and Liu, Boning and Zhang, Shengping and Nie, Liqiang}, title = {GaussianAvatar: Towards Realistic Human Avatar Modeling from a Single Video via Animatable 3D Gaussians}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {634-644} }
MTMMC: A Large-Scale Real-World Multi-Modal Camera Tracking Benchmark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Woo_2024_CVPR, author = {Woo, Sanghyun and Park, Kwanyong and Shin, Inkyu and Kim, Myungchul and Kweon, In So}, title = {MTMMC: A Large-Scale Real-World Multi-Modal Camera Tracking Benchmark}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22335-22346} }
Enhanced Motion-Text Alignment for Image-to-Video Transfer Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Wei and Wan, Chaoqun and Liu, Tongliang and Tian, Xinmei and Shen, Xu and Ye, Jieping}, title = {Enhanced Motion-Text Alignment for Image-to-Video Transfer Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18504-18515} }
DAP: A Dynamic Adversarial Patch for Evading Person Detectors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guesmi_2024_CVPR, author = {Guesmi, Amira and Ding, Ruitian and Hanif, Muhammad Abdullah and Alouani, Ihsen and Shafique, Muhammad}, title = {DAP: A Dynamic Adversarial Patch for Evading Person Detectors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24595-24604} }
Learned Lossless Image Compression based on Bit Plane Slicing-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Zhe and Wang, Huairui and Chen, Zhenzhong and Liu, Shan}, title = {Learned Lossless Image Compression based on Bit Plane Slicing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27579-27588} }
UV-IDM: Identity-Conditioned Latent Diffusion Model for Face UV-Texture Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Hong and Feng, Yutang and Xue, Song and Liu, Xuhui and Zeng, Bohan and Li, Shanglin and Liu, Boyu and Liu, Jianzhuang and Han, Shumin and Zhang, Baochang}, title = {UV-IDM: Identity-Conditioned Latent Diffusion Model for Face UV-Texture Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10585-10595} }
Mosaic-SDF for 3D Generative Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yariv_2024_CVPR, author = {Yariv, Lior and Puny, Omri and Gafni, Oran and Lipman, Yaron}, title = {Mosaic-SDF for 3D Generative Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4630-4639} }
Diffusion Handles Enabling 3D Edits for Diffusion Models by Lifting Activations to 3D-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pandey_2024_CVPR, author = {Pandey, Karran and Guerrero, Paul and Gadelha, Matheus and Hold-Geoffroy, Yannick and Singh, Karan and Mitra, Niloy J.}, title = {Diffusion Handles Enabling 3D Edits for Diffusion Models by Lifting Activations to 3D}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7695-7704} }
A Pedestrian is Worth One Prompt: Towards Language Guidance Person Re-Identification-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Zexian and Wu, Dayan and Wu, Chenming and Lin, Zheng and Gu, Jingzi and Wang, Weiping}, title = {A Pedestrian is Worth One Prompt: Towards Language Guidance Person Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17343-17353} }
Friendly Sharpness-Aware Minimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Tao and Zhou, Pan and He, Zhengbao and Cheng, Xinwen and Huang, Xiaolin}, title = {Friendly Sharpness-Aware Minimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5631-5640} }
BIVDiff: A Training-Free Framework for General-Purpose Video Synthesis via Bridging Image and Video Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2024_CVPR, author = {Shi, Fengyuan and Gu, Jiaxi and Xu, Hang and Xu, Songcen and Zhang, Wei and Wang, Limin}, title = {BIVDiff: A Training-Free Framework for General-Purpose Video Synthesis via Bridging Image and Video Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7393-7402} }
NC-TTT: A Noise Constrastive Approach for Test-Time Training-
[pdf]
[supp]
[bibtex]@InProceedings{Osowiechi_2024_CVPR, author = {Osowiechi, David and Hakim, Gustavo A. Vargas and Noori, Mehrdad and Cheraghalikhani, Milad and Bahri, Ali and Yazdanpanah, Moslem and Ben Ayed, Ismail and Desrosiers, Christian}, title = {NC-TTT: A Noise Constrastive Approach for Test-Time Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6078-6086} }
NetTrack: Tracking Highly Dynamic Objects with a Net-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2024_CVPR, author = {Zheng, Guangze and Lin, Shijie and Zuo, Haobo and Fu, Changhong and Pan, Jia}, title = {NetTrack: Tracking Highly Dynamic Objects with a Net}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19145-19155} }
Grounded Question-Answering in Long Egocentric Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Di_2024_CVPR, author = {Di, Shangzhe and Xie, Weidi}, title = {Grounded Question-Answering in Long Egocentric Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12934-12943} }
HPNet: Dynamic Trajectory Forecasting with Historical Prediction Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2024_CVPR, author = {Tang, Xiaolong and Kan, Meina and Shan, Shiguang and Ji, Zhilong and Bai, Jinfeng and Chen, Xilin}, title = {HPNet: Dynamic Trajectory Forecasting with Historical Prediction Attention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15261-15270} }
Flexible Depth Completion for Sparse and Varying Point Densities-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2024_CVPR, author = {Park, Jinhyung and Li, Yu-Jhe and Kitani, Kris}, title = {Flexible Depth Completion for Sparse and Varying Point Densities}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21540-21550} }
Small Scale Data-Free Knowledge Distillation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, He and Wang, Yikai and Liu, Huaping and Sun, Fuchun and Yao, Anbang}, title = {Small Scale Data-Free Knowledge Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6008-6016} }
Shadows Don't Lie and Lines Can't Bend! Generative Models don't know Projective Geometry...for now-
[pdf]
[supp]
[bibtex]@InProceedings{Sarkar_2024_CVPR, author = {Sarkar, Ayush and Mai, Hanlin and Mahapatra, Amitabh and Lazebnik, Svetlana and Forsyth, D.A. and Bhattad, Anand}, title = {Shadows Don't Lie and Lines Can't Bend! Generative Models don't know Projective Geometry...for now}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28140-28149} }
CFPL-FAS: Class Free Prompt Learning for Generalizable Face Anti-spoofing-
[pdf]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Ajian and Xue, Shuai and Gan, Jianwen and Wan, Jun and Liang, Yanyan and Deng, Jiankang and Escalera, Sergio and Lei, Zhen}, title = {CFPL-FAS: Class Free Prompt Learning for Generalizable Face Anti-spoofing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {222-232} }
SI-MIL: Taming Deep MIL for Self-Interpretability in Gigapixel Histopathology-
[pdf]
[supp]
[bibtex]@InProceedings{Kapse_2024_CVPR, author = {Kapse, Saarthak and Pati, Pushpak and Das, Srijan and Zhang, Jingwei and Chen, Chao and Vakalopoulou, Maria and Saltz, Joel and Samaras, Dimitris and Gupta, Rajarsi R. and Prasanna, Prateek}, title = {SI-MIL: Taming Deep MIL for Self-Interpretability in Gigapixel Histopathology}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11226-11237} }
GEARS: Local Geometry-aware Hand-object Interaction Synthesis-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR, author = {Zhou, Keyang and Bhatnagar, Bharat Lal and Lenssen, Jan Eric and Pons-Moll, Gerard}, title = {GEARS: Local Geometry-aware Hand-object Interaction Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20634-20643} }
Open Vocabulary Semantic Scene Sketch Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bourouis_2024_CVPR, author = {Bourouis, Ahmed and Fan, Judith E. and Gryaditskaya, Yulia}, title = {Open Vocabulary Semantic Scene Sketch Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4176-4186} }
IntrinsicAvatar: Physically Based Inverse Rendering of Dynamic Humans from Monocular Videos via Explicit Ray Tracing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Shaofei and Antic, Bozidar and Geiger, Andreas and Tang, Siyu}, title = {IntrinsicAvatar: Physically Based Inverse Rendering of Dynamic Humans from Monocular Videos via Explicit Ray Tracing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1877-1888} }
Efficient Detection of Long Consistent Cycles and its Application to Distributed Synchronization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Shaohan and Shi, Yunpeng and Lerman, Gilad}, title = {Efficient Detection of Long Consistent Cycles and its Application to Distributed Synchronization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5260-5269} }
LayoutFormer: Hierarchical Text Detection Towards Scene Text Understanding-
[pdf]
[bibtex]@InProceedings{Liang_2024_CVPR, author = {Liang, Min and Ma, Jia-Wei and Zhu, Xiaobin and Qin, Jingyan and Yin, Xu-Cheng}, title = {LayoutFormer: Hierarchical Text Detection Towards Scene Text Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15665-15674} }
Vlogger: Make Your Dream A Vlog-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhuang_2024_CVPR, author = {Zhuang, Shaobin and Li, Kunchang and Chen, Xinyuan and Wang, Yaohui and Liu, Ziwei and Qiao, Yu and Wang, Yali}, title = {Vlogger: Make Your Dream A Vlog}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8806-8817} }
CodedEvents: Optimal Point-Spread-Function Engineering for 3D-Tracking with Event Cameras-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shah_2024_CVPR, author = {Shah, Sachin and Chan, Matthew A. and Cai, Haoming and Chen, Jingxi and Kulshrestha, Sakshum and Singh, Chahat Deep and Aloimonos, Yiannis and Metzler, Christopher A.}, title = {CodedEvents: Optimal Point-Spread-Function Engineering for 3D-Tracking with Event Cameras}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25265-25275} }
GLOW: Global Layout Aware Attacks on Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Bao_2024_CVPR, author = {Bao, Jun and Liu, Buyu and Ren, Kui and Yu, Jun}, title = {GLOW: Global Layout Aware Attacks on Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12057-12066} }
Learning Discriminative Dynamics with Label Corruption for Noisy Label Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Suyeon and Lee, Dongha and Kang, SeongKu and Chae, Sukang and Jang, Sanghwan and Yu, Hwanjo}, title = {Learning Discriminative Dynamics with Label Corruption for Noisy Label Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22477-22487} }
Neural 3D Strokes: Creating Stylized 3D Scenes with Vectorized 3D Strokes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Duan_2024_CVPR, author = {Duan, Hao-Bin and Wang, Miao and Li, Yan-Xun and Yang, Yong-Liang}, title = {Neural 3D Strokes: Creating Stylized 3D Scenes with Vectorized 3D Strokes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5240-5249} }
SIRA: Scalable Inter-frame Relation and Association for Radar Perception-
[pdf]
[supp]
[bibtex]@InProceedings{Yataka_2024_CVPR, author = {Yataka, Ryoma and Wang, Pu and Boufounos, Petros and Takahashi, Ryuhei}, title = {SIRA: Scalable Inter-frame Relation and Association for Radar Perception}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15024-15034} }
VOODOO 3D: Volumetric Portrait Disentanglement For One-Shot 3D Head Reenactment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tran_2024_CVPR, author = {Tran, Phong and Zakharov, Egor and Ho, Long-Nhat and Tran, Anh Tuan and Hu, Liwen and Li, Hao}, title = {VOODOO 3D: Volumetric Portrait Disentanglement For One-Shot 3D Head Reenactment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10336-10348} }
Visual Fact Checker: Enabling High-Fidelity Detailed Caption Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ge_2024_CVPR, author = {Ge, Yunhao and Zeng, Xiaohui and Huffman, Jacob Samuel and Lin, Tsung-Yi and Liu, Ming-Yu and Cui, Yin}, title = {Visual Fact Checker: Enabling High-Fidelity Detailed Caption Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14033-14042} }
Communication-Efficient Collaborative Perception via Information Filling with Codebook-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2024_CVPR, author = {Hu, Yue and Peng, Juntong and Liu, Sifei and Ge, Junhao and Liu, Si and Chen, Siheng}, title = {Communication-Efficient Collaborative Perception via Information Filling with Codebook}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15481-15490} }
DiPrompT: Disentangled Prompt Tuning for Multiple Latent Domain Generalization in Federated Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bai_2024_CVPR, author = {Bai, Sikai and Zhang, Jie and Guo, Song and Li, Shuaicheng and Guo, Jingcai and Hou, Jun and Han, Tao and Lu, Xiaocheng}, title = {DiPrompT: Disentangled Prompt Tuning for Multiple Latent Domain Generalization in Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27284-27293} }
MVD-Fusion: Single-view 3D via Depth-consistent Multi-view Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2024_CVPR, author = {Hu, Hanzhe and Zhou, Zhizhuo and Jampani, Varun and Tulsiani, Shubham}, title = {MVD-Fusion: Single-view 3D via Depth-consistent Multi-view Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9698-9707} }
Effective Video Mirror Detection with Inconsistent Motion Cues-
[pdf]
[supp]
[bibtex]@InProceedings{Warren_2024_CVPR, author = {Warren, Alex and Xu, Ke and Lin, Jiaying and Tam, Gary K.L. and Lau, Rynson W.H.}, title = {Effective Video Mirror Detection with Inconsistent Motion Cues}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17244-17252} }
Multi-Object Tracking in the Dark-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Xinzhe and Ma, Kang and Liu, Qiankun and Zou, Yunhao and Fu, Ying}, title = {Multi-Object Tracking in the Dark}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {382-392} }
UniHuman: A Unified Model For Editing Human Images in the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Nannan and Liu, Qing and Singh, Krishna Kumar and Wang, Yilin and Zhang, Jianming and Plummer, Bryan A. and Lin, Zhe}, title = {UniHuman: A Unified Model For Editing Human Images in the Wild}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2039-2048} }
DiffAgent: Fast and Accurate Text-to-Image API Selection with Large Language Model-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Lirui and Yang, Yue and Zhang, Kaipeng and Shao, Wenqi and Zhang, Yuxin and Qiao, Yu and Luo, Ping and Ji, Rongrong}, title = {DiffAgent: Fast and Accurate Text-to-Image API Selection with Large Language Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6390-6399} }
In Search of a Data Transformation That Accelerates Neural Field Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Seo_2024_CVPR, author = {Seo, Junwon and Lee, Sangyoon and Kim, Kwang In and Lee, Jaeho}, title = {In Search of a Data Transformation That Accelerates Neural Field Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4830-4839} }
Zero-Painter: Training-Free Layout Control for Text-to-Image Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Ohanyan_2024_CVPR, author = {Ohanyan, Marianna and Manukyan, Hayk and Wang, Zhangyang and Navasardyan, Shant and Shi, Humphrey}, title = {Zero-Painter: Training-Free Layout Control for Text-to-Image Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8764-8774} }
DiffLoc: Diffusion Model for Outdoor LiDAR Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Wen and Yang, Yuyang and Yu, Shangshu and Hu, Guosheng and Wen, Chenglu and Cheng, Ming and Wang, Cheng}, title = {DiffLoc: Diffusion Model for Outdoor LiDAR Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15045-15054} }
Towards 3D Vision with Low-Cost Single-Photon Cameras-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mu_2024_CVPR, author = {Mu, Fangzhou and Sifferman, Carter and Jungerman, Sacha and Li, Yiquan and Han, Mark and Gleicher, Michael and Gupta, Mohit and Li, Yin}, title = {Towards 3D Vision with Low-Cost Single-Photon Cameras}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5302-5311} }
WonderJourney: Going from Anywhere to Everywhere-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2024_CVPR, author = {Yu, Hong-Xing and Duan, Haoyi and Hur, Junhwa and Sargent, Kyle and Rubinstein, Michael and Freeman, William T. and Cole, Forrester and Sun, Deqing and Snavely, Noah and Wu, Jiajun and Herrmann, Charles}, title = {WonderJourney: Going from Anywhere to Everywhere}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6658-6667} }
On Scaling Up a Multilingual Vision and Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Xi and Djolonga, Josip and Padlewski, Piotr and Mustafa, Basil and Changpinyo, Soravit and Wu, Jialin and Ruiz, Carlos Riquelme and Goodman, Sebastian and Wang, Xiao and Tay, Yi and Shakeri, Siamak and Dehghani, Mostafa and Salz, Daniel and Lucic, Mario and Tschannen, Michael and Nagrani, Arsha and Hu, Hexiang and Joshi, Mandar and Pang, Bo and Montgomery, Ceslee and Pietrzyk, Paulina and Ritter, Marvin and Piergiovanni, AJ and Minderer, Matthias and Pavetic, Filip and Waters, Austin and Li, Gang and Alabdulmohsin, Ibrahim and Beyer, Lucas and Amelot, Julien and Lee, Kenton and Steiner, Andreas Peter and Li, Yang and Keysers, Daniel and Arnab, Anurag and Xu, Yuanzhong and Rong, Keran and Kolesnikov, Alexander and Seyedhosseini, Mojtaba and Angelova, Anelia and Zhai, Xiaohua and Houlsby, Neil and Soricut, Radu}, title = {On Scaling Up a Multilingual Vision and Language Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14432-14444} }
Day-Night Cross-domain Vehicle Re-identification-
[pdf]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Hongchao and Chen, Jingong and Zheng, Aihua and Wu, Yong and Luo, Yonglong}, title = {Day-Night Cross-domain Vehicle Re-identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12626-12635} }
4D-fy: Text-to-4D Generation Using Hybrid Score Distillation Sampling-
[pdf]
[supp]
[bibtex]@InProceedings{Bahmani_2024_CVPR, author = {Bahmani, Sherwin and Skorokhodov, Ivan and Rong, Victor and Wetzstein, Gordon and Guibas, Leonidas and Wonka, Peter and Tulyakov, Sergey and Park, Jeong Joon and Tagliasacchi, Andrea and Lindell, David B.}, title = {4D-fy: Text-to-4D Generation Using Hybrid Score Distillation Sampling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7996-8006} }
Adversarial Distillation Based on Slack Matching and Attribution Region Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Yin_2024_CVPR, author = {Yin, Shenglin and Xiao, Zhen and Song, Mingxuan and Long, Jieyi}, title = {Adversarial Distillation Based on Slack Matching and Attribution Region Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24605-24614} }
Boosting Spike Camera Image Reconstruction from a Perspective of Dealing with Spike Fluctuations-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Rui and Xiong, Ruiqin and Zhao, Jing and Zhang, Jian and Fan, Xiaopeng and Yu, Zhaofei and Huang, Tiejun}, title = {Boosting Spike Camera Image Reconstruction from a Perspective of Dealing with Spike Fluctuations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24955-24965} }
Text-guided Explorable Image Super-resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gandikota_2024_CVPR, author = {Gandikota, Kanchana Vaishnavi and Chandramouli, Paramanand}, title = {Text-guided Explorable Image Super-resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25900-25911} }
FreeControl: Training-Free Spatial Control of Any Text-to-Image Diffusion Model with Any Condition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mo_2024_CVPR, author = {Mo, Sicheng and Mu, Fangzhou and Lin, Kuan Heng and Liu, Yanli and Guan, Bochen and Li, Yin and Zhou, Bolei}, title = {FreeControl: Training-Free Spatial Control of Any Text-to-Image Diffusion Model with Any Condition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7465-7475} }
VMC: Video Motion Customization using Temporal Attention Adaption for Text-to-Video Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2024_CVPR, author = {Jeong, Hyeonho and Park, Geon Yeong and Ye, Jong Chul}, title = {VMC: Video Motion Customization using Temporal Attention Adaption for Text-to-Video Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9212-9221} }
Holodeck: Language Guided Generation of 3D Embodied AI Environments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Yue and Sun, Fan-Yun and Weihs, Luca and VanderBilt, Eli and Herrasti, Alvaro and Han, Winson and Wu, Jiajun and Haber, Nick and Krishna, Ranjay and Liu, Lingjie and Callison-Burch, Chris and Yatskar, Mark and Kembhavi, Aniruddha and Clark, Christopher}, title = {Holodeck: Language Guided Generation of 3D Embodied AI Environments}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16227-16237} }
Distilled Datamodel with Reverse Gradient Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2024_CVPR, author = {Ye, Jingwen and Yu, Ruonan and Liu, Songhua and Wang, Xinchao}, title = {Distilled Datamodel with Reverse Gradient Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11954-11963} }
DistriFusion: Distributed Parallel Inference for High-Resolution Diffusion Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Muyang and Cai, Tianle and Cao, Jiaxin and Zhang, Qinsheng and Cai, Han and Bai, Junjie and Jia, Yangqing and Li, Kai and Han, Song}, title = {DistriFusion: Distributed Parallel Inference for High-Resolution Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7183-7193} }
Improving the Generalization of Segmentation Foundation Model under Distribution Shift via Weakly Supervised Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Haojie and Su, Yongyi and Xu, Xun and Jia, Kui}, title = {Improving the Generalization of Segmentation Foundation Model under Distribution Shift via Weakly Supervised Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23385-23395} }
Pseudo Label Refinery for Unsupervised Domain Adaptation on Cross-dataset 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Zhanwei and Chen, Minghao and Xiao, Shuai and Peng, Liang and Li, Hengjia and Lin, Binbin and Li, Ping and Wang, Wenxiao and Wu, Boxi and Cai, Deng}, title = {Pseudo Label Refinery for Unsupervised Domain Adaptation on Cross-dataset 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15291-15300} }
Reconstructing Hands in 3D with Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pavlakos_2024_CVPR, author = {Pavlakos, Georgios and Shan, Dandan and Radosavovic, Ilija and Kanazawa, Angjoo and Fouhey, David and Malik, Jitendra}, title = {Reconstructing Hands in 3D with Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9826-9836} }
AZ-NAS: Assembling Zero-Cost Proxies for Network Architecture Search-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2024_CVPR, author = {Lee, Junghyup and Ham, Bumsub}, title = {AZ-NAS: Assembling Zero-Cost Proxies for Network Architecture Search}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5893-5903} }
Correspondence-Free Non-Rigid Point Set Registration Using Unsupervised Clustering Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Mingyang and Jiang, Jingen and Ma, Lei and Xin, Shiqing and Meng, Gaofeng and Yan, Dong-Ming}, title = {Correspondence-Free Non-Rigid Point Set Registration Using Unsupervised Clustering Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21199-21208} }
Improving Physics-Augmented Continuum Neural Radiance Field-Based Geometry-Agnostic System Identification with Lagrangian Particle Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kaneko_2024_CVPR, author = {Kaneko, Takuhiro}, title = {Improving Physics-Augmented Continuum Neural Radiance Field-Based Geometry-Agnostic System Identification with Lagrangian Particle Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5470-5480} }
BadCLIP: Trigger-Aware Prompt Learning for Backdoor Attacks on CLIP-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bai_2024_CVPR, author = {Bai, Jiawang and Gao, Kuofeng and Min, Shaobo and Xia, Shu-Tao and Li, Zhifeng and Liu, Wei}, title = {BadCLIP: Trigger-Aware Prompt Learning for Backdoor Attacks on CLIP}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24239-24250} }
Beyond Image Super-Resolution for Image Recognition with Task-Driven Perceptual Loss-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Jaeha and Oh, Junghun and Lee, Kyoung Mu}, title = {Beyond Image Super-Resolution for Image Recognition with Task-Driven Perceptual Loss}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2651-2661} }
PELA: Learning Parameter-Efficient Models with Low-Rank Approximation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2024_CVPR, author = {Guo, Yangyang and Wang, Guangzhi and Kankanhalli, Mohan}, title = {PELA: Learning Parameter-Efficient Models with Low-Rank Approximation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15699-15709} }
XCube: Large-Scale 3D Generative Modeling using Sparse Voxel Hierarchies-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2024_CVPR, author = {Ren, Xuanchi and Huang, Jiahui and Zeng, Xiaohui and Museth, Ken and Fidler, Sanja and Williams, Francis}, title = {XCube: Large-Scale 3D Generative Modeling using Sparse Voxel Hierarchies}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4209-4219} }
PixelRNN: In-pixel Recurrent Neural Networks for End-to-end-optimized Perception with Neural Sensors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{So_2024_CVPR, author = {So, Haley M. and Bose, Laurie and Dudek, Piotr and Wetzstein, Gordon}, title = {PixelRNN: In-pixel Recurrent Neural Networks for End-to-end-optimized Perception with Neural Sensors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25233-25244} }
Reconstruction-free Cascaded Adaptive Compressive Sensing-
[pdf]
[bibtex]@InProceedings{Qiu_2024_CVPR, author = {Qiu, Chenxi and Yue, Tao and Hu, Xuemei}, title = {Reconstruction-free Cascaded Adaptive Compressive Sensing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2620-2630} }
Auto-Train-Once: Controller Network Guided Automatic Network Pruning from Scratch-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Xidong and Gao, Shangqian and Zhang, Zeyu and Li, Zhenzhen and Bao, Runxue and Zhang, Yanfu and Wang, Xiaoqian and Huang, Heng}, title = {Auto-Train-Once: Controller Network Guided Automatic Network Pruning from Scratch}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16163-16173} }
Constructing and Exploring Intermediate Domains in Mixed Domain Semi-supervised Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2024_CVPR, author = {Ma, Qinghe and Zhang, Jian and Qi, Lei and Yu, Qian and Shi, Yinghuan and Gao, Yang}, title = {Constructing and Exploring Intermediate Domains in Mixed Domain Semi-supervised Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11642-11651} }
DUSt3R: Geometric 3D Vision Made Easy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Shuzhe and Leroy, Vincent and Cabon, Yohann and Chidlovskii, Boris and Revaud, Jerome}, title = {DUSt3R: Geometric 3D Vision Made Easy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20697-20709} }
From Isolated Islands to Pangea: Unifying Semantic Space for Human Action Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Yong-Lu and Wu, Xiaoqian and Liu, Xinpeng and Wang, Zehao and Dou, Yiming and Ji, Yikun and Zhang, Junyi and Li, Yixing and Lu, Xudong and Tan, Jingru and Lu, Cewu}, title = {From Isolated Islands to Pangea: Unifying Semantic Space for Human Action Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16582-16592} }
Bootstrapping Autonomous Driving Radars with Self-Supervised Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hao_2024_CVPR, author = {Hao, Yiduo and Madani, Sohrab and Guan, Junfeng and Alloulah, Mohammed and Gupta, Saurabh and Hassanieh, Haitham}, title = {Bootstrapping Autonomous Driving Radars with Self-Supervised Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15012-15023} }
Robust Distillation via Untargeted and Targeted Intermediate Adversarial Samples-
[pdf]
[supp]
[bibtex]@InProceedings{Dong_2024_CVPR, author = {Dong, Junhao and Koniusz, Piotr and Chen, Junxi and Wang, Z. Jane and Ong, Yew-Soon}, title = {Robust Distillation via Untargeted and Targeted Intermediate Adversarial Samples}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28432-28442} }
USE: Universal Segment Embeddings for Open-Vocabulary Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Xiaoqi and He, Wenbin and Xuan, Xiwei and Sebastian, Clint and Ono, Jorge Piazentin and Li, Xin and Behpour, Sima and Doan, Thang and Gou, Liang and Shen, Han-Wei and Ren, Liu}, title = {USE: Universal Segment Embeddings for Open-Vocabulary Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4187-4196} }
Functional Diffusion-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Biao and Wonka, Peter}, title = {Functional Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4723-4732} }
Soften to Defend: Towards Adversarial Robustness via Self-Guided Label Refinement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Zhuorong and Yu, Daiwei and Wei, Lina and Jin, Canghong and Zhang, Yun and Chan, Sixian}, title = {Soften to Defend: Towards Adversarial Robustness via Self-Guided Label Refinement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24776-24785} }
Weakly Supervised Monocular 3D Detection with a Single-View Image-
[pdf]
[arXiv]
[bibtex]@InProceedings{Jiang_2024_CVPR, author = {Jiang, Xueying and Jin, Sheng and Lu, Lewei and Zhang, Xiaoqin and Lu, Shijian}, title = {Weakly Supervised Monocular 3D Detection with a Single-View Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10508-10518} }
Pose-Guided Self-Training with Two-Stage Clustering for Unsupervised Landmark Discovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tourani_2024_CVPR, author = {Tourani, Siddharth and Alwheibi, Ahmed and Mahmood, Arif and Khan, Muhammad Haris}, title = {Pose-Guided Self-Training with Two-Stage Clustering for Unsupervised Landmark Discovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23041-23051} }
Learning from Synthetic Human Group Activities-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chang_2024_CVPR, author = {Chang, Che-Jui and Li, Danrui and Patel, Deep and Goel, Parth and Zhou, Honglu and Moon, Seonghyeon and Sohn, Samuel S. and Yoon, Sejong and Pavlovic, Vladimir and Kapadia, Mubbasir}, title = {Learning from Synthetic Human Group Activities}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21922-21932} }
Blind Image Quality Assessment Based on Geometric Order Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Shin_2024_CVPR, author = {Shin, Nyeong-Ho and Lee, Seon-Ho and Kim, Chang-Su}, title = {Blind Image Quality Assessment Based on Geometric Order Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12799-12808} }
Text Grouping Adapter: Adapting Pre-trained Text Detector for Layout Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bi_2024_CVPR, author = {Bi, Tianci and Zhang, Xiaoyi and Zhang, Zhizheng and Xie, Wenxuan and Lan, Cuiling and Lu, Yan and Zheng, Nanning}, title = {Text Grouping Adapter: Adapting Pre-trained Text Detector for Layout Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28150-28159} }
Generalizable Whole Slide Image Classification with Fine-Grained Visual-Semantic Interaction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Hao and Chen, Ying and Chen, Yifei and Yu, Rongshan and Yang, Wenxian and Wang, Liansheng and Ding, Bowen and Han, Yuchen}, title = {Generalizable Whole Slide Image Classification with Fine-Grained Visual-Semantic Interaction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11398-11407} }
THRONE: An Object-based Hallucination Benchmark for the Free-form Generations of Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kaul_2024_CVPR, author = {Kaul, Prannay and Li, Zhizhong and Yang, Hao and Dukler, Yonatan and Swaminathan, Ashwin and Taylor, C. J. and Soatto, Stefano}, title = {THRONE: An Object-based Hallucination Benchmark for the Free-form Generations of Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27228-27238} }
Wired Perspectives: Multi-View Wire Art Embraces Generative AI-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qu_2024_CVPR, author = {Qu, Zhiyu and Yang, Lan and Zhang, Honggang and Xiang, Tao and Pang, Kaiyue and Song, Yi-Zhe}, title = {Wired Perspectives: Multi-View Wire Art Embraces Generative AI}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6149-6158} }
LUWA Dataset: Learning Lithic Use-Wear Analysis on Microscopic Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Jing and Fang, Irving and Wu, Hao and Kaushik, Akshat and Rodriguez, Alice and Zhao, Hanwen and Zhang, Juexiao and Zheng, Zhuo and Iovita, Radu and Feng, Chen}, title = {LUWA Dataset: Learning Lithic Use-Wear Analysis on Microscopic Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22563-22573} }
Generalizing 6-DoF Grasp Detection via Domain Prior Knowledge-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2024_CVPR, author = {Ma, Haoxiang and Shi, Modi and Gao, Boyang and Huang, Di}, title = {Generalizing 6-DoF Grasp Detection via Domain Prior Knowledge}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18102-18111} }
The Audio-Visual Conversational Graph: From an Egocentric-Exocentric Perspective-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jia_2024_CVPR, author = {Jia, Wenqi and Liu, Miao and Jiang, Hao and Ananthabhotla, Ishwarya and Rehg, James M. and Ithapu, Vamsi Krishna and Gao, Ruohan}, title = {The Audio-Visual Conversational Graph: From an Egocentric-Exocentric Perspective}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26396-26405} }
Byzantine-robust Decentralized Federated Learning via Dual-domain Clustering and Trust Bootstrapping-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2024_CVPR, author = {Sun, Peng and Liu, Xinyang and Wang, Zhibo and Liu, Bo}, title = {Byzantine-robust Decentralized Federated Learning via Dual-domain Clustering and Trust Bootstrapping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24756-24765} }
Leveraging Camera Triplets for Efficient and Accurate Structure-from-Motion-
[pdf]
[supp]
[bibtex]@InProceedings{Manam_2024_CVPR, author = {Manam, Lalit and Govindu, Venu Madhav}, title = {Leveraging Camera Triplets for Efficient and Accurate Structure-from-Motion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4959-4968} }
SimDA: Simple Diffusion Adapter for Efficient Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xing_2024_CVPR, author = {Xing, Zhen and Dai, Qi and Hu, Han and Wu, Zuxuan and Jiang, Yu-Gang}, title = {SimDA: Simple Diffusion Adapter for Efficient Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7827-7839} }
Multi-view Aggregation Network for Dichotomous Image Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yu_2024_CVPR, author = {Yu, Qian and Zhao, Xiaoqi and Pang, Youwei and Zhang, Lihe and Lu, Huchuan}, title = {Multi-view Aggregation Network for Dichotomous Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3921-3930} }
A Recipe for Scaling up Text-to-Video Generation with Text-free Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Xiang and Zhang, Shiwei and Yuan, Hangjie and Qing, Zhiwu and Gong, Biao and Zhang, Yingya and Shen, Yujun and Gao, Changxin and Sang, Nong}, title = {A Recipe for Scaling up Text-to-Video Generation with Text-free Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6572-6582} }
Molecular Data Programming: Towards Molecule Pseudo-labeling with Systematic Weak Supervision-
[pdf]
[supp]
[bibtex]@InProceedings{Juan_2024_CVPR, author = {Juan, Xin and Zhou, Kaixiong and Liu, Ninghao and Chen, Tianlong and Wang, Xin}, title = {Molecular Data Programming: Towards Molecule Pseudo-labeling with Systematic Weak Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {308-318} }
RadSimReal: Bridging the Gap Between Synthetic and Real Data in Radar Object Detection With Simulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bialer_2024_CVPR, author = {Bialer, Oded and Haitman, Yuval}, title = {RadSimReal: Bridging the Gap Between Synthetic and Real Data in Radar Object Detection With Simulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15407-15416} }
No More Ambiguity in 360° Room Layout via Bi-Layout Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Tsai_2024_CVPR, author = {Tsai, Yu-Ju and Jhang, Jin-Cheng and Zheng, Jingjing and Wang, Wei and Chen, Albert Y. C. and Sun, Min and Kuo, Cheng-Hao and Yang, Ming-Hsuan}, title = {No More Ambiguity in 360{$^\circ$} Room Layout via Bi-Layout Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28056-28065} }
Residual Denoising Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Jiawei and Wang, Qiang and Fan, Huijie and Wang, Yinong and Tang, Yandong and Qu, Liangqiong}, title = {Residual Denoising Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2773-2783} }
Towards Accurate and Robust Architectures via Neural Architecture Search-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ou_2024_CVPR, author = {Ou, Yuwei and Feng, Yuqi and Sun, Yanan}, title = {Towards Accurate and Robust Architectures via Neural Architecture Search}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5967-5976} }
Closely Interactive Human Reconstruction with Proxemics and Physics-Guided Adaption-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Buzhen and Li, Chen and Xu, Chongyang and Pan, Liang and Wang, Yangang and Lee, Gim Hee}, title = {Closely Interactive Human Reconstruction with Proxemics and Physics-Guided Adaption}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1011-1021} }
A Noisy Elephant in the Room: Is Your Out-of-Distribution Detector Robust to Label Noise?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Humblot-Renaux_2024_CVPR, author = {Humblot-Renaux, Galadrielle and Escalera, Sergio and Moeslund, Thomas B.}, title = {A Noisy Elephant in the Room: Is Your Out-of-Distribution Detector Robust to Label Noise?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22626-22636} }
VideoMAC: Video Masked Autoencoders Meet ConvNets-
[pdf]
[arXiv]
[bibtex]@InProceedings{Pei_2024_CVPR, author = {Pei, Gensheng and Chen, Tao and Jiang, Xiruo and Liu, Huafeng and Sun, Zeren and Yao, Yazhou}, title = {VideoMAC: Video Masked Autoencoders Meet ConvNets}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22733-22743} }
Taming Stable Diffusion for Text to 360 Panorama Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Cheng and Wu, Qianyi and Gambardella, Camilo Cruz and Huang, Xiaoshui and Phung, Dinh and Ouyang, Wanli and Cai, Jianfei}, title = {Taming Stable Diffusion for Text to 360 Panorama Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6347-6357} }
3DSFLabelling: Boosting 3D Scene Flow Estimation by Pseudo Auto-labelling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2024_CVPR, author = {Jiang, Chaokang and Wang, Guangming and Liu, Jiuming and Wang, Hesheng and Ma, Zhuang and Liu, Zhenqiang and Liang, Zhujin and Shan, Yi and Du, Dalong}, title = {3DSFLabelling: Boosting 3D Scene Flow Estimation by Pseudo Auto-labelling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15173-15183} }
Unsigned Orthogonal Distance Fields: An Accurate Neural Implicit Representation for Diverse 3D Shapes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2024_CVPR, author = {Lu, Yujie and Wan, Long and Ding, Nayu and Wang, Yulong and Shen, Shuhan and Cai, Shen and Gao, Lin}, title = {Unsigned Orthogonal Distance Fields: An Accurate Neural Implicit Representation for Diverse 3D Shapes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20551-20560} }
Modular Blind Video Quality Assessment-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wen_2024_CVPR, author = {Wen, Wen and Li, Mu and Zhang, Yabin and Liao, Yiting and Li, Junlin and Zhang, Li and Ma, Kede}, title = {Modular Blind Video Quality Assessment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2763-2772} }
Question Aware Vision Transformer for Multimodal Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ganz_2024_CVPR, author = {Ganz, Roy and Kittenplon, Yair and Aberdam, Aviad and Ben Avraham, Elad and Nuriel, Oren and Mazor, Shai and Litman, Ron}, title = {Question Aware Vision Transformer for Multimodal Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13861-13871} }
OST: Refining Text Knowledge with Optimal Spatio-Temporal Descriptor for General Video Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Tongjia and Yu, Hongshan and Yang, Zhengeng and Li, Zechuan and Sun, Wei and Chen, Chen}, title = {OST: Refining Text Knowledge with Optimal Spatio-Temporal Descriptor for General Video Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18888-18898} }
Habitat Synthetic Scenes Dataset (HSSD-200): An Analysis of 3D Scene Scale and Realism Tradeoffs for ObjectGoal Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Khanna_2024_CVPR, author = {Khanna, Mukul and Mao, Yongsen and Jiang, Hanxiao and Haresh, Sanjay and Shacklett, Brennan and Batra, Dhruv and Clegg, Alexander and Undersander, Eric and Chang, Angel X. and Savva, Manolis}, title = {Habitat Synthetic Scenes Dataset (HSSD-200): An Analysis of 3D Scene Scale and Realism Tradeoffs for ObjectGoal Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16384-16393} }
OA-CNNs: Omni-Adaptive Sparse CNNs for 3D Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Peng_2024_CVPR, author = {Peng, Bohao and Wu, Xiaoyang and Jiang, Li and Chen, Yukang and Zhao, Hengshuang and Tian, Zhuotao and Jia, Jiaya}, title = {OA-CNNs: Omni-Adaptive Sparse CNNs for 3D Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21305-21315} }
RELI11D: A Comprehensive Multimodal Human Motion Dataset and Method-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2024_CVPR, author = {Yan, Ming and Zhang, Yan and Cai, Shuqiang and Fan, Shuqi and Lin, Xincheng and Dai, Yudi and Shen, Siqi and Wen, Chenglu and Xu, Lan and Ma, Yuexin and Wang, Cheng}, title = {RELI11D: A Comprehensive Multimodal Human Motion Dataset and Method}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2250-2262} }
Generative Image Dynamics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Zhengqi and Tucker, Richard and Snavely, Noah and Holynski, Aleksander}, title = {Generative Image Dynamics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24142-24153} }
One-Class Face Anti-spoofing via Spoof Cue Map-Guided Feature Learning-
[pdf]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Pei-Kai and Chiang, Cheng-Hsuan and Chen, Tzu-Hsien and Chong, Jun-Xiong and Liu, Tyng-Luh and Hsu, Chiou-Ting}, title = {One-Class Face Anti-spoofing via Spoof Cue Map-Guided Feature Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {277-286} }
On the Test-Time Zero-Shot Generalization of Vision-Language Models: Do We Really Need Prompt Learning?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zanella_2024_CVPR, author = {Zanella, Maxime and Ben Ayed, Ismail}, title = {On the Test-Time Zero-Shot Generalization of Vision-Language Models: Do We Really Need Prompt Learning?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23783-23793} }
InteractDiffusion: Interaction Control in Text-to-Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hoe_2024_CVPR, author = {Hoe, Jiun Tian and Jiang, Xudong and Chan, Chee Seng and Tan, Yap-Peng and Hu, Weipeng}, title = {InteractDiffusion: Interaction Control in Text-to-Image Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6180-6189} }
NViST: In the Wild New View Synthesis from a Single Image with Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jang_2024_CVPR, author = {Jang, Wonbong and Agapito, Lourdes}, title = {NViST: In the Wild New View Synthesis from a Single Image with Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10181-10193} }
Beyond Text: Frozen Large Language Models in Visual Signal Comprehension-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2024_CVPR, author = {Zhu, Lei and Wei, Fangyun and Lu, Yanye}, title = {Beyond Text: Frozen Large Language Models in Visual Signal Comprehension}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27047-27057} }
Rotated Multi-Scale Interaction Network for Referring Remote Sensing Image Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Sihan and Ma, Yiwei and Zhang, Xiaoqing and Wang, Haowei and Ji, Jiayi and Sun, Xiaoshuai and Ji, Rongrong}, title = {Rotated Multi-Scale Interaction Network for Referring Remote Sensing Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26658-26668} }
GLACE: Global Local Accelerated Coordinate Encoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Fangjinhua and Jiang, Xudong and Galliani, Silvano and Vogel, Christoph and Pollefeys, Marc}, title = {GLACE: Global Local Accelerated Coordinate Encoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21562-21571} }
Emergent Open-Vocabulary Semantic Segmentation from Off-the-shelf Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2024_CVPR, author = {Luo, Jiayun and Khandelwal, Siddhesh and Sigal, Leonid and Li, Boyang}, title = {Emergent Open-Vocabulary Semantic Segmentation from Off-the-shelf Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4029-4040} }
Localization Is All You Evaluate: Data Leakage in Online Mapping Datasets and How to Fix It-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lilja_2024_CVPR, author = {Lilja, Adam and Fu, Junsheng and Stenborg, Erik and Hammarstrand, Lars}, title = {Localization Is All You Evaluate: Data Leakage in Online Mapping Datasets and How to Fix It}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22150-22159} }
Alchemist: Parametric Control of Material Properties with Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sharma_2024_CVPR, author = {Sharma, Prafull and Jampani, Varun and Li, Yuanzhen and Jia, Xuhui and Lagun, Dmitry and Durand, Fredo and Freeman, Bill and Matthews, Mark}, title = {Alchemist: Parametric Control of Material Properties with Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24130-24141} }
Step Differences in Instructional Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nagarajan_2024_CVPR, author = {Nagarajan, Tushar and Torresani, Lorenzo}, title = {Step Differences in Instructional Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18740-18750} }
Depth Anything: Unleashing the Power of Large-Scale Unlabeled Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Lihe and Kang, Bingyi and Huang, Zilong and Xu, Xiaogang and Feng, Jiashi and Zhao, Hengshuang}, title = {Depth Anything: Unleashing the Power of Large-Scale Unlabeled Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10371-10381} }
SelfPose3d: Self-Supervised Multi-Person Multi-View 3d Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Srivastav_2024_CVPR, author = {Srivastav, Vinkle and Chen, Keqi and Padoy, Nicolas}, title = {SelfPose3d: Self-Supervised Multi-Person Multi-View 3d Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2502-2512} }
MoDE: CLIP Data Experts via Clustering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2024_CVPR, author = {Ma, Jiawei and Huang, Po-Yao and Xie, Saining and Li, Shang-Wen and Zettlemoyer, Luke and Chang, Shih-Fu and Yih, Wen-Tau and Xu, Hu}, title = {MoDE: CLIP Data Experts via Clustering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26354-26363} }
Joint2Human: High-Quality 3D Human Generation via Compact Spherical Embedding of 3D Joints-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Muxin and Feng, Qiao and Su, Zhuo and Wen, Chao and Xue, Zhou and Li, Kun}, title = {Joint2Human: High-Quality 3D Human Generation via Compact Spherical Embedding of 3D Joints}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1429-1438} }
Prompt-Free Diffusion: Taking "Text" out of Text-to-Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Xingqian and Guo, Jiayi and Wang, Zhangyang and Huang, Gao and Essa, Irfan and Shi, Humphrey}, title = {Prompt-Free Diffusion: Taking ``Text'' out of Text-to-Image Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8682-8692} }
MPOD123: One Image to 3D Content Generation Using Mask-enhanced Progressive Outline-to-Detail Optimization-
[pdf]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Jimin and Wang, Tianbao and Jin, Tao and Zhang, Shengyu and Fu, Dongjie and Wang, Zhe and Lyu, Jiangjing and Lv, Chengfei and Niu, Chaoyue and Yu, Zhou and Zhao, Zhou and Wu, Fei}, title = {MPOD123: One Image to 3D Content Generation Using Mask-enhanced Progressive Outline-to-Detail Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10682-10692} }
Multi-agent Long-term 3D Human Pose Forecasting via Interaction-aware Trajectory Conditioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2024_CVPR, author = {Jeong, Jaewoo and Park, Daehee and Yoon, Kuk-Jin}, title = {Multi-agent Long-term 3D Human Pose Forecasting via Interaction-aware Trajectory Conditioning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1617-1628} }
UnionFormer: Unified-Learning Transformer with Multi-View Representation for Image Manipulation Detection and Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Shuaibo and Ma, Wei and Guo, Jianwei and Xu, Shibiao and Li, Benchong and Zhang, Xiaopeng}, title = {UnionFormer: Unified-Learning Transformer with Multi-View Representation for Image Manipulation Detection and Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12523-12533} }
Situational Awareness Matters in 3D Vision Language Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Man_2024_CVPR, author = {Man, Yunze and Gui, Liang-Yan and Wang, Yu-Xiong}, title = {Situational Awareness Matters in 3D Vision Language Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13678-13688} }
RCBEVDet: Radar-camera Fusion in Bird's Eye View for 3D Object Detection-
[pdf]
[bibtex]@InProceedings{Lin_2024_CVPR, author = {Lin, Zhiwei and Liu, Zhe and Xia, Zhongyu and Wang, Xinhao and Wang, Yongtao and Qi, Shengxiang and Dong, Yang and Dong, Nan and Zhang, Le and Zhu, Ce}, title = {RCBEVDet: Radar-camera Fusion in Bird's Eye View for 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14928-14937} }
CLOAF: CoLlisiOn-Aware Human Flow-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Davydov_2024_CVPR, author = {Davydov, Andrey and Engilberge, Martin and Salzmann, Mathieu and Fua, Pascal}, title = {CLOAF: CoLlisiOn-Aware Human Flow}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1176-1185} }
Hybrid Functional Maps for Crease-Aware Non-Isometric Shape Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bastian_2024_CVPR, author = {Bastian, Lennart and Xie, Yizheng and Navab, Nassir and L{\"a}hner, Zorah}, title = {Hybrid Functional Maps for Crease-Aware Non-Isometric Shape Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3313-3323} }
Density-Guided Semi-Supervised 3D Semantic Segmentation with Dual-Space Hardness Sampling-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Jianan and Dong, Qiulei}, title = {Density-Guided Semi-Supervised 3D Semantic Segmentation with Dual-Space Hardness Sampling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3260-3269} }
Adaptive Softassign via Hadamard-Equipped Sinkhorn-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shen_2024_CVPR, author = {Shen, Binrui and Niu, Qiang and Zhu, Shengxin}, title = {Adaptive Softassign via Hadamard-Equipped Sinkhorn}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17638-17647} }
Re-thinking Data Availability Attacks Against Deep Neural Networks-
[pdf]
[supp]
[bibtex]@InProceedings{Fang_2024_CVPR, author = {Fang, Bin and Li, Bo and Wu, Shuang and Ding, Shouhong and Yi, Ran and Ma, Lizhuang}, title = {Re-thinking Data Availability Attacks Against Deep Neural Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12215-12224} }
ElasticDiffusion: Training-free Arbitrary Size Image Generation through Global-Local Content Separation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Haji-Ali_2024_CVPR, author = {Haji-Ali, Moayed and Balakrishnan, Guha and Ordonez, Vicente}, title = {ElasticDiffusion: Training-free Arbitrary Size Image Generation through Global-Local Content Separation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6603-6612} }
Locally Adaptive Neural 3D Morphable Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tarasiou_2024_CVPR, author = {Tarasiou, Michail and Potamias, Rolandos Alexandros and O'Sullivan, Eimear and Ploumpis, Stylianos and Zafeiriou, Stefanos}, title = {Locally Adaptive Neural 3D Morphable Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1867-1876} }
ICON: Incremental CONfidence for Joint Pose and Radiance Field Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Weiyao and Gleize, Pierre and Tang, Hao and Chen, Xingyu and Liang, Kevin J and Feiszli, Matt}, title = {ICON: Incremental CONfidence for Joint Pose and Radiance Field Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5406-5417} }
Learned Scanpaths Aid Blind Panoramic Video Quality Assessment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2024_CVPR, author = {Fan, Kanglong and Wen, Wen and Li, Mu and Peng, Yifan and Ma, Kede}, title = {Learned Scanpaths Aid Blind Panoramic Video Quality Assessment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2599-2608} }
FineSports: A Multi-person Hierarchical Sports Video Dataset for Fine-grained Action Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Jinglin and Zhao, Guohao and Yin, Sibo and Zhou, Wenhao and Peng, Yuxin}, title = {FineSports: A Multi-person Hierarchical Sports Video Dataset for Fine-grained Action Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21773-21782} }
SHiNe: Semantic Hierarchy Nexus for Open-vocabulary Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Mingxuan and Hayes, Tyler L. and Ricci, Elisa and Csurka, Gabriela and Volpi, Riccardo}, title = {SHiNe: Semantic Hierarchy Nexus for Open-vocabulary Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16634-16644} }
TI2V-Zero: Zero-Shot Image Conditioning for Text-to-Video Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Ni_2024_CVPR, author = {Ni, Haomiao and Egger, Bernhard and Lohit, Suhas and Cherian, Anoop and Wang, Ye and Koike-Akino, Toshiaki and Huang, Sharon X. and Marks, Tim K.}, title = {TI2V-Zero: Zero-Shot Image Conditioning for Text-to-Video Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9015-9025} }
Ranking Distillation for Open-Ended Video Question Answering with Insufficient Labels-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liang_2024_CVPR, author = {Liang, Tianming and Tan, Chaolei and Xia, Beihao and Zheng, Wei-Shi and Hu, Jian-Fang}, title = {Ranking Distillation for Open-Ended Video Question Answering with Insufficient Labels}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13161-13170} }
GARField: Group Anything with Radiance Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Chung Min and Wu, Mingxuan and Kerr, Justin and Goldberg, Ken and Tancik, Matthew and Kanazawa, Angjoo}, title = {GARField: Group Anything with Radiance Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21530-21539} }
Depth-Aware Concealed Crop Detection in Dense Agricultural Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Liqiong and Yang, Jinyu and Zhang, Yanfu and Wang, Fangyi and Zheng, Feng}, title = {Depth-Aware Concealed Crop Detection in Dense Agricultural Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17201-17211} }
Learning Equi-angular Representations for Online Continual Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Seo_2024_CVPR, author = {Seo, Minhyuk and Koh, Hyunseo and Jeung, Wonje and Lee, Minjae and Kim, San and Lee, Hankook and Cho, Sungjun and Choi, Sungik and Kim, Hyunwoo and Choi, Jonghyun}, title = {Learning Equi-angular Representations for Online Continual Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23933-23942} }
iToF-flow-based High Frame Rate Depth Imaging-
[pdf]
[bibtex]@InProceedings{Meng_2024_CVPR, author = {Meng, Yu and Xue, Zhou and Chang, Xu and Hu, Xuemei and Yue, Tao}, title = {iToF-flow-based High Frame Rate Depth Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4929-4938} }
Solving the Catastrophic Forgetting Problem in Generalized Category Discovery-
[pdf]
[supp]
[bibtex]@InProceedings{Cao_2024_CVPR, author = {Cao, Xinzi and Zheng, Xiawu and Wang, Guanhong and Yu, Weijiang and Shen, Yunhang and Li, Ke and Lu, Yutong and Tian, Yonghong}, title = {Solving the Catastrophic Forgetting Problem in Generalized Category Discovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16880-16889} }
Data-Efficient Unsupervised Interpolation Without Any Intermediate Frame for 4D Medical Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, JungEun and Yoon, Hangyul and Park, Geondo and Kim, Kyungsu and Yang, Eunho}, title = {Data-Efficient Unsupervised Interpolation Without Any Intermediate Frame for 4D Medical Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11353-11364} }
POCE: Primal Policy Optimization with Conservative Estimation for Multi-constraint Offline Reinforcement Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Guan_2024_CVPR, author = {Guan, Jiayi and Shen, Li and Zhou, Ao and Li, Lusong and Hu, Han and He, Xiaodong and Chen, Guang and Jiang, Changjun}, title = {POCE: Primal Policy Optimization with Conservative Estimation for Multi-constraint Offline Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26243-26253} }
Learning the 3D Fauna of the Web-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Zizhang and Litvak, Dor and Li, Ruining and Zhang, Yunzhi and Jakab, Tomas and Rupprecht, Christian and Wu, Shangzhe and Vedaldi, Andrea and Wu, Jiajun}, title = {Learning the 3D Fauna of the Web}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9752-9762} }
Masked Spatial Propagation Network for Sparsity-Adaptive Depth Refinement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jun_2024_CVPR, author = {Jun, Jinyoung and Lee, Jae-Han and Kim, Chang-Su}, title = {Masked Spatial Propagation Network for Sparsity-Adaptive Depth Refinement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19768-19778} }
LISA: Reasoning Segmentation via Large Language Model-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lai_2024_CVPR, author = {Lai, Xin and Tian, Zhuotao and Chen, Yukang and Li, Yanwei and Yuan, Yuhui and Liu, Shu and Jia, Jiaya}, title = {LISA: Reasoning Segmentation via Large Language Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9579-9589} }
Relightful Harmonization: Lighting-aware Portrait Background Replacement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2024_CVPR, author = {Ren, Mengwei and Xiong, Wei and Yoon, Jae Shin and Shu, Zhixin and Zhang, Jianming and Jung, HyunJoon and Gerig, Guido and Zhang, He}, title = {Relightful Harmonization: Lighting-aware Portrait Background Replacement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6452-6462} }
Bridging the Gap: A Unified Video Comprehension Framework for Moment Retrieval and Highlight Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2024_CVPR, author = {Xiao, Yicheng and Luo, Zhuoyan and Liu, Yong and Ma, Yue and Bian, Hengwei and Ji, Yatai and Yang, Yujiu and Li, Xiu}, title = {Bridging the Gap: A Unified Video Comprehension Framework for Moment Retrieval and Highlight Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18709-18719} }
MuseChat: A Conversational Music Recommendation System for Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dong_2024_CVPR, author = {Dong, Zhikang and Liu, Xiulong and Chen, Bin and Polak, Pawel and Zhang, Peng}, title = {MuseChat: A Conversational Music Recommendation System for Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12775-12785} }
Mitigating Motion Blur in Neural Radiance Fields with Events and Frames-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cannici_2024_CVPR, author = {Cannici, Marco and Scaramuzza, Davide}, title = {Mitigating Motion Blur in Neural Radiance Fields with Events and Frames}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9286-9296} }
C3Net: Compound Conditioned ControlNet for Multimodal Content Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Juntao and Liu, Yuehuai and Tai, Yu-Wing and Tang, Chi-Keung}, title = {C3Net: Compound Conditioned ControlNet for Multimodal Content Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26886-26895} }
Device-Wise Federated Network Pruning-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2024_CVPR, author = {Gao, Shangqian and Li, Junyi and Zhang, Zeyu and Zhang, Yanfu and Cai, Weidong and Huang, Heng}, title = {Device-Wise Federated Network Pruning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12342-12352} }
Adapt Before Comparison: A New Perspective on Cross-Domain Few-Shot Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Herzog_2024_CVPR, author = {Herzog, Jonas}, title = {Adapt Before Comparison: A New Perspective on Cross-Domain Few-Shot Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23605-23615} }
TokenHMR: Advancing Human Mesh Recovery with a Tokenized Pose Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dwivedi_2024_CVPR, author = {Dwivedi, Sai Kumar and Sun, Yu and Patel, Priyanka and Feng, Yao and Black, Michael J.}, title = {TokenHMR: Advancing Human Mesh Recovery with a Tokenized Pose Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1323-1333} }
MoReVQA: Exploring Modular Reasoning Models for Video Question Answering-
[pdf]
[arXiv]
[bibtex]@InProceedings{Min_2024_CVPR, author = {Min, Juhong and Buch, Shyamal and Nagrani, Arsha and Cho, Minsu and Schmid, Cordelia}, title = {MoReVQA: Exploring Modular Reasoning Models for Video Question Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13235-13245} }
Low-Rank Rescaled Vision Transformer Fine-Tuning: A Residual Design Approach-
[pdf]
[supp]
[bibtex]@InProceedings{Dong_2024_CVPR, author = {Dong, Wei and Zhang, Xing and Chen, Bihui and Yan, Dawei and Lin, Zhijun and Yan, Qingsen and Wang, Peng and Yang, Yang}, title = {Low-Rank Rescaled Vision Transformer Fine-Tuning: A Residual Design Approach}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16101-16110} }
FaceCom: Towards High-fidelity 3D Facial Shape Completion via Optimization and Inpainting Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Yinglong and Wu, Hongyu and Wang, Xiaogang and Qin, Qingzhao and Zhao, Yijiao and Wang, Yong and Hao, Aimin}, title = {FaceCom: Towards High-fidelity 3D Facial Shape Completion via Optimization and Inpainting Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2177-2186} }
Distribution-aware Knowledge Prototyping for Non-exemplar Lifelong Person Re-identification-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Kunlun and Zou, Xu and Peng, Yuxin and Zhou, Jiahuan}, title = {Distribution-aware Knowledge Prototyping for Non-exemplar Lifelong Person Re-identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16604-16613} }
LightOctree: Lightweight 3D Spatially-Coherent Indoor Lighting Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Xuecan and Xiao, Shibang and Liang, Xiaohui}, title = {LightOctree: Lightweight 3D Spatially-Coherent Indoor Lighting Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4536-4545} }
Generating Enhanced Negatives for Training Language-Based Object Detectors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Shiyu and Zhao, Long and G, Vijay Kumar B and Suh, Yumin and Metaxas, Dimitris N. and Chandraker, Manmohan and Schulter, Samuel}, title = {Generating Enhanced Negatives for Training Language-Based Object Detectors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13592-13602} }
Insect-Foundation: A Foundation Model and Large-scale 1M Dataset for Visual Insect Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Nguyen_2024_CVPR, author = {Nguyen, Hoang-Quan and Truong, Thanh-Dat and Nguyen, Xuan Bac and Dowling, Ashley and Li, Xin and Luu, Khoa}, title = {Insect-Foundation: A Foundation Model and Large-scale 1M Dataset for Visual Insect Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21945-21955} }
Data-Efficient Multimodal Fusion on a Single GPU-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vouitsis_2024_CVPR, author = {Vouitsis, No{\"e}l and Liu, Zhaoyan and Gorti, Satya Krishna and Villecroze, Valentin and Cresswell, Jesse C. and Yu, Guangwei and Loaiza-Ganem, Gabriel and Volkovs, Maksims}, title = {Data-Efficient Multimodal Fusion on a Single GPU}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27239-27251} }
FedSelect: Personalized Federated Learning with Customized Selection of Parameters for Fine-Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tamirisa_2024_CVPR, author = {Tamirisa, Rishub and Xie, Chulin and Bao, Wenxuan and Zhou, Andy and Arel, Ron and Shamsian, Aviv}, title = {FedSelect: Personalized Federated Learning with Customized Selection of Parameters for Fine-Tuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23985-23994} }
FaceLift: Semi-supervised 3D Facial Landmark Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ferman_2024_CVPR, author = {Ferman, David and Garrido, Pablo and Bharaj, Gaurav}, title = {FaceLift: Semi-supervised 3D Facial Landmark Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1781-1791} }
PSDPM: Prototype-based Secondary Discriminative Pixels Mining for Weakly Supervised Semantic Segmentation-
[pdf]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Xinqiao and Yang, Ziqian and Dai, Tianhong and Zhang, Bingfeng and Xiao, Jimin}, title = {PSDPM: Prototype-based Secondary Discriminative Pixels Mining for Weakly Supervised Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3437-3446} }
Bidirectional Multi-Scale Implicit Neural Representations for Image Deraining-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Xiang and Pan, Jinshan and Dong, Jiangxin}, title = {Bidirectional Multi-Scale Implicit Neural Representations for Image Deraining}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25627-25636} }
Frozen CLIP: A Strong Backbone for Weakly Supervised Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Bingfeng and Yu, Siyue and Wei, Yunchao and Zhao, Yao and Xiao, Jimin}, title = {Frozen CLIP: A Strong Backbone for Weakly Supervised Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3796-3806} }
FedAS: Bridging Inconsistency in Personalized Federated Learning-
[pdf]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Xiyuan and Huang, Wenke and Ye, Mang}, title = {FedAS: Bridging Inconsistency in Personalized Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11986-11995} }
LAFS: Landmark-based Facial Self-supervised Learning for Face Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2024_CVPR, author = {Sun, Zhonglin and Feng, Chen and Patras, Ioannis and Tzimiropoulos, Georgios}, title = {LAFS: Landmark-based Facial Self-supervised Learning for Face Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1639-1649} }
SED: A Simple Encoder-Decoder for Open-Vocabulary Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2024_CVPR, author = {Xie, Bin and Cao, Jiale and Xie, Jin and Khan, Fahad Shahbaz and Pang, Yanwei}, title = {SED: A Simple Encoder-Decoder for Open-Vocabulary Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3426-3436} }
GPLD3D: Latent Diffusion of 3D Shape Generative Models by Enforcing Geometric and Physical Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Dong_2024_CVPR, author = {Dong, Yuan and Zuo, Qi and Gu, Xiaodong and Yuan, Weihao and Zhao, Zhengyi and Dong, Zilong and Bo, Liefeng and Huang, Qixing}, title = {GPLD3D: Latent Diffusion of 3D Shape Generative Models by Enforcing Geometric and Physical Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {56-66} }
Enhancing Quality of Compressed Images by Mitigating Enhancement Bias Towards Compression Domain-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xing_2024_CVPR, author = {Xing, Qunliang and Xu, Mai and Li, Shengxi and Deng, Xin and Zheng, Meisong and Liu, Huaida and Chen, Ying}, title = {Enhancing Quality of Compressed Images by Mitigating Enhancement Bias Towards Compression Domain}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25501-25511} }
LangSplat: 3D Language Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qin_2024_CVPR, author = {Qin, Minghan and Li, Wanhua and Zhou, Jiawei and Wang, Haoqian and Pfister, Hanspeter}, title = {LangSplat: 3D Language Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20051-20060} }
MoST: Multi-Modality Scene Tokenization for Motion Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mu_2024_CVPR, author = {Mu, Norman and Ji, Jingwei and Yang, Zhenpei and Harada, Nate and Tang, Haotian and Chen, Kan and Qi, Charles R. and Ge, Runzhou and Goel, Kratarth and Yang, Zoey and Ettinger, Scott and Al-Rfou, Rami and Anguelov, Dragomir and Zhou, Yin}, title = {MoST: Multi-Modality Scene Tokenization for Motion Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14988-14999} }
PIGEON: Predicting Image Geolocations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Haas_2024_CVPR, author = {Haas, Lukas and Skreta, Michal and Alberti, Silas and Finn, Chelsea}, title = {PIGEON: Predicting Image Geolocations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12893-12902} }
Improving Spectral Snapshot Reconstruction with Spectral-Spatial Rectification-
[pdf]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Jiancheng and Zeng, Haijin and Chen, Yongyong and Yu, Dengxiu and Zhao, Yin-Ping}, title = {Improving Spectral Snapshot Reconstruction with Spectral-Spatial Rectification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25817-25826} }
Self-correcting LLM-controlled Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Tsung-Han and Lian, Long and Gonzalez, Joseph E. and Li, Boyi and Darrell, Trevor}, title = {Self-correcting LLM-controlled Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6327-6336} }
PACER+: On-Demand Pedestrian Animation Controller in Driving Scenarios-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Jingbo and Luo, Zhengyi and Yuan, Ye and Li, Yixuan and Dai, Bo}, title = {PACER+: On-Demand Pedestrian Animation Controller in Driving Scenarios}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {718-728} }
LTM: Lightweight Textured Mesh Extraction and Refinement of Large Unbounded Scenes for Efficient Storage and Real-time Rendering-
[pdf]
[supp]
[bibtex]@InProceedings{Choi_2024_CVPR, author = {Choi, Jaehoon and Shah, Rajvi and Li, Qinbo and Wang, Yipeng and Saraf, Ayush and Kim, Changil and Huang, Jia-Bin and Manocha, Dinesh and Alsisan, Suhib and Kopf, Johannes}, title = {LTM: Lightweight Textured Mesh Extraction and Refinement of Large Unbounded Scenes for Efficient Storage and Real-time Rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5053-5063} }
Don't Drop Your Samples! Coherence-Aware Training Benefits Conditional Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Dufour_2024_CVPR, author = {Dufour, Nicolas and Besnier, Victor and Kalogeiton, Vicky and Picard, David}, title = {Don't Drop Your Samples! Coherence-Aware Training Benefits Conditional Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6264-6273} }
Flow-Guided Online Stereo Rectification for Wide Baseline Stereo-
[pdf]
[supp]
[bibtex]@InProceedings{Kumar_2024_CVPR, author = {Kumar, Anush and Mannan, Fahim and Jafari, Omid Hosseini and Li, Shile and Heide, Felix}, title = {Flow-Guided Online Stereo Rectification for Wide Baseline Stereo}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15375-15385} }
DNGaussian: Optimizing Sparse-View 3D Gaussian Radiance Fields with Global-Local Depth Normalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Jiahe and Zhang, Jiawei and Bai, Xiao and Zheng, Jin and Ning, Xin and Zhou, Jun and Gu, Lin}, title = {DNGaussian: Optimizing Sparse-View 3D Gaussian Radiance Fields with Global-Local Depth Normalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20775-20785} }
ColorPCR: Color Point Cloud Registration with Multi-Stage Geometric-Color Fusion-
[pdf]
[supp]
[bibtex]@InProceedings{Mu_2024_CVPR, author = {Mu, Juncheng and Bie, Lin and Du, Shaoyi and Gao, Yue}, title = {ColorPCR: Color Point Cloud Registration with Multi-Stage Geometric-Color Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21061-21070} }
HomoFormer: Homogenized Transformer for Image Shadow Removal-
[pdf]
[supp]
[bibtex]@InProceedings{Xiao_2024_CVPR, author = {Xiao, Jie and Fu, Xueyang and Zhu, Yurui and Li, Dong and Huang, Jie and Zhu, Kai and Zha, Zheng-Jun}, title = {HomoFormer: Homogenized Transformer for Image Shadow Removal}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25617-25626} }
What If the TV Was Off? Examining Counterfactual Reasoning Abilities of Multi-modal Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Letian and Zhai, Xiaotong and Zhao, Zhongkai and Zong, Yongshuo and Wen, Xin and Zhao, Bingchen}, title = {What If the TV Was Off? Examining Counterfactual Reasoning Abilities of Multi-modal Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21853-21862} }
What Do You See in Vehicle? Comprehensive Vision Solution for In-Vehicle Gaze Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2024_CVPR, author = {Cheng, Yihua and Zhu, Yaning and Wang, Zongji and Hao, Hongquan and Liu, Yongwei and Cheng, Shiqing and Wang, Xi and Chang, Hyung Jin}, title = {What Do You See in Vehicle? Comprehensive Vision Solution for In-Vehicle Gaze Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1556-1565} }
Driving Everywhere with Large Language Model Policy Adaptation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Boyi and Wang, Yue and Mao, Jiageng and Ivanovic, Boris and Veer, Sushant and Leung, Karen and Pavone, Marco}, title = {Driving Everywhere with Large Language Model Policy Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14948-14957} }
UFORecon: Generalizable Sparse-View Surface Reconstruction from Arbitrary and Unfavorable Sets-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Na_2024_CVPR, author = {Na, Youngju and Kim, Woo Jae and Han, Kyu Beom and Ha, Suhyeon and Yoon, Sung-Eui}, title = {UFORecon: Generalizable Sparse-View Surface Reconstruction from Arbitrary and Unfavorable Sets}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5094-5104} }
FAR: Flexible Accurate and Robust 6DoF Relative Camera Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rockwell_2024_CVPR, author = {Rockwell, Chris and Kulkarni, Nilesh and Jin, Linyi and Park, Jeong Joon and Johnson, Justin and Fouhey, David F.}, title = {FAR: Flexible Accurate and Robust 6DoF Relative Camera Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19854-19864} }
eTraM: Event-based Traffic Monitoring Dataset-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Verma_2024_CVPR, author = {Verma, Aayush Atul and Chakravarthi, Bharatesh and Vaghela, Arpitsinh and Wei, Hua and Yang, Yezhou}, title = {eTraM: Event-based Traffic Monitoring Dataset}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22637-22646} }
MoCha-Stereo: Motif Channel Attention Network for Stereo Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Ziyang and Long, Wei and Yao, He and Zhang, Yongjun and Wang, Bingshu and Qin, Yongbin and Wu, Jia}, title = {MoCha-Stereo: Motif Channel Attention Network for Stereo Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27768-27777} }
Koala: Key Frame-Conditioned Long Video-LLM-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tan_2024_CVPR, author = {Tan, Reuben and Sun, Ximeng and Hu, Ping and Wang, Jui-hsien and Deilamsalehy, Hanieh and Plummer, Bryan A. and Russell, Bryan and Saenko, Kate}, title = {Koala: Key Frame-Conditioned Long Video-LLM}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13581-13591} }
Extend Your Own Correspondences: Unsupervised Distant Point Cloud Registration by Progressive Distance Extension-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Quan and Zhu, Hongzi and Wang, Zhenxi and Zhou, Yunsong and Chang, Shan and Guo, Minyi}, title = {Extend Your Own Correspondences: Unsupervised Distant Point Cloud Registration by Progressive Distance Extension}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20816-20826} }
HallusionBench: An Advanced Diagnostic Suite for Entangled Language Hallucination and Visual Illusion in Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guan_2024_CVPR, author = {Guan, Tianrui and Liu, Fuxiao and Wu, Xiyang and Xian, Ruiqi and Li, Zongxia and Liu, Xiaoyu and Wang, Xijun and Chen, Lichang and Huang, Furong and Yacoob, Yaser and Manocha, Dinesh and Zhou, Tianyi}, title = {HallusionBench: An Advanced Diagnostic Suite for Entangled Language Hallucination and Visual Illusion in Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14375-14385} }
ID-like Prompt Learning for Few-Shot Out-of-Distribution Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bai_2024_CVPR, author = {Bai, Yichen and Han, Zongbo and Cao, Bing and Jiang, Xiaoheng and Hu, Qinghua and Zhang, Changqing}, title = {ID-like Prompt Learning for Few-Shot Out-of-Distribution Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17480-17489} }
Breathing Life Into Sketches Using Text-to-Video Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gal_2024_CVPR, author = {Gal, Rinon and Vinker, Yael and Alaluf, Yuval and Bermano, Amit and Cohen-Or, Daniel and Shamir, Ariel and Chechik, Gal}, title = {Breathing Life Into Sketches Using Text-to-Video Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4325-4336} }
Multi-modal Learning for Geospatial Vegetation Forecasting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Benson_2024_CVPR, author = {Benson, Vitus and Robin, Claire and Requena-Mesa, Christian and Alonso, Lazaro and Carvalhais, Nuno and Cort\'es, Jos\'e and Gao, Zhihan and Linscheid, Nora and Weynants, M\'elanie and Reichstein, Markus}, title = {Multi-modal Learning for Geospatial Vegetation Forecasting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27788-27799} }
Learning Diffusion Texture Priors for Image Restoration-
[pdf]
[bibtex]@InProceedings{Ye_2024_CVPR, author = {Ye, Tian and Chen, Sixiang and Chai, Wenhao and Xing, Zhaohu and Qin, Jing and Lin, Ge and Zhu, Lei}, title = {Learning Diffusion Texture Priors for Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2524-2534} }
Bring Event into RGB and LiDAR: Hierarchical Visual-Motion Fusion for Scene Flow-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR, author = {Zhou, Hanyu and Chang, Yi and Shi, Zhiwei}, title = {Bring Event into RGB and LiDAR: Hierarchical Visual-Motion Fusion for Scene Flow}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26477-26486} }
Entangled View-Epipolar Information Aggregation for Generalizable Neural Radiance Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Min_2024_CVPR, author = {Min, Zhiyuan and Luo, Yawei and Yang, Wei and Wang, Yuesong and Yang, Yi}, title = {Entangled View-Epipolar Information Aggregation for Generalizable Neural Radiance Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4906-4916} }
Jack of All Tasks Master of Many: Designing General-Purpose Coarse-to-Fine Vision-Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pramanick_2024_CVPR, author = {Pramanick, Shraman and Han, Guangxing and Hou, Rui and Nag, Sayan and Lim, Ser-Nam and Ballas, Nicolas and Wang, Qifan and Chellappa, Rama and Almahairi, Amjad}, title = {Jack of All Tasks Master of Many: Designing General-Purpose Coarse-to-Fine Vision-Language Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14076-14088} }
MMVP: A Multimodal MoCap Dataset with Vision and Pressure Sensors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, He and Ren, Shenghao and Yuan, Haolei and Zhao, Jianhui and Li, Fan and Sun, Shuangpeng and Liang, Zhenghao and Yu, Tao and Shen, Qiu and Cao, Xun}, title = {MMVP: A Multimodal MoCap Dataset with Vision and Pressure Sensors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21842-21852} }
YolOOD: Utilizing Object Detection Concepts for Multi-Label Out-of-Distribution Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zolfi_2024_CVPR, author = {Zolfi, Alon and Amit, Guy and Baras, Amit and Koda, Satoru and Morikawa, Ikuya and Elovici, Yuval and Shabtai, Asaf}, title = {YolOOD: Utilizing Object Detection Concepts for Multi-Label Out-of-Distribution Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5788-5797} }
SchurVINS: Schur Complement-Based Lightweight Visual Inertial Navigation System-
[pdf]
[arXiv]
[bibtex]@InProceedings{Fan_2024_CVPR, author = {Fan, Yunfei and Zhao, Tianyu and Wang, Guidong}, title = {SchurVINS: Schur Complement-Based Lightweight Visual Inertial Navigation System}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17964-17973} }
Collaborating Foundation Models for Domain Generalized Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Benigmim_2024_CVPR, author = {Benigmim, Yasser and Roy, Subhankar and Essid, Slim and Kalogeiton, Vicky and Lathuili\`ere, St\'ephane}, title = {Collaborating Foundation Models for Domain Generalized Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3108-3119} }
Towards Variable and Coordinated Holistic Co-Speech Motion Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Yifei and Cao, Qiong and Wen, Yandong and Jiang, Huaiguang and Ding, Changxing}, title = {Towards Variable and Coordinated Holistic Co-Speech Motion Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1566-1576} }
JoAPR: Cleaning the Lens of Prompt Learning for Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2024_CVPR, author = {Guo, Yuncheng and Gu, Xiaodong}, title = {JoAPR: Cleaning the Lens of Prompt Learning for Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28695-28705} }
AllSpark: Reborn Labeled Features from Unlabeled in Transformer for Semi-Supervised Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Haonan and Zhang, Qixiang and Li, Yi and Li, Xiaomeng}, title = {AllSpark: Reborn Labeled Features from Unlabeled in Transformer for Semi-Supervised Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3627-3636} }
Open-Vocabulary 3D Semantic Segmentation with Foundation Models-
[pdf]
[bibtex]@InProceedings{Jiang_2024_CVPR, author = {Jiang, Li and Shi, Shaoshuai and Schiele, Bernt}, title = {Open-Vocabulary 3D Semantic Segmentation with Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21284-21294} }
SIGNeRF: Scene Integrated Generation for Neural Radiance Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dihlmann_2024_CVPR, author = {Dihlmann, Jan-Niklas and Engelhardt, Andreas and Lensch, Hendrik}, title = {SIGNeRF: Scene Integrated Generation for Neural Radiance Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6679-6688} }
ViP-LLaVA: Making Large Multimodal Models Understand Arbitrary Visual Prompts-
[pdf]
[supp]
[bibtex]@InProceedings{Cai_2024_CVPR, author = {Cai, Mu and Liu, Haotian and Mustikovela, Siva Karthik and Meyer, Gregory P. and Chai, Yuning and Park, Dennis and Lee, Yong Jae}, title = {ViP-LLaVA: Making Large Multimodal Models Understand Arbitrary Visual Prompts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12914-12923} }
OVER-NAV: Elevating Iterative Vision-and-Language Navigation with Open-Vocabulary Detection and StructurEd Representation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Ganlong and Li, Guanbin and Chen, Weikai and Yu, Yizhou}, title = {OVER-NAV: Elevating Iterative Vision-and-Language Navigation with Open-Vocabulary Detection and StructurEd Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16296-16306} }
1-Lipschitz Layers Compared: Memory Speed and Certifiable Robustness-
[pdf]
[supp]
[bibtex]@InProceedings{Prach_2024_CVPR, author = {Prach, Bernd and Brau, Fabio and Buttazzo, Giorgio and Lampert, Christoph H.}, title = {1-Lipschitz Layers Compared: Memory Speed and Certifiable Robustness}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24574-24583} }
All Rivers Run to the Sea: Private Learning with Asymmetric Flows-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Niu_2024_CVPR, author = {Niu, Yue and Ali, Ramy E. and Prakash, Saurav and Avestimehr, Salman}, title = {All Rivers Run to the Sea: Private Learning with Asymmetric Flows}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12353-12362} }
Generating Illustrated Instructions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Menon_2024_CVPR, author = {Menon, Sachit and Misra, Ishan and Girdhar, Rohit}, title = {Generating Illustrated Instructions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6274-6284} }
Construct to Associate: Cooperative Context Learning for Domain Adaptive Point Cloud Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Guangrui}, title = {Construct to Associate: Cooperative Context Learning for Domain Adaptive Point Cloud Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27917-27926} }
Robust Image Denoising through Adversarial Frequency Mixup-
[pdf]
[supp]
[bibtex]@InProceedings{Ryou_2024_CVPR, author = {Ryou, Donghun and Ha, Inju and Yoo, Hyewon and Kim, Dongwan and Han, Bohyung}, title = {Robust Image Denoising through Adversarial Frequency Mixup}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2723-2732} }
HandBooster: Boosting 3D Hand-Mesh Reconstruction by Conditional Synthesis and Sampling of Hand-Object Interactions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Hao and Li, Haipeng and Wang, Yinqiao and Liu, Shuaicheng and Fu, Chi-Wing}, title = {HandBooster: Boosting 3D Hand-Mesh Reconstruction by Conditional Synthesis and Sampling of Hand-Object Interactions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10159-10169} }
A-Teacher: Asymmetric Network for 3D Semi-Supervised Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Hanshi and Zhang, Zhipeng and Gao, Jin and Hu, Weiming}, title = {A-Teacher: Asymmetric Network for 3D Semi-Supervised Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14978-14987} }
GoMVS: Geometrically Consistent Cost Aggregation for Multi-View Stereo-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Jiang and Li, Rui and Xu, Haofei and Zhao, Wenxun and Zhu, Yu and Sun, Jinqiu and Zhang, Yanning}, title = {GoMVS: Geometrically Consistent Cost Aggregation for Multi-View Stereo}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20207-20216} }
Evaluating Transferability in Retrieval Tasks: An Approach Using MMD and Kernel Methods-
[pdf]
[supp]
[bibtex]@InProceedings{Dai_2024_CVPR, author = {Dai, Mengyu and Raffiee, Amir Hossein and Jain, Aashish and Correa, Joshua}, title = {Evaluating Transferability in Retrieval Tasks: An Approach Using MMD and Kernel Methods}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22390-22400} }
AnyScene: Customized Image Synthesis with Composited Foreground-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Ruidong and Wang, Lanjun and Nie, Weizhi and Zhang, Yongdong and Liu, An-An}, title = {AnyScene: Customized Image Synthesis with Composited Foreground}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8724-8733} }
Training Generative Image Super-Resolution Models by Wavelet-Domain Losses Enables Better Control of Artifacts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Korkmaz_2024_CVPR, author = {Korkmaz, Cansu and Tekalp, A. Murat and Dogan, Zafer}, title = {Training Generative Image Super-Resolution Models by Wavelet-Domain Losses Enables Better Control of Artifacts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5926-5936} }
Visual Objectification in Films: Towards a New AI Task for Video Interpretation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tores_2024_CVPR, author = {Tores, Julie and Sassatelli, Lucile and Wu, Hui-Yin and Bergman, Clement and Andolfi, L\'ea and Ecrement, Victor and Precioso, Fr\'ed\'eric and Devars, Thierry and Guaresi, Magali and Julliard, Virginie and Lecossais, Sarah}, title = {Visual Objectification in Films: Towards a New AI Task for Video Interpretation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10864-10874} }
OMG-Seg: Is One Model Good Enough For All Segmentation?-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Xiangtai and Yuan, Haobo and Li, Wei and Ding, Henghui and Wu, Size and Zhang, Wenwei and Li, Yining and Chen, Kai and Loy, Chen Change}, title = {OMG-Seg: Is One Model Good Enough For All Segmentation?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27948-27959} }
BiTT: Bi-directional Texture Reconstruction of Interacting Two Hands from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Minje and Kim, Tae-Kyun}, title = {BiTT: Bi-directional Texture Reconstruction of Interacting Two Hands from a Single Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10726-10735} }
DetCLIPv3: Towards Versatile Generative Open-vocabulary Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yao_2024_CVPR, author = {Yao, Lewei and Pi, Renjie and Han, Jianhua and Liang, Xiaodan and Xu, Hang and Zhang, Wei and Li, Zhenguo and Xu, Dan}, title = {DetCLIPv3: Towards Versatile Generative Open-vocabulary Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27391-27401} }
UVEB: A Large-scale Benchmark and Baseline Towards Real-World Underwater Video Enhancement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2024_CVPR, author = {Xie, Yaofeng and Kong, Lingwei and Chen, Kai and Zheng, Ziqiang and Yu, Xiao and Yu, Zhibin and Zheng, Bing}, title = {UVEB: A Large-scale Benchmark and Baseline Towards Real-World Underwater Video Enhancement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22358-22367} }
Learning to Localize Objects Improves Spatial Reasoning in Visual-LLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ranasinghe_2024_CVPR, author = {Ranasinghe, Kanchana and Shukla, Satya Narayan and Poursaeed, Omid and Ryoo, Michael S. and Lin, Tsung-Yu}, title = {Learning to Localize Objects Improves Spatial Reasoning in Visual-LLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12977-12987} }
Monocular Identity-Conditioned Facial Reflectance Reconstruction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ren_2024_CVPR, author = {Ren, Xingyu and Deng, Jiankang and Cheng, Yuhao and Guo, Jia and Ma, Chao and Yan, Yichao and Zhu, Wenhan and Yang, Xiaokang}, title = {Monocular Identity-Conditioned Facial Reflectance Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {885-895} }
C3: High-Performance and Low-Complexity Neural Compression from a Single Image or Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Hyunjik and Bauer, Matthias and Theis, Lucas and Schwarz, Jonathan Richard and Dupont, Emilien}, title = {C3: High-Performance and Low-Complexity Neural Compression from a Single Image or Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9347-9358} }
Self-Distilled Masked Auto-Encoders are Efficient Video Anomaly Detectors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ristea_2024_CVPR, author = {Ristea, Nicolae-C\u{a}t\u{a}lin and Croitoru, Florinel-Alin and Ionescu, Radu Tudor and Popescu, Marius and Khan, Fahad Shahbaz and Shah, Mubarak}, title = {Self-Distilled Masked Auto-Encoders are Efficient Video Anomaly Detectors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15984-15995} }
Revisiting Non-Autoregressive Transformers for Efficient Image Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ni_2024_CVPR, author = {Ni, Zanlin and Wang, Yulin and Zhou, Renping and Guo, Jiayi and Hu, Jinyi and Liu, Zhiyuan and Song, Shiji and Yao, Yuan and Huang, Gao}, title = {Revisiting Non-Autoregressive Transformers for Efficient Image Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7007-7016} }
Distilling Vision-Language Models on Millions of Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Yue and Zhao, Long and Zhou, Xingyi and Wu, Jialin and Chu, Chun-Te and Miao, Hui and Schroff, Florian and Adam, Hartwig and Liu, Ting and Gong, Boqing and Krahenbuhl, Philipp and Yuan, Liangzhe}, title = {Distilling Vision-Language Models on Millions of Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13106-13116} }
ANIM: Accurate Neural Implicit Model for Human Reconstruction from a single RGB-D Image-
[pdf]
[supp]
[bibtex]@InProceedings{Pesavento_2024_CVPR, author = {Pesavento, Marco and Xu, Yuanlu and Sarafianos, Nikolaos and Maier, Robert and Wang, Ziyan and Yao, Chun-Han and Volino, Marco and Boyer, Edmond and Hilton, Adrian and Tung, Tony}, title = {ANIM: Accurate Neural Implicit Model for Human Reconstruction from a single RGB-D Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5448-5458} }
Real-Time Simulated Avatar from Head-Mounted Sensors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2024_CVPR, author = {Luo, Zhengyi and Cao, Jinkun and Khirodkar, Rawal and Winkler, Alexander and Kitani, Kris and Xu, Weipeng}, title = {Real-Time Simulated Avatar from Head-Mounted Sensors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {571-581} }
Discovering Syntactic Interaction Clues for Human-Object Interaction Detection-
[pdf]
[bibtex]@InProceedings{Luo_2024_CVPR, author = {Luo, Jinguo and Ren, Weihong and Jiang, Weibo and Chen, Xi'ai and Wang, Qiang and Han, Zhi and Liu, Honghai}, title = {Discovering Syntactic Interaction Clues for Human-Object Interaction Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28212-28222} }
Inter-X: Towards Versatile Human-Human Interaction Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Liang and Lv, Xintao and Yan, Yichao and Jin, Xin and Wu, Shuwen and Xu, Congsheng and Liu, Yifan and Zhou, Yizhou and Rao, Fengyun and Sheng, Xingdong and Liu, Yunhui and Zeng, Wenjun and Yang, Xiaokang}, title = {Inter-X: Towards Versatile Human-Human Interaction Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22260-22271} }
Generalized Predictive Model for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Jiazhi and Gao, Shenyuan and Qiu, Yihang and Chen, Li and Li, Tianyu and Dai, Bo and Chitta, Kashyap and Wu, Penghao and Zeng, Jia and Luo, Ping and Zhang, Jun and Geiger, Andreas and Qiao, Yu and Li, Hongyang}, title = {Generalized Predictive Model for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14662-14672} }
FACT: Frame-Action Cross-Attention Temporal Modeling for Efficient Action Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2024_CVPR, author = {Lu, Zijia and Elhamifar, Ehsan}, title = {FACT: Frame-Action Cross-Attention Temporal Modeling for Efficient Action Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18175-18185} }
Test-Time Zero-Shot Temporal Action Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liberatori_2024_CVPR, author = {Liberatori, Benedetta and Conti, Alessandro and Rota, Paolo and Wang, Yiming and Ricci, Elisa}, title = {Test-Time Zero-Shot Temporal Action Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18720-18729} }
AM-RADIO: Agglomerative Vision Foundation Model Reduce All Domains Into One-
[pdf]
[supp]
[bibtex]@InProceedings{Ranzinger_2024_CVPR, author = {Ranzinger, Mike and Heinrich, Greg and Kautz, Jan and Molchanov, Pavlo}, title = {AM-RADIO: Agglomerative Vision Foundation Model Reduce All Domains Into One}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12490-12500} }
MaskClustering: View Consensus based Mask Graph Clustering for Open-Vocabulary 3D Instance Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2024_CVPR, author = {Yan, Mi and Zhang, Jiazhao and Zhu, Yan and Wang, He}, title = {MaskClustering: View Consensus based Mask Graph Clustering for Open-Vocabulary 3D Instance Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28274-28284} }
Seamless Human Motion Composition with Blended Positional Encodings-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Barquero_2024_CVPR, author = {Barquero, German and Escalera, Sergio and Palmero, Cristina}, title = {Seamless Human Motion Composition with Blended Positional Encodings}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {457-469} }
PeerAiD: Improving Adversarial Distillation from a Specialized Peer Tutor-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jung_2024_CVPR, author = {Jung, Jaewon and Jang, Hongsun and Song, Jaeyong and Lee, Jinho}, title = {PeerAiD: Improving Adversarial Distillation from a Specialized Peer Tutor}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24482-24491} }
Scaling Laws for Data Filtering-- Data Curation cannot be Compute Agnostic-
[pdf]
[supp]
[bibtex]@InProceedings{Goyal_2024_CVPR, author = {Goyal, Sachin and Maini, Pratyush and Lipton, Zachary C. and Raghunathan, Aditi and Kolter, J. Zico}, title = {Scaling Laws for Data Filtering-- Data Curation cannot be Compute Agnostic}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22702-22711} }
FastMAC: Stochastic Spectral Sampling of Correspondence Graph-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Yifei and Zhao, Hao and Li, Hongyang and Chen, Siheng}, title = {{FastMAC}: Stochastic Spectral Sampling of Correspondence Graph}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {17857--17867} }
FedUV: Uniformity and Variance for Heterogeneous Federated Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Son_2024_CVPR, author = {Son, Ha Min and Kim, Moon-Hyun and Chung, Tai-Myoung and Huang, Chao and Liu, Xin}, title = {{FedUV}: Uniformity and Variance for Heterogeneous Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {5863--5872} }
FedSOL: Stabilized Orthogonal Learning with Proximal Restrictions in Federated Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2024_CVPR, author = {Lee, Gihun and Jeong, Minchan and Kim, Sangmook and Oh, Jaehoon and Yun, Se-Young}, title = {{FedSOL}: Stabilized Orthogonal Learning with Proximal Restrictions in Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {12512--12522} }
GAvatar: Animatable 3D Gaussian Avatars with Implicit Mesh Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2024_CVPR, author = {Yuan, Ye and Li, Xueting and Huang, Yangyi and De Mello, Shalini and Nagano, Koki and Kautz, Jan and Iqbal, Umar}, title = {{GAvatar}: Animatable {3D} {Gaussian} Avatars with Implicit Mesh Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {896--905} }
Beyond Average: Individualized Visual Scanpath Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Xianyu and Jiang, Ming and Zhao, Qi}, title = {Beyond Average: Individualized Visual Scanpath Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {25420--25431} }
A Category Agnostic Model for Visual Rearrangment-
[pdf]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Yuyi and Song, Xinhang and Li, Weijie and Wang, Xiaohan and Jiang, Shuqiang}, title = {A Category Agnostic Model for Visual Rearrangment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {16457--16466} }
Grounding Everything: Emerging Localization Properties in Vision-Language Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bousselham_2024_CVPR, author = {Bousselham, Walid and Petersen, Felix and Ferrari, Vittorio and Kuehne, Hilde}, title = {Grounding Everything: Emerging Localization Properties in Vision-Language Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {3828--3837} }
Seeing Motion at Nighttime with an Event Camera-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Haoyue and Peng, Shihan and Zhu, Lin and Chang, Yi and Zhou, Hanyu and Yan, Luxin}, title = {Seeing Motion at Nighttime with an Event Camera}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {25648--25658} }
Representing Part-Whole Hierarchies in Foundation Models by Learning Localizability Composability and Decomposability from Anatomy via Self Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Taher_2024_CVPR, author = {Taher, Mohammad Reza Hosseinzadeh and Gotway, Michael B. and Liang, Jianming}, title = {Representing Part-Whole Hierarchies in Foundation Models by Learning Localizability Composability and Decomposability from Anatomy via Self Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {11269--11281} }
Efficient Test-Time Adaptation of Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Karmanov_2024_CVPR, author = {Karmanov, Adilbek and Guan, Dayan and Lu, Shijian and El Saddik, Abdulmotaleb and Xing, Eric}, title = {Efficient Test-Time Adaptation of Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {14162--14171} }
Eyes Wide Shut? Exploring the Visual Shortcomings of Multimodal LLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tong_2024_CVPR, author = {Tong, Shengbang and Liu, Zhuang and Zhai, Yuexiang and Ma, Yi and LeCun, Yann and Xie, Saining}, title = {Eyes Wide Shut? Exploring the Visual Shortcomings of Multimodal {LLMs}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {9568--9578} }
Mean-Shift Feature Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{Kobayashi_2024_CVPR, author = {Kobayashi, Takumi}, title = {Mean-Shift Feature Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {6047--6056} }
Domain Separation Graph Neural Networks for Saliency Object Ranking-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Zijian and Lu, Jun and Han, Jing and Bai, Lianfa and Zhang, Yi and Zhao, Zhuang and Song, Siyang}, title = {Domain Separation Graph Neural Networks for Saliency Object Ranking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {3964--3974} }
Mind Marginal Non-Crack Regions: Clustering-Inspired Representation Learning for Crack Segmentation-
[pdf]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Zhuangzhuang and Lai, Zhuonan and Chen, Jie and Li, Jianqiang}, title = {Mind Marginal Non-Crack Regions: Clustering-Inspired Representation Learning for Crack Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {12698--12708} }
FISBe: A Real-World Benchmark Dataset for Instance Segmentation of Long-Range Thin Filamentous Structures-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mais_2024_CVPR, author = {Mais, Lisa and Hirsch, Peter and Managan, Claire and Kandarpa, Ramya and Rumberger, Josef Lorenz and Reinke, Annika and Maier-Hein, Lena and Ihrke, Gudrun and Kainmueller, Dagmar}, title = {{FISBe}: A Real-World Benchmark Dataset for Instance Segmentation of Long-Range Thin Filamentous Structures}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {22249--22259} }
RegionGPT: Towards Region Understanding Vision Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2024_CVPR, author = {Guo, Qiushan and De Mello, Shalini and Yin, Hongxu and Byeon, Wonmin and Cheung, Ka Chun and Yu, Yizhou and Luo, Ping and Liu, Sifei}, title = {{RegionGPT}: Towards Region Understanding Vision Language Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {13796--13806} }
LL3DA: Visual Interactive Instruction Tuning for Omni-3D Understanding Reasoning and Planning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Sijin and Chen, Xin and Zhang, Chi and Li, Mingsheng and Yu, Gang and Fei, Hao and Zhu, Hongyuan and Fan, Jiayuan and Chen, Tao}, title = {{LL3DA}: Visual Interactive Instruction Tuning for {Omni-3D} Understanding Reasoning and Planning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {26428--26438} }
4D Gaussian Splatting for Real-Time Dynamic Scene Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Guanjun and Yi, Taoran and Fang, Jiemin and Xie, Lingxi and Zhang, Xiaopeng and Wei, Wei and Liu, Wenyu and Tian, Qi and Wang, Xinggang}, title = {{4D} {Gaussian} Splatting for Real-Time Dynamic Scene Rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {20310--20320} }
RAM-Avatar: Real-time Photo-Realistic Avatar from Monocular Videos with Full-body Control-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2024_CVPR, author = {Deng, Xiang and Zheng, Zerong and Zhang, Yuxiang and Sun, Jingxiang and Xu, Chao and Yang, Xiaodong and Wang, Lizhen and Liu, Yebin}, title = {{RAM-Avatar}: Real-time Photo-Realistic Avatar from Monocular Videos with Full-body Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {1996--2007} }
Selective-Stereo: Adaptive Frequency Information Selection for Stereo Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Xianqi and Xu, Gangwei and Jia, Hao and Yang, Xin}, title = {{Selective-Stereo}: Adaptive Frequency Information Selection for Stereo Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {19701--19710} }
PerAda: Parameter-Efficient Federated Learning Personalization with Generalization Guarantees-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2024_CVPR, author = {Xie, Chulin and Huang, De-An and Chu, Wenda and Xu, Daguang and Xiao, Chaowei and Li, Bo and Anandkumar, Anima}, title = {{PerAda}: Parameter-Efficient Federated Learning Personalization with Generalization Guarantees}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {23838--23848} }
MAFA: Managing False Negatives for Vision-Language Pre-training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Byun_2024_CVPR, author = {Byun, Jaeseok and Kim, Dohoon and Moon, Taesup}, title = {{MAFA}: Managing False Negatives for Vision-Language Pre-training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {27314--27324} }
Video Prediction by Modeling Videos as Continuous Multi-Dimensional Processes-
[pdf]
[supp]
[bibtex]@InProceedings{Shrivastava_2024_CVPR, author = {Shrivastava, Gaurav and Shrivastava, Abhinav}, title = {Video Prediction by Modeling Videos as Continuous Multi-Dimensional Processes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {7236--7245} }
PICTURE: PhotorealistIC virtual Try-on from UnconstRained dEsigns-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ning_2024_CVPR, author = {Ning, Shuliang and Wang, Duomin and Qin, Yipeng and Jin, Zirong and Wang, Baoyuan and Han, Xiaoguang}, title = {{PICTURE}: {PhotorealistIC} virtual Try-on from {UnconstRained} {dEsigns}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {6976--6985} }
InfLoRA: Interference-Free Low-Rank Adaptation for Continual Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2024_CVPR, author = {Liang, Yan-Shuo and Li, Wu-Jun}, title = {{InfLoRA}: Interference-Free Low-Rank Adaptation for Continual Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {23638--23647} }
Towards Robust 3D Pose Transfer with Adversarial Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Haoyu and Tang, Hao and Adeli, Ehsan and Zhao, Guoying}, title = {Towards Robust {3D} Pose Transfer with Adversarial Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {2295--2304} }
Error Detection in Egocentric Procedural Task Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2024_CVPR, author = {Lee, Shih-Po and Lu, Zijia and Zhang, Zekun and Hoai, Minh and Elhamifar, Ehsan}, title = {Error Detection in Egocentric Procedural Task Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {18655--18666} }
EAGLE: Eigen Aggregation Learning for Object-Centric Unsupervised Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Chanyoung and Han, Woojung and Ju, Dayun and Hwang, Seong Jae}, title = {{EAGLE}: Eigen Aggregation Learning for Object-Centric Unsupervised Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {3523--3533} }
AVID: Any-Length Video Inpainting with Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Zhixing and Wu, Bichen and Wang, Xiaoyan and Luo, Yaqiao and Zhang, Luxin and Zhao, Yinan and Vajda, Peter and Metaxas, Dimitris and Yu, Licheng}, title = {{AVID}: Any-Length Video Inpainting with Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {7162--7172} }
NoiseCollage: A Layout-Aware Text-to-Image Diffusion Model Based on Noise Cropping and Merging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shirakawa_2024_CVPR, author = {Shirakawa, Takahiro and Uchida, Seiichi}, title = {{NoiseCollage}: A Layout-Aware Text-to-Image Diffusion Model Based on Noise Cropping and Merging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {8921--8930} }
Uncertainty-Guided Never-Ending Learning to Drive-
[pdf]
[bibtex]@InProceedings{Lai_2024_CVPR, author = {Lai, Lei and Ohn-Bar, Eshed and Arora, Sanjay and Yi, John Seon Keun}, title = {Uncertainty-Guided Never-Ending Learning to Drive}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {15088--15098} }
FakeInversion: Learning to Detect Images from Unseen Text-to-Image Models by Inverting Stable Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cazenavette_2024_CVPR, author = {Cazenavette, George and Sud, Avneesh and Leung, Thomas and Usman, Ben}, title = {{FakeInversion}: Learning to Detect Images from Unseen Text-to-Image Models by Inverting {Stable Diffusion}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {10759--10769} }
PLGSLAM: Progressive Neural Scene Represenation with Local to Global Bundle Adjustment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2024_CVPR, author = {Deng, Tianchen and Shen, Guole and Qin, Tong and Wang, Jianyu and Zhao, Wentao and Wang, Jingchuan and Wang, Danwei and Chen, Weidong}, title = {{PLGSLAM}: Progressive Neural Scene Represenation with Local to Global Bundle Adjustment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {19657--19666} }
Multi-Task Dense Prediction via Mixture of Low-Rank Experts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Yuqi and Jiang, Peng-Tao and Hou, Qibin and Zhang, Hao and Chen, Jinwei and Li, Bo}, title = {Multi-Task Dense Prediction via Mixture of Low-Rank Experts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {27927--27937} }
Binding Touch to Everything: Learning Unified Multimodal Tactile Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Fengyu and Feng, Chao and Chen, Ziyang and Park, Hyoungseob and Wang, Daniel and Dou, Yiming and Zeng, Ziyao and Chen, Xien and Gangopadhyay, Rit and Owens, Andrew and Wong, Alex}, title = {Binding Touch to Everything: Learning Unified Multimodal Tactile Representations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {26340--26353} }
Attribute-Guided Pedestrian Retrieval: Bridging Person Re-ID with Internal Attribute Variability-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Yan and Zhang, Zhang and Wu, Qiang and Zhong, Yi and Wang, Liang}, title = {Attribute-Guided Pedestrian Retrieval: Bridging Person {Re-ID} with Internal Attribute Variability}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {17689--17699} }
Text Is MASS: Modeling as Stochastic Embedding for Text-Video Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Jiamian and Sun, Guohao and Wang, Pichao and Liu, Dongfang and Dianat, Sohail and Rabbani, Majid and Rao, Raghuveer and Tao, Zhiqiang}, title = {Text Is {MASS}: Modeling as Stochastic Embedding for Text-Video Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {16551--16560} }
Your Transferability Barrier is Fragile: Free-Lunch for Transferring the Non-Transferable Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Hong_2024_CVPR, author = {Hong, Ziming and Shen, Li and Liu, Tongliang}, title = {Your Transferability Barrier is Fragile: Free-Lunch for Transferring the Non-Transferable Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {28805--28815} }
Arbitrary Motion Style Transfer with Multi-condition Motion Latent Diffusion Model-
[pdf]
[supp]
[bibtex]@InProceedings{Song_2024_CVPR, author = {Song, Wenfeng and Jin, Xingliang and Li, Shuai and Chen, Chenglizhao and Hao, Aimin and Hou, Xia and Li, Ning and Qin, Hong}, title = {Arbitrary Motion Style Transfer with Multi-condition Motion Latent Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {821--830} }
Know Your Neighbors: Improving Single-View Reconstruction via Spatial Vision-Language Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Rui and Fischer, Tobias and Segu, Mattia and Pollefeys, Marc and Van Gool, Luc and Tombari, Federico}, title = {Know Your Neighbors: Improving Single-View Reconstruction via Spatial Vision-Language Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {9848--9858} }
Complementing Event Streams and RGB Frames for Hand Mesh Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2024_CVPR, author = {Jiang, Jianping and Zhou, Xinyu and Wang, Bingxuan and Deng, Xiaoming and Xu, Chao and Shi, Boxin}, title = {Complementing Event Streams and {RGB} Frames for Hand Mesh Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {24944--24954} }
Empowering Resampling Operation for Ultra-High-Definition Image Enhancement with Model-Aware Guidance-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2024_CVPR, author = {Yu, Wei and Huang, Jie and Li, Bing and Zheng, Kaiwen and Zhu, Qi and Zhou, Man and Zhao, Feng}, title = {Empowering Resampling Operation for Ultra-High-Definition Image Enhancement with Model-Aware Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {25722--25731} }
ViT-CoMer: Vision Transformer with Convolutional Multi-scale Feature Interaction for Dense Predictions-
[pdf]
[bibtex]@InProceedings{Xia_2024_CVPR, author = {Xia, Chunlong and Wang, Xinliang and Lv, Feng and Hao, Xin and Shi, Yifeng}, title = {{ViT-CoMer}: Vision Transformer with Convolutional Multi-scale Feature Interaction for Dense Predictions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {5493--5502} }
PromptCoT: Align Prompt Distribution via Adapted Chain-of-Thought-
[pdf]
[supp]
[bibtex]@InProceedings{Yao_2024_CVPR, author = {Yao, Junyi and Liu, Yijiang and Dong, Zhen and Guo, Mingfei and Hu, Helan and Keutzer, Kurt and Du, Li and Zhou, Daquan and Zhang, Shanghang}, title = {{PromptCoT}: Align Prompt Distribution via Adapted Chain-of-Thought}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {7027--7037} }
Hallucination Augmented Contrastive Learning for Multimodal Large Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2024_CVPR, author = {Jiang, Chaoya and Xu, Haiyang and Dong, Mengfan and Chen, Jiaxing and Ye, Wei and Yan, Ming and Ye, Qinghao and Zhang, Ji and Huang, Fei and Zhang, Shikun}, title = {Hallucination Augmented Contrastive Learning for Multimodal Large Language Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {27036--27046} }
Preserving Fairness Generalization in Deepfake Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2024_CVPR, author = {Lin, Li and He, Xinan and Ju, Yan and Wang, Xin and Ding, Feng and Hu, Shu}, title = {Preserving Fairness Generalization in Deepfake Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {16815--16825} }
Anomaly Score: Evaluating Generative Models and Individual Generated Images based on Complexity and Vulnerability-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hwang_2024_CVPR, author = {Hwang, Jaehui and Lee, Junghyuk and Lee, Jong-Seok}, title = {Anomaly Score: Evaluating Generative Models and Individual Generated Images based on Complexity and Vulnerability}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {8754--8763} }
Structure-Aware Sparse-View X-ray 3D Reconstruction-
[pdf]
[bibtex]@InProceedings{Cai_2024_CVPR, author = {Cai, Yuanhao and Wang, Jiahao and Yuille, Alan and Zhou, Zongwei and Wang, Angtian}, title = {Structure-Aware Sparse-View {X-ray} {3D} Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {11174--11183} }
Dexterous Grasp Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Guo-Hao and Wei, Yi-Lin and Zheng, Dian and Wu, Xiao-Ming and Zheng, Wei-Shi}, title = {Dexterous Grasp Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {17933--17942} }
Cooperation Does Matter: Exploring Multi-Order Bilateral Relations for Audio-Visual Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Qi and Nie, Xing and Li, Tong and Gao, Pengfei and Guo, Ying and Zhen, Cheng and Yan, Pengfei and Xiang, Shiming}, title = {Cooperation Does Matter: Exploring Multi-Order Bilateral Relations for Audio-Visual Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {27134--27143} }
EgoThink: Evaluating First-Person Perspective Thinking Capability of Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2024_CVPR, author = {Cheng, Sijie and Guo, Zhicheng and Wu, Jingwen and Fang, Kechen and Li, Peng and Liu, Huaping and Liu, Yang}, title = {{EgoThink}: Evaluating First-Person Perspective Thinking Capability of Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {14291--14302} }
Hearing Anything Anywhere-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Mason Long and Sawata, Ryosuke and Clarke, Samuel and Gao, Ruohan and Wu, Shangzhe and Wu, Jiajun}, title = {Hearing Anything Anywhere}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {11790--11799} }
PatchFusion: An End-to-End Tile-Based Framework for High-Resolution Monocular Metric Depth Estimation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Zhenyu and Bhat, Shariq Farooq and Wonka, Peter}, title = {{PatchFusion}: An End-to-End Tile-Based Framework for High-Resolution Monocular Metric Depth Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {10016--10025} }
GeneAvatar: Generic Expression-Aware Volumetric Head Avatar Editing from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bao_2024_CVPR, author = {Bao, Chong and Zhang, Yinda and Li, Yuan and Zhang, Xiyu and Yang, Bangbang and Bao, Hujun and Pollefeys, Marc and Zhang, Guofeng and Cui, Zhaopeng}, title = {{GeneAvatar}: Generic Expression-Aware Volumetric Head Avatar Editing from a Single Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {8952--8963} }
Improved Self-Training for Test-Time Adaptation-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2024_CVPR, author = {Ma, Jing}, title = {Improved Self-Training for Test-Time Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {23701--23710} }
Learn to Rectify the Bias of CLIP for Unsupervised Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Jingyun and Kang, Guoliang}, title = {Learn to Rectify the Bias of {CLIP} for Unsupervised Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {4102--4112} }
Unsupervised Feature Learning with Emergent Data-Driven Prototypicality-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2024_CVPR, author = {Guo, Yunhui and Zhang, Youren and Chen, Yubei and Yu, Stella X.}, title = {Unsupervised Feature Learning with Emergent Data-Driven Prototypicality}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {23199--23208} }
Unlocking Pre-trained Image Backbones for Semantic Image Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Ifriqi_2024_CVPR, author = {Ifriqi, Tariq Berrada and Verbeek, Jakob and Couprie, Camille and Alahari, Karteek}, title = {Unlocking Pre-trained Image Backbones for Semantic Image Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {7840--7849} }
Retrieval-Augmented Egocentric Video Captioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Jilan and Huang, Yifei and Hou, Junlin and Chen, Guo and Zhang, Yuejie and Feng, Rui and Xie, Weidi}, title = {Retrieval-Augmented Egocentric Video Captioning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {13525--13536} }
SkillDiffuser: Interpretable Hierarchical Planning via Skill Abstractions in Diffusion-Based Task Execution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2024_CVPR, author = {Liang, Zhixuan and Mu, Yao and Ma, Hengbo and Tomizuka, Masayoshi and Ding, Mingyu and Luo, Ping}, title = {{SkillDiffuser}: Interpretable Hierarchical Planning via Skill Abstractions in Diffusion-Based Task Execution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {16467--16476} }
Improving Generalized Zero-Shot Learning by Exploring the Diverse Semantics from External Class Names-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Yapeng and Luo, Yong and Wang, Zengmao and Du, Bo}, title = {Improving Generalized Zero-Shot Learning by Exploring the Diverse Semantics from External Class Names}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {23344--23353} }
TeMO: Towards Text-Driven 3D Stylization for Multi-Object Meshes-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Xuying and Yin, Bo-Wen and Chen, Yuming and Lin, Zheng and Li, Yunheng and Hou, Qibin and Cheng, Ming-Ming}, title = {{TeMO}: Towards Text-Driven {3D} Stylization for Multi-Object Meshes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {19531--19540} }
TE-TAD: Towards Full End-to-End Temporal Action Detection via Time-Aligned Coordinate Expression-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Ho-Joong and Hong, Jung-Ho and Kong, Heejo and Lee, Seong-Whan}, title = {{TE-TAD}: Towards Full End-to-End Temporal Action Detection via Time-Aligned Coordinate Expression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {18837--18846} }
GSNeRF: Generalizable Semantic Neural Radiance Fields with Enhanced 3D Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chou_2024_CVPR, author = {Chou, Zi-Ting and Huang, Sheng-Yu and Liu, I-Jieh and Wang, Yu-Chiang Frank}, title = {{GSNeRF}: Generalizable Semantic Neural Radiance Fields with Enhanced {3D} Scene Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {20806--20815} }
Alpha Invariance: On Inverse Scaling Between Distance and Volume Density in Neural Radiance Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ahn_2024_CVPR, author = {Ahn, Joshua and Wang, Haochen and Yeh, Raymond A. and Shakhnarovich, Greg}, title = {Alpha Invariance: On Inverse Scaling Between Distance and Volume Density in Neural Radiance Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {20396--20405} }
TexTile: A Differentiable Metric for Texture Tileability-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rodriguez-Pardo_2024_CVPR, author = {Rodriguez-Pardo, Carlos and Casas, Dan and Garces, Elena and Lopez-Moreno, Jorge}, title = {{TexTile}: A Differentiable Metric for Texture Tileability}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {4439--4449} }
D3T: Distinctive Dual-Domain Teacher Zigzagging Across RGB-Thermal Gap for Domain-Adaptive Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Do_2024_CVPR, author = {Do, Dinh Phat and Kim, Taehoon and Na, Jaemin and Kim, Jiwon and Lee, Keonho and Cho, Kyunghwan and Hwang, Wonjun}, title = {{D3T}: Distinctive Dual-Domain Teacher Zigzagging Across {RGB-Thermal} Gap for Domain-Adaptive Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {23313--23322} }
Positive-Unlabeled Learning by Latent Group-Aware Meta Disambiguation-
[pdf]
[supp]
[bibtex]@InProceedings{Long_2024_CVPR,
  author    = {Long, Lin and Wang, Haobo and Jiang, Zhijie and Feng, Lei and Yao, Chang and Chen, Gang and Zhao, Junbo},
  title     = {Positive-Unlabeled Learning by Latent Group-Aware Meta Disambiguation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {23138--23147},
}
Improving Image Restoration through Removing Degradations in Textual Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2024_CVPR,
  author    = {Lin, Jingbo and Zhang, Zhilu and Wei, Yuxiang and Ren, Dongwei and Jiang, Dongsheng and Tian, Qi and Zuo, Wangmeng},
  title     = {Improving Image Restoration through Removing Degradations in Textual Representations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2866--2878},
}
ZONE: Zero-Shot Instruction-Guided Local Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Shanglin and Zeng, Bohan and Feng, Yutang and Gao, Sicheng and Liu, Xiuhui and Liu, Jiaming and Li, Lin and Tang, Xu and Hu, Yao and Liu, Jianzhuang and Zhang, Baochang},
  title     = {{ZONE}: Zero-Shot Instruction-Guided Local Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6254--6263},
}
U-VAP: User-specified Visual Appearance Personalization via Decoupled Self Augmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2024_CVPR,
  author    = {Wu, You and Liu, Kean and Mi, Xiaoyue and Tang, Fan and Cao, Juan and Li, Jintao},
  title     = {{U-VAP}: User-specified Visual Appearance Personalization via Decoupled Self Augmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9482--9491},
}
PointBeV: A Sparse Approach for BeV Predictions-
[pdf]
[supp]
[bibtex]@InProceedings{Chambon_2024_CVPR,
  author    = {Chambon, Loick and Zablocki, Eloi and Chen, Micka{\"e}l and Bartoccioni, Florent and P{\'e}rez, Patrick and Cord, Matthieu},
  title     = {{PointBeV}: A Sparse Approach for {BeV} Predictions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15195--15204},
}
From-Ground-To-Objects: Coarse-to-Fine Self-supervised Monocular Depth Estimation of Dynamic Objects with Ground Contact Prior-
[pdf]
[supp]
[bibtex]@InProceedings{Moon_2024_CVPR,
  author    = {Moon, Jaeho and Bello, Juan Luis Gonzalez and Kwon, Byeongjun and Kim, Munchurl},
  title     = {{From-Ground-To-Objects}: Coarse-to-Fine Self-supervised Monocular Depth Estimation of Dynamic Objects with Ground Contact Prior},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10519--10529},
}
Linguistic-Aware Patch Slimming Framework for Fine-grained Cross-Modal Alignment-
[pdf]
[bibtex]@InProceedings{Fu_2024_CVPR,
  author    = {Fu, Zheren and Zhang, Lei and Xia, Hou and Mao, Zhendong},
  title     = {Linguistic-Aware Patch Slimming Framework for Fine-grained Cross-Modal Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {26307--26316},
}
HHMR: Holistic Hand Mesh Recovery by Enhancing the Multimodal Controllability of Graph Diffusion Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Mengcheng and Zhang, Hongwen and Zhang, Yuxiang and Shao, Ruizhi and Yu, Tao and Liu, Yebin},
  title     = {{HHMR}: Holistic Hand Mesh Recovery by Enhancing the Multimodal Controllability of Graph Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {645--654},
}
SRTube: Video-Language Pre-Training with Action-Centric Video Tube Features and Semantic Role Labeling-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2024_CVPR,
  author    = {Lee, Ju-Hee and Kang, Je-Won},
  title     = {{SRTube}: Video-Language Pre-Training with Action-Centric Video Tube Features and Semantic Role Labeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13689--13699},
}
Prompt Highlighter: Interactive Control for Multi-Modal LLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Yuechen and Qian, Shengju and Peng, Bohao and Liu, Shu and Jia, Jiaya},
  title     = {{Prompt Highlighter}: Interactive Control for Multi-Modal {LLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13215--13224},
}
Domain-Rectifying Adapter for Cross-Domain Few-Shot Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Su_2024_CVPR,
  author    = {Su, Jiapeng and Fan, Qi and Pei, Wenjie and Lu, Guangming and Chen, Fanglin},
  title     = {Domain-Rectifying Adapter for Cross-Domain Few-Shot Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {24036--24045},
}
Robust Self-calibration of Focal Lengths from the Fundamental Matrix-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kocur_2024_CVPR,
  author    = {Kocur, Viktor and Kyselica, Daniel and Kukelova, Zuzana},
  title     = {Robust Self-calibration of Focal Lengths from the Fundamental Matrix},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5220--5229},
}
Continual Learning for Motion Prediction Model via Meta-Representation Learning and Optimal Memory Buffer Retention Strategy-
[pdf]
[bibtex]@InProceedings{Kang_2024_CVPR,
  author    = {Kang, DaeJun and Kum, Dongsuk and Kim, Sanmin},
  title     = {Continual Learning for Motion Prediction Model via Meta-Representation Learning and Optimal Memory Buffer Retention Strategy},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15438--15448},
}
PartDistill: 3D Shape Part Segmentation by Vision-Language Model Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Umam_2024_CVPR,
  author    = {Umam, Ardian and Yang, Cheng-Kun and Chen, Min-Hung and Chuang, Jen-Hui and Lin, Yen-Yu},
  title     = {{PartDistill}: {3D} Shape Part Segmentation by Vision-Language Model Distillation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3470--3479},
}
CPP-Net: Embracing Multi-Scale Feature Fusion into Deep Unfolding CP-PPA Network for Compressive Sensing-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2024_CVPR,
  author    = {Guo, Zhen and Gan, Hongping},
  title     = {{CPP-Net}: Embracing Multi-Scale Feature Fusion into Deep Unfolding {CP-PPA} Network for Compressive Sensing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {25086--25095},
}
EditGuard: Versatile Image Watermarking for Tamper Localization and Copyright Protection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Xuanyu and Li, Runyi and Yu, Jiwen and Xu, Youmin and Li, Weiqi and Zhang, Jian},
  title     = {{EditGuard}: Versatile Image Watermarking for Tamper Localization and Copyright Protection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11964--11974},
}
3DGStream: On-the-Fly Training of 3D Gaussians for Efficient Streaming of Photo-Realistic Free-Viewpoint Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2024_CVPR,
  author    = {Sun, Jiakai and Jiao, Han and Li, Guangyuan and Zhang, Zhanjie and Zhao, Lei and Xing, Wei},
  title     = {{3DGStream}: On-the-Fly Training of {3D} {Gaussians} for Efficient Streaming of Photo-Realistic Free-Viewpoint Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {20675--20685},
}
FairRAG: Fair Human Generation via Fair Retrieval Augmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shrestha_2024_CVPR,
  author    = {Shrestha, Robik and Zou, Yang and Chen, Qiuyu and Li, Zhiheng and Xie, Yusheng and Deng, Siqi},
  title     = {{FairRAG}: Fair Human Generation via Fair Retrieval Augmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11996--12005},
}
DragDiffusion: Harnessing Diffusion Models for Interactive Point-based Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2024_CVPR,
  author    = {Shi, Yujun and Xue, Chuhui and Liew, Jun Hao and Pan, Jiachun and Yan, Hanshu and Zhang, Wenqing and Tan, Vincent Y. F. and Bai, Song},
  title     = {{DragDiffusion}: Harnessing Diffusion Models for Interactive Point-based Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8839--8849},
}
FaceTalk: Audio-Driven Motion Diffusion for Neural Parametric Head Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Aneja_2024_CVPR,
  author    = {Aneja, Shivangi and Thies, Justus and Dai, Angela and Nie{\ss}ner, Matthias},
  title     = {{FaceTalk}: Audio-Driven Motion Diffusion for Neural Parametric Head Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {21263--21273},
}
Mip-Splatting: Alias-free 3D Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2024_CVPR,
  author    = {Yu, Zehao and Chen, Anpei and Huang, Binbin and Sattler, Torsten and Geiger, Andreas},
  title     = {{Mip-Splatting}: Alias-free {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {19447--19456},
}
Learning Coupled Dictionaries from Unpaired Data for Image Super-Resolution-
[pdf]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Longguang and Li, Juncheng and Wang, Yingqian and Hu, Qingyong and Guo, Yulan},
  title     = {Learning Coupled Dictionaries from Unpaired Data for Image Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {25712--25721},
}
Template Free Reconstruction of Human-object Interaction with Procedural Interaction Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2024_CVPR,
  author    = {Xie, Xianghui and Bhatnagar, Bharat Lal and Lenssen, Jan Eric and Pons-Moll, Gerard},
  title     = {Template Free Reconstruction of Human-object Interaction with Procedural Interaction Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10003--10015},
}
Deep Video Inverse Tone Mapping Based on Temporal Clues-
[pdf]
[supp]
[bibtex]@InProceedings{Ye_2024_CVPR,
  author    = {Ye, Yuyao and Zhang, Ning and Zhao, Yang and Cao, Hongbin and Wang, Ronggang},
  title     = {Deep Video Inverse Tone Mapping Based on Temporal Clues},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {25995--26004},
}
NeRF-HuGS: Improved Neural Radiance Fields in Non-static Scenes Using Heuristics-Guided Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Jiahao and Qin, Yipeng and Liu, Lingjie and Lu, Jiangbo and Li, Guanbin},
  title     = {{NeRF-HuGS}: Improved Neural Radiance Fields in Non-static Scenes Using Heuristics-Guided Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {19436--19446},
}
Addressing Background Context Bias in Few-Shot Segmentation through Iterative Modulation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2024_CVPR,
  author    = {Zhu, Lanyun and Chen, Tianrun and Yin, Jianxiong and See, Simon and Liu, Jun},
  title     = {Addressing Background Context Bias in Few-Shot Segmentation through Iterative Modulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3370--3379},
}
Open-Vocabulary Video Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Peng and Zhou, Xuerong and Pang, Guansong and Sun, Yujia and Liu, Jing and Wang, Peng and Zhang, Yanning},
  title     = {Open-Vocabulary Video Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18297--18307},
}
ODM: A Text-Image Further Alignment Pre-training Approach for Scene Text Detection and Spotting-
[pdf]
[arXiv]
[bibtex]@InProceedings{Duan_2024_CVPR,
  author    = {Duan, Chen and Fu, Pei and Guo, Shan and Jiang, Qianyi and Wei, Xiaoming},
  title     = {{ODM}: A Text-Image Further Alignment Pre-training Approach for Scene Text Detection and Spotting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15587--15597},
}
TiNO-Edit: Timestep and Noise Optimization for Robust Diffusion-Based Image Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Sherry X and Vaxman, Yaron and Ben Baruch, Elad and Asulin, David and Moreshet, Aviad and Lien, Kuo-Chin and Sra, Misha and Sen, Pradeep},
  title     = {{TiNO-Edit}: Timestep and Noise Optimization for Robust Diffusion-Based Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6337--6346},
}
Epistemic Uncertainty Quantification For Pre-Trained Neural Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Hanjing and Ji, Qiang},
  title     = {Epistemic Uncertainty Quantification For Pre-Trained Neural Networks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11052--11061},
}
Diffusion-ES: Gradient-free Planning with Diffusion for Autonomous and Instruction-guided Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Brian and Su, Huangyuan and Gkanatsios, Nikolaos and Ke, Tsung-Wei and Jain, Ayush and Schneider, Jeff and Fragkiadaki, Katerina},
  title     = {{Diffusion-ES}: Gradient-free Planning with Diffusion for Autonomous and Instruction-guided Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15342--15353},
}
AdaShift: Learning Discriminative Self-Gated Neural Feature Activation With an Adaptive Shift Factor-
[pdf]
[supp]
[bibtex]@InProceedings{Cai_2024_CVPR,
  author    = {Cai, Sudong},
  title     = {{AdaShift}: Learning Discriminative Self-Gated Neural Feature Activation With an Adaptive Shift Factor},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5947--5956},
}
SCEdit: Efficient and Controllable Image Diffusion Generation via Skip Connection Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2024_CVPR,
  author    = {Jiang, Zeyinzi and Mao, Chaojie and Pan, Yulin and Han, Zhen and Zhang, Jingfeng},
  title     = {{SCEdit}: Efficient and Controllable Image Diffusion Generation via Skip Connection Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8995--9004},
}
MRC-Net: 6-DoF Pose Estimation with MultiScale Residual Correlation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Yuelong and Mao, Yafei and Bala, Raja and Hadap, Sunil},
  title     = {{MRC-Net}: {6-DoF} Pose Estimation with {MultiScale} Residual Correlation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10476--10486},
}
MonoCD: Monocular 3D Object Detection with Complementary Depths-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2024_CVPR,
  author    = {Yan, Longfei and Yan, Pei and Xiong, Shengzhou and Xiang, Xuanyu and Tan, Yihua},
  title     = {{MonoCD}: Monocular {3D} Object Detection with Complementary Depths},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10248--10257},
}
ImageNet-D: Benchmarking Neural Network Robustness on Diffusion Synthetic Object-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Chenshuang and Pan, Fei and Kim, Junmo and Kweon, In So and Mao, Chengzhi},
  title     = {{ImageNet-D}: Benchmarking Neural Network Robustness on Diffusion Synthetic Object},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {21752--21762},
}
Consistent3D: Towards Consistent High-Fidelity Text-to-3D Generation with Deterministic Sampling Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Zike and Zhou, Pan and Yi, Xuanyu and Yuan, Xiaoding and Zhang, Hanwang},
  title     = {{Consistent3D}: Towards Consistent High-Fidelity Text-to-{3D} Generation with Deterministic Sampling Prior},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9892--9902},
}
ManipLLM: Embodied Multimodal Large Language Model for Object-Centric Robotic Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Xiaoqi and Zhang, Mingxu and Geng, Yiran and Geng, Haoran and Long, Yuxing and Shen, Yan and Zhang, Renrui and Liu, Jiaming and Dong, Hao},
  title     = {{ManipLLM}: Embodied Multimodal Large Language Model for Object-Centric Robotic Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18061--18070},
}
BA-SAM: Scalable Bias-Mode Attention Mask for Segment Anything Model-
[pdf]
[supp]
[bibtex]@InProceedings{Song_2024_CVPR,
  author    = {Song, Yiran and Zhou, Qianyu and Li, Xiangtai and Fan, Deng-Ping and Lu, Xuequan and Ma, Lizhuang},
  title     = {{BA-SAM}: Scalable Bias-Mode Attention Mask for {Segment Anything Model}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3162--3173},
}
Text-Enhanced Data-free Approach for Federated Class-Incremental Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tran_2024_CVPR,
  author    = {Tran, Minh-Tuan and Le, Trung and Le, Xuan-May and Harandi, Mehrtash and Phung, Dinh},
  title     = {Text-Enhanced Data-free Approach for Federated Class-Incremental Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {23870--23880},
}
Deciphering 'What' and 'Where' Visual Pathways from Spectral Clustering of Layer-Distributed Neural Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Xiao and Yunis, David and Maire, Michael},
  title     = {Deciphering {`What'} and {`Where'} Visual Pathways from Spectral Clustering of Layer-Distributed Neural Representations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4165--4175},
}
GLaMM: Pixel Grounding Large Multimodal Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rasheed_2024_CVPR,
  author    = {Rasheed, Hanoona and Maaz, Muhammad and Shaji, Sahal and Shaker, Abdelrahman and Khan, Salman and Cholakkal, Hisham and Anwer, Rao M. and Xing, Eric and Yang, Ming-Hsuan and Khan, Fahad S.},
  title     = {{GLaMM}: Pixel Grounding Large Multimodal Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13009--13018},
}
Incremental Residual Concept Bottleneck Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shang_2024_CVPR,
  author    = {Shang, Chenming and Zhou, Shiji and Zhang, Hengyuan and Ni, Xinzhe and Yang, Yujiu and Wang, Yuwang},
  title     = {Incremental Residual Concept Bottleneck Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11030--11040},
}
SPOC: Imitating Shortest Paths in Simulation Enables Effective Navigation and Manipulation in the Real World-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ehsani_2024_CVPR,
  author    = {Ehsani, Kiana and Gupta, Tanmay and Hendrix, Rose and Salvador, Jordi and Weihs, Luca and Zeng, Kuo-Hao and Singh, Kunal Pratap and Kim, Yejin and Han, Winson and Herrasti, Alvaro and Krishna, Ranjay and Schwenk, Dustin and VanderBilt, Eli and Kembhavi, Aniruddha},
  title     = {{SPOC}: Imitating Shortest Paths in Simulation Enables Effective Navigation and Manipulation in the Real World},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16238--16250},
}
Real-Time Exposure Correction via Collaborative Transformations and Adaptive Sampling-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Ziwen and Zhang, Feng and Cao, Meng and Zhang, Jinpu and Shao, Yuanjie and Wang, Yuehuan and Sang, Nong},
  title     = {Real-Time Exposure Correction via Collaborative Transformations and Adaptive Sampling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2984--2994},
}
Lodge: A Coarse to Fine Diffusion Network for Long Dance Generation Guided by the Characteristic Dance Primitives-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Ronghui and Zhang, YuXiang and Zhang, Yachao and Zhang, Hongwen and Guo, Jie and Zhang, Yan and Liu, Yebin and Li, Xiu},
  title     = {Lodge: A Coarse to Fine Diffusion Network for Long Dance Generation Guided by the Characteristic Dance Primitives},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1524--1534},
}
UDiFF: Generating Conditional Unsigned Distance Fields with Optimal Wavelet Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Junsheng and Zhang, Weiqi and Ma, Baorui and Shi, Kanle and Liu, Yu-Shen and Han, Zhizhong},
  title     = {{UDiFF}: Generating Conditional Unsigned Distance Fields with Optimal Wavelet Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {21496--21506},
}
LoCoNet: Long-Short Context Network for Active Speaker Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Xizi and Cheng, Feng and Bertasius, Gedas},
  title     = {{LoCoNet}: Long-Short Context Network for Active Speaker Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18462--18472},
}
D3still: Decoupled Differential Distillation for Asymmetric Image Retrieval-
[pdf]
[bibtex]@InProceedings{Xie_2024_CVPR,
  author    = {Xie, Yi and Lin, Yihong and Cai, Wenjie and Xu, Xuemiao and Zhang, Huaidong and Du, Yong and He, Shengfeng},
  title     = {{D3still}: Decoupled Differential Distillation for Asymmetric Image Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17181--17190},
}
Transcending Forgery Specificity with Latent Space Augmentation for Generalizable Deepfake Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2024_CVPR,
  author    = {Yan, Zhiyuan and Luo, Yuhao and Lyu, Siwei and Liu, Qingshan and Wu, Baoyuan},
  title     = {Transcending Forgery Specificity with Latent Space Augmentation for Generalizable Deepfake Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8984--8994},
}
Scaling Laws of Synthetic Images for Model Training ... for Now-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2024_CVPR,
  author    = {Fan, Lijie and Chen, Kaifeng and Krishnan, Dilip and Katabi, Dina and Isola, Phillip and Tian, Yonglong},
  title     = {Scaling Laws of Synthetic Images for Model Training {\ldots} for Now},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7382--7392},
}
Towards Large-scale 3D Representation Learning with Multi-dataset Point Prompt Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Xiaoyang and Tian, Zhuotao and Wen, Xin and Peng, Bohao and Liu, Xihui and Yu, Kaicheng and Zhao, Hengshuang},
  title     = {Towards Large-scale {3D} Representation Learning with Multi-dataset Point Prompt Training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {19551--19562},
}
Learning Triangular Distribution in Visual World-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Ping and Zhang, Xingpeng and Zhou, Chengtao and Fan, Dichao and Tu, Peng and Zhang, Le and Qian, Yanlin},
  title     = {Learning Triangular Distribution in Visual World},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11019--11029},
}
State Space Models for Event Cameras-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zubic_2024_CVPR,
  author    = {Zubic, Nikola and Gehrig, Mathias and Scaramuzza, Davide},
  title     = {State Space Models for Event Cameras},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5819--5828},
}
EmbodiedScan: A Holistic Multi-Modal 3D Perception Suite Towards Embodied AI-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Tai and Mao, Xiaohan and Zhu, Chenming and Xu, Runsen and Lyu, Ruiyuan and Li, Peisen and Chen, Xiao and Zhang, Wenwei and Chen, Kai and Xue, Tianfan and Liu, Xihui and Lu, Cewu and Lin, Dahua and Pang, Jiangmiao},
  title     = {{EmbodiedScan}: A Holistic Multi-Modal {3D} Perception Suite Towards Embodied {AI}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {19757--19767},
}
SHINOBI: Shape and Illumination using Neural Object Decomposition via BRDF Optimization In-the-wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Engelhardt_2024_CVPR,
  author    = {Engelhardt, Andreas and Raj, Amit and Boss, Mark and Zhang, Yunzhi and Kar, Abhishek and Li, Yuanzhen and Sun, Deqing and Brualla, Ricardo Martin and Barron, Jonathan T. and Lensch, Hendrik P. A. and Jampani, Varun},
  title     = {{SHINOBI}: Shape and Illumination using Neural Object Decomposition via {BRDF} Optimization In-the-wild},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {19636--19646},
}
ES3: Evolving Self-Supervised Learning of Robust Audio-Visual Speech Representations-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Yuanhang and Yang, Shuang and Shan, Shiguang and Chen, Xilin},
  title     = {{ES3}: Evolving Self-Supervised Learning of Robust Audio-Visual Speech Representations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {27069--27079},
}
TeTriRF: Temporal Tri-Plane Radiance Fields for Efficient Free-Viewpoint Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Minye and Wang, Zehao and Kouros, Georgios and Tuytelaars, Tinne},
  title     = {{TeTriRF}: Temporal Tri-Plane Radiance Fields for Efficient Free-Viewpoint Video},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6487--6496},
}
Motion2VecSets: 4D Latent Vector Set Diffusion for Non-rigid Shape Reconstruction and Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2024_CVPR,
  author    = {Cao, Wei and Luo, Chang and Zhang, Biao and Nie{\ss}ner, Matthias and Tang, Jiapeng},
  title     = {{Motion2VecSets}: {4D} Latent Vector Set Diffusion for Non-rigid Shape Reconstruction and Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {20496--20506},
}
DiaLoc: An Iterative Approach to Embodied Dialog Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Chao and Li, Mohan and Budvytis, Ignas and Liwicki, Stephan},
  title     = {{DiaLoc}: An Iterative Approach to Embodied Dialog Localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12585--12593},
}
Self-Training Large Language Models for Improved Visual Program Synthesis With Visual Reinforcement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Khan_2024_CVPR,
  author    = {Khan, Zaid and BG, Vijay Kumar and Schulter, Samuel and Fu, Yun and Chandraker, Manmohan},
  title     = {Self-Training Large Language Models for Improved Visual Program Synthesis With Visual Reinforcement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14344--14353},
}
A2XP: Towards Private Domain Generalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2024_CVPR,
  author    = {Yu, Geunhyeok and Hwang, Hyoseok},
  title     = {{A2XP}: Towards Private Domain Generalization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {23544--23553},
}
Event-assisted Low-Light Video Object Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Hebei and Wang, Jin and Yuan, Jiahui and Li, Yue and Weng, Wenming and Peng, Yansong and Zhang, Yueyi and Xiong, Zhiwei and Sun, Xiaoyan},
  title     = {Event-assisted Low-Light Video Object Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3250--3259},
}
Active Domain Adaptation with False Negative Prediction for Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Nakamura_2024_CVPR,
  author    = {Nakamura, Yuzuru and Ishii, Yasunori and Yamashita, Takayoshi},
  title     = {Active Domain Adaptation with False Negative Prediction for Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {28782--28792},
}
MLIP: Enhancing Medical Visual Representation with Divergence Encoder and Knowledge-guided Contrastive Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Zhe and Yang, Laurence T. and Ren, Bocheng and Nie, Xin and Gao, Zhangyang and Tan, Cheng and Li, Stan Z.},
  title     = {{MLIP}: Enhancing Medical Visual Representation with Divergence Encoder and Knowledge-guided Contrastive Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11704--11714},
}
Generative 3D Part Assembly via Part-Whole-Hierarchy Message Passing-
[pdf]
[arXiv]
[bibtex]@InProceedings{Du_2024_CVPR, author = {Du, Bi'an and Gao, Xiang and Hu, Wei and Liao, Renjie}, title = {Generative 3D Part Assembly via Part-Whole-Hierarchy Message Passing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20850-20859} }
VidToMe: Video Token Merging for Zero-Shot Video Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Xirui and Ma, Chao and Yang, Xiaokang and Yang, Ming-Hsuan}, title = {VidToMe: Video Token Merging for Zero-Shot Video Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7486-7495} }
FaceChain-SuDe: Building Derived Class to Inherit Category Attributes for One-shot Subject-Driven Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Qiao_2024_CVPR, author = {Qiao, Pengchong and Shang, Lei and Liu, Chang and Sun, Baigui and Ji, Xiangyang and Chen, Jie}, title = {FaceChain-SuDe: Building Derived Class to Inherit Category Attributes for One-shot Subject-Driven Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7215-7224} }
Benchmarking Segmentation Models with Mask-Preserved Attribute Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2024_CVPR, author = {Yin, Zijin and Liang, Kongming and Li, Bing and Ma, Zhanyu and Guo, Jun}, title = {Benchmarking Segmentation Models with Mask-Preserved Attribute Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22509-22519} }
Analyzing and Improving the Training Dynamics of Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Karras_2024_CVPR, author = {Karras, Tero and Aittala, Miika and Lehtinen, Jaakko and Hellsten, Janne and Aila, Timo and Laine, Samuli}, title = {Analyzing and Improving the Training Dynamics of Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24174-24184} }
Hierarchical Correlation Clustering and Tree Preserving Embedding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chehreghani_2024_CVPR, author = {Chehreghani, Morteza Haghir and Chehreghani, Mostafa Haghir}, title = {Hierarchical Correlation Clustering and Tree Preserving Embedding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23083-23093} }
StableVITON: Learning Semantic Correspondence with Latent Diffusion Model for Virtual Try-On-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Jeongho and Gu, Guojung and Park, Minho and Park, Sunghyun and Choo, Jaegul}, title = {StableVITON: Learning Semantic Correspondence with Latent Diffusion Model for Virtual Try-On}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8176-8185} }
Can Protective Perturbation Safeguard Personal Data from Being Exploited by Stable Diffusion?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Zhengyue and Duan, Jinhao and Xu, Kaidi and Wang, Chenan and Zhang, Rui and Du, Zidong and Guo, Qi and Hu, Xing}, title = {Can Protective Perturbation Safeguard Personal Data from Being Exploited by Stable Diffusion?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24398-24407} }
Make-Your-Anchor: A Diffusion-based 2D Avatar Generation Framework-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Ziyao and Tang, Fan and Zhang, Yong and Cun, Xiaodong and Cao, Juan and Li, Jintao and Lee, Tong-Yee}, title = {Make-Your-Anchor: A Diffusion-based 2D Avatar Generation Framework}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6997-7006} }
MultiPLY: A Multisensory Object-Centric Embodied Large Language Model in 3D World-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hong_2024_CVPR, author = {Hong, Yining and Zheng, Zishuo and Chen, Peihao and Wang, Yian and Li, Junyan and Gan, Chuang}, title = {MultiPLY: A Multisensory Object-Centric Embodied Large Language Model in 3D World}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26406-26416} }
Learning to Visually Localize Sound Sources from Mixtures without Prior Source Knowledge-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Dongjin and Um, Sung Jin and Lee, Sangmin and Kim, Jung Uk}, title = {Learning to Visually Localize Sound Sources from Mixtures without Prior Source Knowledge}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26467-26476} }
Learning Dynamic Tetrahedra for High-Quality Talking Head Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Zicheng and Zheng, Ruobing and Li, Bonan and Han, Congying and Li, Tianqi and Wang, Meng and Guo, Tiande and Chen, Jingdong and Liu, Ziwen and Yang, Ming}, title = {Learning Dynamic Tetrahedra for High-Quality Talking Head Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5209-5219} }
Collaborative Learning of Anomalies with Privacy (CLAP) for Unsupervised Video Anomaly Detection: A New Baseline-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Al-lahham_2024_CVPR, author = {Al-lahham, Anas and Zaheer, Muhammad Zaigham and Tastan, Nurbek and Nandakumar, Karthik}, title = {Collaborative Learning of Anomalies with Privacy (CLAP) for Unsupervised Video Anomaly Detection: A New Baseline}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12416-12425} }
Regressor-Segmenter Mutual Prompt Learning for Crowd Counting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2024_CVPR, author = {Guo, Mingyue and Yuan, Li and Yan, Zhaoyi and Chen, Binghui and Wang, Yaowei and Ye, Qixiang}, title = {Regressor-Segmenter Mutual Prompt Learning for Crowd Counting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28380-28389} }
Instantaneous Perception of Moving Objects in 3D-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Di and Zhuang, Bingbing and Metaxas, Dimitris N. and Chandraker, Manmohan}, title = {Instantaneous Perception of Moving Objects in 3D}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19573-19583} }
CORE-MPI: Consistency Object Removal with Embedding MultiPlane Image-
[pdf]
[supp]
[bibtex]@InProceedings{Yoon_2024_CVPR, author = {Yoon, Donggeun and Cho, Donghyeon}, title = {CORE-MPI: Consistency Object Removal with Embedding MultiPlane Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20081-20090} }
3D Geometry-Aware Deformable Gaussian Splatting for Dynamic View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2024_CVPR, author = {Lu, Zhicheng and Guo, Xiang and Hui, Le and Chen, Tianrui and Yang, Min and Tang, Xiao and Zhu, Feng and Dai, Yuchao}, title = {3D Geometry-Aware Deformable Gaussian Splatting for Dynamic View Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8900-8910} }
Person-in-WiFi 3D: End-to-End Multi-Person 3D Pose Estimation with Wi-Fi-
[pdf]
[bibtex]@InProceedings{Yan_2024_CVPR, author = {Yan, Kangwei and Wang, Fei and Qian, Bo and Ding, Han and Han, Jinsong and Wei, Xing}, title = {Person-in-WiFi 3D: End-to-End Multi-Person 3D Pose Estimation with Wi-Fi}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {969-978} }
Backpropagation-free Network for 3D Test-time Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Yanshuo and Cheraghian, Ali and Hayder, Zeeshan and Hong, Jie and Ramasinghe, Sameera and Rahman, Shafin and Ahmedt-Aristizabal, David and Li, Xuesong and Petersson, Lars and Harandi, Mehrtash}, title = {Backpropagation-free Network for 3D Test-time Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23231-23241} }
Resource-Efficient Transformer Pruning for Finetuning of Large Models-
[pdf]
[supp]
[bibtex]@InProceedings{Ilhan_2024_CVPR, author = {Ilhan, Fatih and Su, Gong and Tekin, Selim Furkan and Huang, Tiansheng and Hu, Sihao and Liu, Ling}, title = {Resource-Efficient Transformer Pruning for Finetuning of Large Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16206-16215} }
ParamISP: Learned Forward and Inverse ISPs using Camera Parameters-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Woohyeok and Kim, Geonu and Lee, Junyong and Lee, Seungyong and Baek, Seung-Hwan and Cho, Sunghyun}, title = {ParamISP: Learned Forward and Inverse ISPs using Camera Parameters}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26067-26076} }
Perturbing Attention Gives You More Bang for the Buck: Subtle Imaging Perturbations That Efficiently Fool Customized Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Jingyao and Lu, Yuetong and Li, Yandong and Lu, Siyang and Wang, Dongdong and Wei, Xiang}, title = {Perturbing Attention Gives You More Bang for the Buck: Subtle Imaging Perturbations That Efficiently Fool Customized Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24534-24543} }
Fairy: Fast Parallelized Instruction-Guided Video-to-Video Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Bichen and Chuang, Ching-Yao and Wang, Xiaoyan and Jia, Yichen and Krishnakumar, Kapil and Xiao, Tong and Liang, Feng and Yu, Licheng and Vajda, Peter}, title = {Fairy: Fast Parallelized Instruction-Guided Video-to-Video Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8261-8270} }
SmartEdit: Exploring Complex Instruction-based Image Editing with Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Yuzhou and Xie, Liangbin and Wang, Xintao and Yuan, Ziyang and Cun, Xiaodong and Ge, Yixiao and Zhou, Jiantao and Dong, Chao and Huang, Rui and Zhang, Ruimao and Shan, Ying}, title = {SmartEdit: Exploring Complex Instruction-based Image Editing with Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8362-8371} }
SeNM-VAE: Semi-Supervised Noise Modeling with Hierarchical Variational Autoencoder-
[pdf]
[supp]
[bibtex]@InProceedings{Zheng_2024_CVPR, author = {Zheng, Dihan and Zou, Yihang and Zhang, Xiaowen and Bao, Chenglong}, title = {SeNM-VAE: Semi-Supervised Noise Modeling with Hierarchical Variational Autoencoder}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25889-25899} }
Multimodal Industrial Anomaly Detection by Crossmodal Feature Mapping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Costanzino_2024_CVPR, author = {Costanzino, Alex and Ramirez, Pierluigi Zama and Lisanti, Giuseppe and Di Stefano, Luigi}, title = {Multimodal Industrial Anomaly Detection by Crossmodal Feature Mapping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17234-17243} }
FFF: Fixing Flawed Foundations in Contrastive Pre-Training Results in Very Strong Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bulat_2024_CVPR, author = {Bulat, Adrian and Ouali, Yassine and Tzimiropoulos, Georgios}, title = {FFF: Fixing Flawed Foundations in Contrastive Pre-Training Results in Very Strong Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14172-14182} }
Anchor-based Robust Finetuning of Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2024_CVPR, author = {Han, Jinwei and Lin, Zhiwen and Sun, Zhongyi and Gao, Yingguo and Yan, Ke and Ding, Shouhong and Gao, Yuan and Xia, Gui-Song}, title = {Anchor-based Robust Finetuning of Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26919-26928} }
Low-power Continuous Remote Behavioral Localization with Event Cameras-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hamann_2024_CVPR, author = {Hamann, Friedhelm and Ghosh, Suman and Martinez, Ignacio Juarez and Hart, Tom and Kacelnik, Alex and Gallego, Guillermo}, title = {Low-power Continuous Remote Behavioral Localization with Event Cameras}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18612-18621} }
SportsHHI: A Dataset for Human-Human Interaction Detection in Sports Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Tao and He, Runyu and Wu, Gangshan and Wang, Limin}, title = {SportsHHI: A Dataset for Human-Human Interaction Detection in Sports Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18537-18546} }
DiSR-NeRF: Diffusion-Guided View-Consistent Super-Resolution NeRF-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2024_CVPR, author = {Lee, Jie Long and Li, Chen and Lee, Gim Hee}, title = {DiSR-NeRF: Diffusion-Guided View-Consistent Super-Resolution NeRF}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20561-20570} }
Dispersed Structured Light for Hyperspectral 3D Imaging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shin_2024_CVPR, author = {Shin, Suhyun and Choi, Seokjun and Heide, Felix and Baek, Seung-Hwan}, title = {Dispersed Structured Light for Hyperspectral 3D Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24997-25006} }
CrowdDiff: Multi-hypothesis Crowd Density Estimation using Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ranasinghe_2024_CVPR, author = {Ranasinghe, Yasiru and Nair, Nithin Gopalakrishnan and Bandara, Wele Gedara Chaminda and Patel, Vishal M.}, title = {CrowdDiff: Multi-hypothesis Crowd Density Estimation using Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12809-12819} }
It's All About Your Sketch: Democratising Sketch Control in Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Koley_2024_CVPR, author = {Koley, Subhadeep and Bhunia, Ayan Kumar and Sekhri, Deeptanshu and Sain, Aneeshan and Chowdhury, Pinaki Nath and Xiang, Tao and Song, Yi-Zhe}, title = {It's All About Your Sketch: Democratising Sketch Control in Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7204-7214} }
GLID: Pre-training a Generalist Encoder-Decoder Vision Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Jihao and Zheng, Jinliang and Liu, Yu and Li, Hongsheng}, title = {GLID: Pre-training a Generalist Encoder-Decoder Vision Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22851-22860} }
Diffusion-FOF: Single-View Clothed Human Reconstruction via Diffusion-Based Fourier Occupancy Field-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Yuanzhen and Luo, Fei and Xiao, Chunxia}, title = {Diffusion-FOF: Single-View Clothed Human Reconstruction via Diffusion-Based Fourier Occupancy Field}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9525-9534} }
When StyleGAN Meets Stable Diffusion: a W+ Adapter for Personalized Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Xiaoming and Hou, Xinyu and Loy, Chen Change}, title = {When StyleGAN Meets Stable Diffusion: a W+ Adapter for Personalized Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2187-2196} }
ToNNO: Tomographic Reconstruction of a Neural Network's Output for Weakly Supervised Segmentation of 3D Medical Images-
[pdf]
[supp]
[bibtex]@InProceedings{Schmidt-Mengin_2024_CVPR, author = {Schmidt-Mengin, Marius and Benichoux, Alexis and Belachew, Shibeshih and Komodakis, Nikos and Paragios, Nikos}, title = {ToNNO: Tomographic Reconstruction of a Neural Network's Output for Weakly Supervised Segmentation of 3D Medical Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11428-11438} }
Learning to Navigate Efficiently and Precisely in Real Environments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bono_2024_CVPR, author = {Bono, Guillaume and Poirier, Herv\'e and Antsfeld, Leonid and Monaci, Gianluca and Chidlovskii, Boris and Wolf, Christian}, title = {Learning to Navigate Efficiently and Precisely in Real Environments}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17837-17846} }
CAM Back Again: Large Kernel CNNs from a Weakly Supervised Object Localization Perspective-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yasuki_2024_CVPR, author = {Yasuki, Shunsuke and Taki, Masato}, title = {CAM Back Again: Large Kernel CNNs from a Weakly Supervised Object Localization Perspective}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {341-351} }
VkD: Improving Knowledge Distillation using Orthogonal Projections-
[pdf]
[supp]
[bibtex]@InProceedings{Miles_2024_CVPR, author = {Miles, Roy and Elezi, Ismail and Deng, Jiankang}, title = {VkD: Improving Knowledge Distillation using Orthogonal Projections}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15720-15730} }
Putting the Object Back into Video Object Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2024_CVPR, author = {Cheng, Ho Kei and Oh, Seoung Wug and Price, Brian and Lee, Joon-Young and Schwing, Alexander}, title = {Putting the Object Back into Video Object Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3151-3161} }
Concept Weaver: Enabling Multi-Concept Fusion in Text-to-Image Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kwon_2024_CVPR, author = {Kwon, Gihyun and Jenni, Simon and Li, Dingzeyu and Lee, Joon-Young and Ye, Jong Chul and Heilbron, Fabian Caba}, title = {Concept Weaver: Enabling Multi-Concept Fusion in Text-to-Image Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8880-8889} }
PKU-DyMVHumans: A Multi-View Video Benchmark for High-Fidelity Dynamic Human Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Zheng_2024_CVPR, author = {Zheng, Xiaoyun and Liao, Liwei and Li, Xufeng and Jiao, Jianbo and Wang, Rongjie and Gao, Feng and Wang, Shiqi and Wang, Ronggang}, title = {PKU-DyMVHumans: A Multi-View Video Benchmark for High-Fidelity Dynamic Human Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22530-22540} }
Cross-Domain Few-Shot Segmentation via Iterative Support-Query Correspondence Mining-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nie_2024_CVPR, author = {Nie, Jiahao and Xing, Yun and Zhang, Gongjie and Yan, Pei and Xiao, Aoran and Tan, Yap-Peng and Kot, Alex C. and Lu, Shijian}, title = {Cross-Domain Few-Shot Segmentation via Iterative Support-Query Correspondence Mining}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3380-3390} }
CausalPC: Improving the Robustness of Point Cloud Classification by Causal Effect Identification-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Yuanmin and Zhang, Mi and Ding, Daizong and Jiang, Erling and Wang, Zhaoxiang and Yang, Min}, title = {CausalPC: Improving the Robustness of Point Cloud Classification by Causal Effect Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19779-19789} }
LASA: Instance Reconstruction from Real Scans using A Large-scale Aligned Shape Annotation Dataset-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Haolin and Ye, Chongjie and Nie, Yinyu and He, Yingfan and Han, Xiaoguang}, title = {LASA: Instance Reconstruction from Real Scans using A Large-scale Aligned Shape Annotation Dataset}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20454-20464} }
LaRE^2: Latent Reconstruction Error Based Method for Diffusion-Generated Image Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Luo_2024_CVPR, author = {Luo, Yunpeng and Du, Junlong and Yan, Ke and Ding, Shouhong}, title = {{LaRE$^2$}: Latent Reconstruction Error Based Method for Diffusion-Generated Image Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17006-17015} }
DiffSCI: Zero-Shot Snapshot Compressive Imaging via Iterative Spectral Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2024_CVPR, author = {Pan, Zhenghao and Zeng, Haijin and Cao, Jiezhang and Zhang, Kai and Chen, Yongyong}, title = {DiffSCI: Zero-Shot Snapshot Compressive Imaging via Iterative Spectral Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25297-25306} }
DiffSHEG: A Diffusion-Based Approach for Real-Time Speech-driven Holistic 3D Expression and Gesture Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Junming and Liu, Yunfei and Wang, Jianan and Zeng, Ailing and Li, Yu and Chen, Qifeng}, title = {DiffSHEG: A Diffusion-Based Approach for Real-Time Speech-driven Holistic 3D Expression and Gesture Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7352-7361} }
MeLFusion: Synthesizing Music from Image and Language Cues using Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chowdhury_2024_CVPR, author = {Chowdhury, Sanjoy and Nag, Sayan and Joseph, K J and Srinivasan, Balaji Vasan and Manocha, Dinesh}, title = {MeLFusion: Synthesizing Music from Image and Language Cues using Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26826-26835} }
T4P: Test-Time Training of Trajectory Prediction via Masked Autoencoder and Actor-specific Token Memory-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2024_CVPR, author = {Park, Daehee and Jeong, Jaeseok and Yoon, Sung-Hoon and Jeong, Jaewoo and Yoon, Kuk-Jin}, title = {T4P: Test-Time Training of Trajectory Prediction via Masked Autoencoder and Actor-specific Token Memory}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15065-15076} }
Noisy-Correspondence Learning for Text-to-Image Person Re-identification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qin_2024_CVPR, author = {Qin, Yang and Chen, Yingke and Peng, Dezhong and Peng, Xi and Zhou, Joey Tianyi and Hu, Peng}, title = {Noisy-Correspondence Learning for Text-to-Image Person Re-identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27197-27206} }
InstaGen: Enhancing Object Detection by Training on Synthetic Dataset-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2024_CVPR, author = {Feng, Chengjian and Zhong, Yujie and Jie, Zequn and Xie, Weidi and Ma, Lin}, title = {InstaGen: Enhancing Object Detection by Training on Synthetic Dataset}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14121-14130} }
PanoRecon: Real-Time Panoptic 3D Reconstruction from Monocular Video-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Dong and Yan, Zike and Zha, Hongbin}, title = {PanoRecon: Real-Time Panoptic 3D Reconstruction from Monocular Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21507-21518} }
Animating General Image with Large Visual Motion Model-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Dengsheng and Wei, Xiaoming and Wei, Xiaolin}, title = {Animating General Image with Large Visual Motion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7131-7140} }
Visual Point Cloud Forecasting enables Scalable Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Zetong and Chen, Li and Sun, Yanan and Li, Hongyang}, title = {Visual Point Cloud Forecasting enables Scalable Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14673-14684} }
Towards Transferable Targeted 3D Adversarial Attack in the Physical World-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Yao and Dong, Yinpeng and Ruan, Shouwei and Yang, Xiao and Su, Hang and Wei, Xingxing}, title = {Towards Transferable Targeted 3D Adversarial Attack in the Physical World}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24512-24522} }
SwitchLight: Co-design of Physics-driven Architecture and Pre-training Framework for Human Portrait Relighting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Hoon and Jang, Minje and Yoon, Wonjun and Lee, Jisoo and Na, Donghyun and Woo, Sanghyun}, title = {SwitchLight: Co-design of Physics-driven Architecture and Pre-training Framework for Human Portrait Relighting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25096-25106} }
DIRECT-3D: Learning Direct Text-to-3D Generation on Massive Noisy 3D Data-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Qihao and Zhang, Yi and Bai, Song and Kortylewski, Adam and Yuille, Alan}, title = {DIRECT-3D: Learning Direct Text-to-3D Generation on Massive Noisy 3D Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6881-6891} }
Synthesize Step-by-Step: Tools, Templates and LLMs as Data Generators for Reasoning-Based Chart VQA-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Zhuowan and Jasani, Bhavan and Tang, Peng and Ghadar, Shabnam}, title = {Synthesize Step-by-Step: Tools, Templates and LLMs as Data Generators for Reasoning-Based Chart VQA}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13613-13623} }
LayoutLLM: Layout Instruction Tuning with Large Language Models for Document Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2024_CVPR, author = {Luo, Chuwei and Shen, Yufan and Zhu, Zhaoqing and Zheng, Qi and Yu, Zhi and Yao, Cong}, title = {LayoutLLM: Layout Instruction Tuning with Large Language Models for Document Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15630-15640} }
ProTeCt: Prompt Tuning for Taxonomic Open Set Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Tz-Ying and Ho, Chih-Hui and Vasconcelos, Nuno}, title = {ProTeCt: Prompt Tuning for Taxonomic Open Set Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16531-16540} }
Adapters Strike Back-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Steitz_2024_CVPR, author = {Steitz, Jan-Martin O. and Roth, Stefan}, title = {Adapters Strike Back}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23449-23459} }
Masked Autoencoders for Microscopy are Scalable Learners of Cellular Biology-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kraus_2024_CVPR, author = {Kraus, Oren and Kenyon-Dean, Kian and Saberian, Saber and Fallah, Maryam and McLean, Peter and Leung, Jess and Sharma, Vasudev and Khan, Ayla and Balakrishnan, Jia and Celik, Safiye and Beaini, Dominique and Sypetkowski, Maciej and Cheng, Chi Vicky and Morse, Kristen and Makes, Maureen and Mabey, Ben and Earnshaw, Berton}, title = {Masked Autoencoders for Microscopy are Scalable Learners of Cellular Biology}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11757-11768} }
OHTA: One-shot Hand Avatar via Data-driven Implicit Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2024_CVPR, author = {Zheng, Xiaozheng and Wen, Chao and Su, Zhuo and Xu, Zeran and Li, Zhaohu and Zhao, Yang and Xue, Zhou}, title = {OHTA: One-shot Hand Avatar via Data-driven Implicit Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {799-810} }
Segment and Caption Anything-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Xiaoke and Wang, Jianfeng and Tang, Yansong and Zhang, Zheng and Hu, Han and Lu, Jiwen and Wang, Lijuan and Liu, Zicheng}, title = {Segment and Caption Anything}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13405-13417} }
Human Motion Prediction Under Unexpected Perturbation-
[pdf]
[supp]
[bibtex]@InProceedings{Yue_2024_CVPR, author = {Yue, Jiangbei and Li, Baiyi and Pettr{\'e}, Julien and Seyfried, Armin and Wang, He}, title = {Human Motion Prediction Under Unexpected Perturbation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1501-1511} }
Text-to-3D Generation with Bidirectional Diffusion using both 2D and 3D priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ding_2024_CVPR, author = {Ding, Lihe and Dong, Shaocong and Huang, Zhanpeng and Wang, Zibin and Zhang, Yiyuan and Gong, Kaixiong and Xu, Dan and Xue, Tianfan}, title = {Text-to-3D Generation with Bidirectional Diffusion using both 2D and 3D priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5115-5124} }
CLIP-Driven Open-Vocabulary 3D Scene Graph Generation via Cross-Modality Contrastive Learning-
[pdf]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Lianggangxu and Wang, Xuejiao and Lu, Jiale and Lin, Shaohui and Wang, Changbo and He, Gaoqi}, title = {CLIP-Driven Open-Vocabulary 3D Scene Graph Generation via Cross-Modality Contrastive Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27863-27873} }
Adversarial Backdoor Attack by Naturalistic Data Poisoning on Trajectory Prediction in Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pourkeshavarz_2024_CVPR, author = {Pourkeshavarz, Mozhgan and Sabokrou, Mohammad and Rasouli, Amir}, title = {Adversarial Backdoor Attack by Naturalistic Data Poisoning on Trajectory Prediction in Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14885-14894} }
Make-It-Vivid: Dressing Your Animatable Biped Cartoon Characters from Text-
[pdf]
[bibtex]@InProceedings{Tang_2024_CVPR, author = {Tang, Junshu and Zeng, Yanhong and Fan, Ke and Wang, Xuheng and Dai, Bo and Chen, Kai and Ma, Lizhuang}, title = {Make-It-Vivid: Dressing Your Animatable Biped Cartoon Characters from Text}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6243-6253} }
StraightPCF: Straight Point Cloud Filtering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{de_Silva_Edirimuni_2024_CVPR, author = {de Silva Edirimuni, Dasith and Lu, Xuequan and Li, Gang and Wei, Lei and Robles-Kelly, Antonio and Li, Hongdong}, title = {StraightPCF: Straight Point Cloud Filtering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20721-20730} }
Mirasol3B: A Multimodal Autoregressive Model for Time-Aligned and Contextual Modalities-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Piergiovanni_2024_CVPR, author = {Piergiovanni, AJ and Noble, Isaac and Kim, Dahun and Ryoo, Michael S. and Gomes, Victor and Angelova, Anelia}, title = {Mirasol3B: A Multimodal Autoregressive Model for Time-Aligned and Contextual Modalities}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26804-26814} }
Neural Sign Actors: A Diffusion Model for 3D Sign Language Production from Text-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Baltatzis_2024_CVPR, author = {Baltatzis, Vasileios and Potamias, Rolandos Alexandros and Ververas, Evangelos and Sun, Guanxiong and Deng, Jiankang and Zafeiriou, Stefanos}, title = {Neural Sign Actors: A Diffusion Model for 3D Sign Language Production from Text}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1985-1995} }
On the Diversity and Realism of Distilled Dataset: An Efficient Dataset Distillation Paradigm-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2024_CVPR, author = {Sun, Peng and Shi, Bei and Yu, Daiwei and Lin, Tao}, title = {On the Diversity and Realism of Distilled Dataset: An Efficient Dataset Distillation Paradigm}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9390-9399} }
Semantics-aware Motion Retargeting with Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Haodong and Chen, Zhike and Xu, Haocheng and Hao, Lei and Wu, Xiaofei and Xu, Songcen and Zhang, Zhensong and Wang, Yue and Xiong, Rong}, title = {Semantics-aware Motion Retargeting with Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2155-2164} }
Semantically-Shifted Incremental Adapter-Tuning is A Continual ViTransformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tan_2024_CVPR, author = {Tan, Yuwen and Zhou, Qinhao and Xiang, Xiang and Wang, Ke and Wu, Yuchuan and Li, Yongbin}, title = {Semantically-Shifted Incremental Adapter-Tuning is A Continual ViTransformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23252-23262} }
Low-Rank Approximation for Sparse Attention in Multi-Modal LLMs-
[pdf]
[bibtex]@InProceedings{Song_2024_CVPR, author = {Song, Lin and Chen, Yukang and Yang, Shuai and Ding, Xiaohan and Ge, Yixiao and Chen, Ying-Cong and Shan, Ying}, title = {Low-Rank Approximation for Sparse Attention in Multi-Modal LLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13763-13773} }
TASeg: Temporal Aggregation Network for LiDAR Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Xiaopei and Hou, Yuenan and Huang, Xiaoshui and Lin, Binbin and He, Tong and Zhu, Xinge and Ma, Yuexin and Wu, Boxi and Liu, Haifeng and Cai, Deng and Ouyang, Wanli}, title = {TASeg: Temporal Aggregation Network for LiDAR Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15311-15320} }
Bootstrapping SparseFormers from Vision Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2024_CVPR, author = {Gao, Ziteng and Tong, Zhan and Lin, Kevin Qinghong and Chen, Joya and Shou, Mike Zheng}, title = {Bootstrapping SparseFormers from Vision Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17710-17721} }
EventPS: Real-Time Photometric Stereo Using an Event Camera-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2024_CVPR, author = {Yu, Bohan and Ren, Jieji and Han, Jin and Wang, Feishi and Liang, Jinxiu and Shi, Boxin}, title = {EventPS: Real-Time Photometric Stereo Using an Event Camera}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9602-9611} }
Unsupervised Semantic Segmentation Through Depth-Guided Feature Correlation and Sampling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sick_2024_CVPR, author = {Sick, Leon and Engel, Dominik and Hermosilla, Pedro and Ropinski, Timo}, title = {Unsupervised Semantic Segmentation Through Depth-Guided Feature Correlation and Sampling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3637-3646} }
On the Road to Portability: Compressing End-to-End Motion Planner for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2024_CVPR, author = {Feng, Kaituo and Li, Changsheng and Ren, Dongchun and Yuan, Ye and Wang, Guoren}, title = {On the Road to Portability: Compressing End-to-End Motion Planner for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15099-15108} }
RAVE: Randomized Noise Shuffling for Fast and Consistent Video Editing with Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kara_2024_CVPR, author = {Kara, Ozgur and Kurtkaya, Bariscan and Yesiltepe, Hidir and Rehg, James M. and Yanardag, Pinar}, title = {RAVE: Randomized Noise Shuffling for Fast and Consistent Video Editing with Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6507-6516} }
PredToken: Predicting Unknown Tokens and Beyond with Coarse-to-Fine Iterative Decoding-
[pdf]
[bibtex]@InProceedings{Nie_2024_CVPR, author = {Nie, Xuesong and Jin, Haoyuan and Yan, Yunfeng and Chen, Xi and Zhu, Zhihang and Qi, Donglian}, title = {PredToken: Predicting Unknown Tokens and Beyond with Coarse-to-Fine Iterative Decoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18143-18152} }
Video-Based Human Pose Regression via Decoupled Space-Time Aggregation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2024_CVPR, author = {He, Jijie and Yang, Wenwu}, title = {Video-Based Human Pose Regression via Decoupled Space-Time Aggregation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1022-1031} }
L-MAGIC: Language Model Assisted Generation of Images with Coherence-
[pdf]
[supp]
[bibtex]@InProceedings{Cai_2024_CVPR, author = {Cai, Zhipeng and Mueller, Matthias and Birkl, Reiner and Wofk, Diana and Tseng, Shao-Yen and Cheng, Junda and Stan, Gabriela Ben-Melech and Lal, Vasudev and Paulitsch, Michael}, title = {L-MAGIC: Language Model Assisted Generation of Images with Coherence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7049-7058} }
3D Face Tracking from 2D Video through Iterative Dense UV to Image Flow-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Taubner_2024_CVPR, author = {Taubner, Felix and Raina, Prashant and Tuli, Mathieu and Teh, Eu Wern and Lee, Chul and Huang, Jinmiao}, title = {3D Face Tracking from 2D Video through Iterative Dense UV to Image Flow}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1227-1237} }
Carve3D: Improving Multi-view Reconstruction Consistency for Diffusion Models with RL Finetuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2024_CVPR, author = {Xie, Desai and Li, Jiahao and Tan, Hao and Sun, Xin and Shu, Zhixin and Zhou, Yi and Bi, Sai and Pirk, S{\"o}ren and Kaufman, Arie E.}, title = {Carve3D: Improving Multi-view Reconstruction Consistency for Diffusion Models with RL Finetuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6369-6379} }
Random Entangled Tokens for Adversarially Robust Vision Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{Gong_2024_CVPR, author = {Gong, Huihui and Dong, Minjing and Ma, Siqi and Camtepe, Seyit and Nepal, Surya and Xu, Chang}, title = {Random Entangled Tokens for Adversarially Robust Vision Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24554-24563} }
Shadow Generation for Composite Image Using Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Qingyang and You, Junqi and Wang, Jianting and Tao, Xinhao and Zhang, Bo and Niu, Li}, title = {Shadow Generation for Composite Image Using Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8121-8130} }
DisCo: Disentangled Control for Realistic Human Dance Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Tan and Li, Linjie and Lin, Kevin and Zhai, Yuanhao and Lin, Chung-Ching and Yang, Zhengyuan and Zhang, Hanwang and Liu, Zicheng and Wang, Lijuan}, title = {DisCo: Disentangled Control for Realistic Human Dance Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9326-9336} }
L2B: Learning to Bootstrap Robust Models for Combating Label Noise-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR, author = {Zhou, Yuyin and Li, Xianhang and Liu, Fengze and Wei, Qingyue and Chen, Xuxi and Yu, Lequan and Xie, Cihang and Lungren, Matthew P. and Xing, Lei}, title = {L2B: Learning to Bootstrap Robust Models for Combating Label Noise}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23523-23533} }
GaussianShader: 3D Gaussian Splatting with Shading Functions for Reflective Surfaces-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2024_CVPR, author = {Jiang, Yingwenqi and Tu, Jiadong and Liu, Yuan and Gao, Xifeng and Long, Xiaoxiao and Wang, Wenping and Ma, Yuexin}, title = {GaussianShader: 3D Gaussian Splatting with Shading Functions for Reflective Surfaces}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5322-5332} }
Tactile-Augmented Radiance Fields-
[pdf]
[arXiv]
[bibtex]@InProceedings{Dou_2024_CVPR, author = {Dou, Yiming and Yang, Fengyu and Liu, Yi and Loquercio, Antonio and Owens, Andrew}, title = {Tactile-Augmented Radiance Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26529-26539} }
Intensity-Robust Autofocus for Spike Camera-
[pdf]
[supp]
[bibtex]@InProceedings{Su_2024_CVPR, author = {Su, Changqing and Ye, Zhiyuan and Xiao, Yongsheng and Zhou, You and Cheng, Zhen and Xiong, Bo and Yu, Zhaofei and Huang, Tiejun}, title = {Intensity-Robust Autofocus for Spike Camera}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25018-25027} }
FairCLIP: Harnessing Fairness in Vision-Language Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2024_CVPR, author = {Luo, Yan and Shi, Min and Khan, Muhammad Osama and Afzal, Muhammad Muneeb and Huang, Hao and Yuan, Shuaihang and Tian, Yu and Song, Luo and Kouhana, Ava and Elze, Tobias and Fang, Yi and Wang, Mengyu}, title = {FairCLIP: Harnessing Fairness in Vision-Language Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12289-12301} }
StreamingFlow: Streaming Occupancy Forecasting with Asynchronous Multi-modal Data Streams via Neural Ordinary Differential Equation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2024_CVPR, author = {Shi, Yining and Jiang, Kun and Wang, Ke and Li, Jiusi and Wang, Yunlong and Yang, Mengmeng and Yang, Diange}, title = {StreamingFlow: Streaming Occupancy Forecasting with Asynchronous Multi-modal Data Streams via Neural Ordinary Differential Equation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14833-14842} }
pix2gestalt: Amodal Segmentation by Synthesizing Wholes-
[pdf]
[bibtex]@InProceedings{Ozguroglu_2024_CVPR, author = {Ozguroglu, Ege and Liu, Ruoshi and Sur{\'\i}s, D{\'\i}dac and Chen, Dian and Dave, Achal and Tokmakov, Pavel and Vondrick, Carl}, title = {pix2gestalt: Amodal Segmentation by Synthesizing Wholes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3931-3940} }
Weakly Supervised Point Cloud Semantic Segmentation via Artificial Oracle-
[pdf]
[supp]
[bibtex]@InProceedings{Kweon_2024_CVPR, author = {Kweon, Hyeokjun and Kim, Jihun and Yoon, Kuk-Jin}, title = {Weakly Supervised Point Cloud Semantic Segmentation via Artificial Oracle}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3721-3731} }
Language Model Guided Interpretable Video Action Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Ning and Zhu, Guangming and Li, HS and Zhang, Liang and Shah, Syed Afaq Ali and Bennamoun, Mohammed}, title = {Language Model Guided Interpretable Video Action Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18878-18887} }
Forecasting of 3D Whole-body Human Poses with Grasping Objects-
[pdf]
[bibtex]@InProceedings{Yan_2024_CVPR, author = {Yan, Haitao and Cui, Qiongjie and Xie, Jiexin and Guo, Shijie}, title = {Forecasting of 3D Whole-body Human Poses with Grasping Objects}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1726-1736} }
COTR: Compact Occupancy TRansformer for Vision-based 3D Occupancy Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2024_CVPR, author = {Ma, Qihang and Tan, Xin and Qu, Yanyun and Ma, Lizhuang and Zhang, Zhizhong and Xie, Yuan}, title = {COTR: Compact Occupancy TRansformer for Vision-based 3D Occupancy Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19936-19945} }
Accelerating Diffusion Sampling with Optimized Time Steps-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xue_2024_CVPR, author = {Xue, Shuchen and Liu, Zhaoqiang and Chen, Fei and Zhang, Shifeng and Hu, Tianyang and Xie, Enze and Li, Zhenguo}, title = {Accelerating Diffusion Sampling with Optimized Time Steps}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8292-8301} }
See, Say, and Segment: Teaching LMMs to Overcome False Premises-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Tsung-Han and Biamby, Giscard and Chan, David and Dunlap, Lisa and Gupta, Ritwik and Wang, Xudong and Gonzalez, Joseph E. and Darrell, Trevor}, title = {See, Say, and Segment: Teaching LMMs to Overcome False Premises}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13459-13469} }
Is Ego Status All You Need for Open-Loop End-to-End Autonomous Driving?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Zhiqi and Yu, Zhiding and Lan, Shiyi and Li, Jiahan and Kautz, Jan and Lu, Tong and Alvarez, Jose M.}, title = {Is Ego Status All You Need for Open-Loop End-to-End Autonomous Driving?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14864-14873} }
Unsupervised Template-assisted Point Cloud Shape Correspondence Network-
[pdf]
[arXiv]
[bibtex]@InProceedings{Deng_2024_CVPR, author = {Deng, Jiacheng and Lu, Jiahao and Zhang, Tianzhu}, title = {Unsupervised Template-assisted Point Cloud Shape Correspondence Network}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5250-5259} }
CGI-DM: Digital Copyright Authentication for Diffusion Models via Contrasting Gradient Inversion-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Xiaoyu and Hua, Yang and Liang, Chumeng and Zhang, Jiaru and Wang, Hao and Song, Tao and Guan, Haibing}, title = {CGI-DM: Digital Copyright Authentication for Diffusion Models via Contrasting Gradient Inversion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10812-10821} }
Making Visual Sense of Oracle Bones for You and Me-
[pdf]
[supp]
[bibtex]@InProceedings{Qiao_2024_CVPR, author = {Qiao, Runqi and Yang, Lan and Pang, Kaiyue and Zhang, Honggang}, title = {Making Visual Sense of Oracle Bones for You and Me}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12656-12665} }
Finsler-Laplace-Beltrami Operators with Application to Shape Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Weber_2024_CVPR, author = {Weber, Simon and Dag{\`e}s, Thomas and Gao, Maolin and Cremers, Daniel}, title = {Finsler-Laplace-Beltrami Operators with Application to Shape Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3131-3140} }
Minimal Perspective Autocalibration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cin_2024_CVPR, author = {Dal Cin, Andrea Porfiri and Duff, Timothy and Magri, Luca and Pajdla, Tomas}, title = {Minimal Perspective Autocalibration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5064-5073} }
MOHO: Learning Single-view Hand-held Object Reconstruction with Multi-view Occlusion-Aware Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Chenyangguang and Jiao, Guanlong and Di, Yan and Wang, Gu and Huang, Ziqin and Zhang, Ruida and Manhardt, Fabian and Fu, Bowen and Tombari, Federico and Ji, Xiangyang}, title = {MOHO: Learning Single-view Hand-held Object Reconstruction with Multi-view Occlusion-Aware Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9992-10002} }
BANF: Band-Limited Neural Fields for Levels of Detail Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shabanov_2024_CVPR, author = {Shabanov, Akhmedkhan and Govindarajan, Shrisudhan and Reading, Cody and Goli, Lily and Rebain, Daniel and Yi, Kwang Moo and Tagliasacchi, Andrea}, title = {BANF: Band-Limited Neural Fields for Levels of Detail Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20571-20580} }
Time-, Memory- and Parameter-Efficient Visual Adaptation-
[pdf]
[supp]
[bibtex]@InProceedings{Mercea_2024_CVPR, author = {Mercea, Otniel-Bogdan and Gritsenko, Alexey and Schmid, Cordelia and Arnab, Anurag}, title = {Time-, Memory- and Parameter-Efficient Visual Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5536-5545} }
SecondPose: SE(3)-Consistent Dual-Stream Feature Fusion for Category-Level Pose Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Yamei and Di, Yan and Zhai, Guangyao and Manhardt, Fabian and Zhang, Chenyangguang and Zhang, Ruida and Tombari, Federico and Navab, Nassir and Busam, Benjamin}, title = {SecondPose: SE(3)-Consistent Dual-Stream Feature Fusion for Category-Level Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9959-9969} }
Physical Property Understanding from Language-Embedded Feature Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhai_2024_CVPR, author = {Zhai, Albert J. and Shen, Yuan and Chen, Emily Y. and Wang, Gloria X. and Wang, Xinlei and Wang, Sheng and Guan, Kaiyu and Wang, Shenlong}, title = {Physical Property Understanding from Language-Embedded Feature Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28296-28305} }
EgoGen: An Egocentric Synthetic Data Generator-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Gen and Zhao, Kaifeng and Zhang, Siwei and Lyu, Xiaozhong and Dusmanu, Mihai and Zhang, Yan and Pollefeys, Marc and Tang, Siyu}, title = {EgoGen: An Egocentric Synthetic Data Generator}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14497-14509} }
Suppress and Rebalance: Towards Generalized Multi-Modal Face Anti-Spoofing-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lin_2024_CVPR, author = {Lin, Xun and Wang, Shuai and Cai, Rizhao and Liu, Yizhong and Fu, Ying and Tang, Wenzhong and Yu, Zitong and Kot, Alex}, title = {Suppress and Rebalance: Towards Generalized Multi-Modal Face Anti-Spoofing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {211-221} }
LEAD: Exploring Logit Space Evolution for Model Selection-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2024_CVPR, author = {Hu, Zixuan and Li, Xiaotong and Tang, Shixiang and Liu, Jun and Hu, Yichun and Duan, Ling-Yu}, title = {LEAD: Exploring Logit Space Evolution for Model Selection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28664-28673} }
Video ReCap: Recursive Captioning of Hour-Long Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Islam_2024_CVPR, author = {Islam, Md Mohaiminul and Ho, Ngan and Yang, Xitong and Nagarajan, Tushar and Torresani, Lorenzo and Bertasius, Gedas}, title = {Video ReCap: Recursive Captioning of Hour-Long Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18198-18208} }
Towards Realistic Scene Generation with LiDAR Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ran_2024_CVPR, author = {Ran, Haoxi and Guizilini, Vitor and Wang, Yue}, title = {Towards Realistic Scene Generation with LiDAR Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14738-14748} }
Diffusion Reflectance Map: Single-Image Stochastic Inverse Rendering of Illumination and Reflectance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Enyo_2024_CVPR, author = {Enyo, Yuto and Nishino, Ko}, title = {Diffusion Reflectance Map: Single-Image Stochastic Inverse Rendering of Illumination and Reflectance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11873-11883} }
Universal Segmentation at Arbitrary Granularity with Language Instruction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Yong and Zhang, Cairong and Wang, Yitong and Wang, Jiahao and Yang, Yujiu and Tang, Yansong}, title = {Universal Segmentation at Arbitrary Granularity with Language Instruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3459-3469} }
GaussianAvatars: Photorealistic Head Avatars with Rigged 3D Gaussians-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qian_2024_CVPR, author = {Qian, Shenhan and Kirschstein, Tobias and Schoneveld, Liam and Davoli, Davide and Giebenhain, Simon and Nie{\ss}ner, Matthias}, title = {GaussianAvatars: Photorealistic Head Avatars with Rigged 3D Gaussians}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20299-20309} }
MMMU: A Massive Multi-discipline Multimodal Understanding and Reasoning Benchmark for Expert AGI-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yue_2024_CVPR, author = {Yue, Xiang and Ni, Yuansheng and Zhang, Kai and Zheng, Tianyu and Liu, Ruoqi and Zhang, Ge and Stevens, Samuel and Jiang, Dongfu and Ren, Weiming and Sun, Yuxuan and Wei, Cong and Yu, Botao and Yuan, Ruibin and Sun, Renliang and Yin, Ming and Zheng, Boyuan and Yang, Zhenzhu and Liu, Yibo and Huang, Wenhao and Sun, Huan and Su, Yu and Chen, Wenhu}, title = {MMMU: A Massive Multi-discipline Multimodal Understanding and Reasoning Benchmark for Expert AGI}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9556-9567} }
Layout-Agnostic Scene Text Image Synthesis with Diffusion Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhangli_2024_CVPR, author = {Zhangli, Qilong and Jiang, Jindong and Liu, Di and Yu, Licheng and Dai, Xiaoliang and Ramchandani, Ankit and Pang, Guan and Metaxas, Dimitris N. and Krishnan, Praveen}, title = {Layout-Agnostic Scene Text Image Synthesis with Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7496-7506} }
EarthLoc: Astronaut Photography Localization by Indexing Earth from Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Berton_2024_CVPR, author = {Berton, Gabriele and Stoken, Alex and Caputo, Barbara and Masone, Carlo}, title = {EarthLoc: Astronaut Photography Localization by Indexing Earth from Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12754-12764} }
SmartMask: Context Aware High-Fidelity Mask Generation for Fine-grained Object Insertion and Layout Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Singh_2024_CVPR, author = {Singh, Jaskirat and Zhang, Jianming and Liu, Qing and Smith, Cameron and Lin, Zhe and Zheng, Liang}, title = {SmartMask: Context Aware High-Fidelity Mask Generation for Fine-grained Object Insertion and Layout Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6497-6506} }
Text-Image Alignment for Diffusion-Based Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kondapaneni_2024_CVPR, author = {Kondapaneni, Neehar and Marks, Markus and Knott, Manuel and Guimaraes, Rogerio and Perona, Pietro}, title = {Text-Image Alignment for Diffusion-Based Perception}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13883-13893} }
Customization Assistant for Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR, author = {Zhou, Yufan and Zhang, Ruiyi and Gu, Jiuxiang and Sun, Tong}, title = {Customization Assistant for Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9182-9191} }
GaussianEditor: Editing 3D Gaussians Delicately with Text Instructions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Junjie and Fang, Jiemin and Zhang, Xiaopeng and Xie, Lingxi and Tian, Qi}, title = {GaussianEditor: Editing 3D Gaussians Delicately with Text Instructions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20902-20911} }
MemFlow: Optical Flow Estimation and Prediction with Memory-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dong_2024_CVPR, author = {Dong, Qiaole and Fu, Yanwei}, title = {MemFlow: Optical Flow Estimation and Prediction with Memory}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19068-19078} }
Novel Class Discovery for Ultra-Fine-Grained Visual Categorization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Yu and Cai, Yaqi and Jia, Qi and Qiu, Binglin and Wang, Weimin and Pu, Nan}, title = {Novel Class Discovery for Ultra-Fine-Grained Visual Categorization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17679-17688} }
GenHowTo: Learning to Generate Actions and State Transformations from Instructional Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Soucek_2024_CVPR, author = {Sou\v{c}ek, Tom\'a\v{s} and Damen, Dima and Wray, Michael and Laptev, Ivan and Sivic, Josef}, title = {GenHowTo: Learning to Generate Actions and State Transformations from Instructional Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6561-6571} }
Paint-it: Text-to-Texture Synthesis via Deep Convolutional Texture Map Optimization and Physically-Based Rendering-
[pdf]
[supp]
[bibtex]@InProceedings{Youwang_2024_CVPR, author = {Youwang, Kim and Oh, Tae-Hyun and Pons-Moll, Gerard}, title = {Paint-it: Text-to-Texture Synthesis via Deep Convolutional Texture Map Optimization and Physically-Based Rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4347-4356} }
HiKER-SGG: Hierarchical Knowledge Enhanced Robust Scene Graph Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Ce and Stepputtis, Simon and Campbell, Joseph and Sycara, Katia and Xie, Yaqi}, title = {HiKER-SGG: Hierarchical Knowledge Enhanced Robust Scene Graph Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28233-28243} }
DiffusionGAN3D: Boosting Text-guided 3D Generation and Domain Adaptation by Combining 3D GANs and Diffusion Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lei_2024_CVPR, author = {Lei, Biwen and Yu, Kai and Feng, Mengyang and Cui, Miaomiao and Xie, Xuansong}, title = {DiffusionGAN3D: Boosting Text-guided 3D Generation and Domain Adaptation by Combining 3D GANs and Diffusion Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10487-10497} }
Physics-Aware Hand-Object Interaction Denoising-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2024_CVPR, author = {Luo, Haowen and Liu, Yunze and Yi, Li}, title = {Physics-Aware Hand-Object Interaction Denoising}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2341-2350} }
VastGaussian: Vast 3D Gaussians for Large Scene Reconstruction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lin_2024_CVPR, author = {Lin, Jiaqi and Li, Zhihao and Tang, Xiao and Liu, Jianzhuang and Liu, Shiyong and Liu, Jiayue and Lu, Yangdi and Wu, Xiaofei and Xu, Songcen and Yan, Youliang and Yang, Wenming}, title = {VastGaussian: Vast 3D Gaussians for Large Scene Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5166-5175} }
Edit One for All: Interactive Batch Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2024_CVPR, author = {Nguyen, Thao and Ojha, Utkarsh and Li, Yuheng and Liu, Haotian and Lee, Yong Jae}, title = {Edit One for All: Interactive Batch Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8271-8280} }
Rethinking Boundary Discontinuity Problem for Oriented Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Hang and Liu, Xinyuan and Xu, Haonan and Ma, Yike and Zhu, Zunjie and Yan, Chenggang and Dai, Feng}, title = {Rethinking Boundary Discontinuity Problem for Oriented Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17406-17415} }
Deformable One-shot Face Stylization via DINO Semantic Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR, author = {Zhou, Yang and Chen, Zichong and Huang, Hui}, title = {Deformable One-shot Face Stylization via DINO Semantic Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7787-7796} }
SleepVST: Sleep Staging from Near-Infrared Video Signals using Pre-Trained Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Carter_2024_CVPR, author = {Carter, Jonathan F. and Jorge, Jo\~ao and Gibson, Oliver and Tarassenko, Lionel}, title = {SleepVST: Sleep Staging from Near-Infrared Video Signals using Pre-Trained Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12479-12489} }
Coarse-to-Fine Latent Diffusion for Pose-Guided Person Image Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2024_CVPR, author = {Lu, Yanzuo and Zhang, Manlin and Ma, Andy J and Xie, Xiaohua and Lai, Jianhuang}, title = {Coarse-to-Fine Latent Diffusion for Pose-Guided Person Image Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6420-6429} }
Watermark-embedded Adversarial Examples for Copyright Protection against Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2024_CVPR, author = {Zhu, Peifei and Takahashi, Tsubasa and Kataoka, Hirokatsu}, title = {Watermark-embedded Adversarial Examples for Copyright Protection against Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24420-24430} }
TCP:Textual-based Class-aware Prompt tuning for Visual-Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yao_2024_CVPR, author = {Yao, Hantao and Zhang, Rui and Xu, Changsheng}, title = {TCP:Textual-based Class-aware Prompt tuning for Visual-Language Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23438-23448} }
OMG: Towards Open-vocabulary Motion Generation via Mixture of Controllers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2024_CVPR, author = {Liang, Han and Bao, Jiacheng and Zhang, Ruichi and Ren, Sihan and Xu, Yuecheng and Yang, Sibei and Chen, Xin and Yu, Jingyi and Xu, Lan}, title = {OMG: Towards Open-vocabulary Motion Generation via Mixture of Controllers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {482-493} }
TimeChat: A Time-sensitive Multimodal Large Language Model for Long Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2024_CVPR, author = {Ren, Shuhuai and Yao, Linli and Li, Shicheng and Sun, Xu and Hou, Lu}, title = {TimeChat: A Time-sensitive Multimodal Large Language Model for Long Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14313-14323} }
Align Your Gaussians: Text-to-4D with Dynamic 3D Gaussians and Composed Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ling_2024_CVPR, author = {Ling, Huan and Kim, Seung Wook and Torralba, Antonio and Fidler, Sanja and Kreis, Karsten}, title = {Align Your Gaussians: Text-to-4D with Dynamic 3D Gaussians and Composed Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8576-8588} }
PDF: A Probability-Driven Framework for Open World 3D Point Cloud Semantic Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Jinfeng and Yang, Siyuan and Li, Xianzhi and Tang, Yuan and Hao, Yixue and Hu, Long and Chen, Min}, title = {PDF: A Probability-Driven Framework for Open World 3D Point Cloud Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5977-5986} }
Test-Time Domain Generalization for Face Anti-Spoofing-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR, author = {Zhou, Qianyu and Zhang, Ke-Yue and Yao, Taiping and Lu, Xuequan and Ding, Shouhong and Ma, Lizhuang}, title = {Test-Time Domain Generalization for Face Anti-Spoofing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {175-187} }
DiffusionMTL: Learning Multi-Task Denoising Diffusion Model from Partially Annotated Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2024_CVPR, author = {Ye, Hanrong and Xu, Dan}, title = {DiffusionMTL: Learning Multi-Task Denoising Diffusion Model from Partially Annotated Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27960-27969} }
Spike-guided Motion Deblurring with Unknown Modal Spatiotemporal Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Jiyuan and Chen, Shiyan and Zheng, Yajing and Yu, Zhaofei and Huang, Tiejun}, title = {Spike-guided Motion Deblurring with Unknown Modal Spatiotemporal Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25047-25057} }
VRP-SAM: SAM with Visual Reference Prompt-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2024_CVPR, author = {Sun, Yanpeng and Chen, Jiahui and Zhang, Shan and Zhang, Xinyu and Chen, Qiang and Zhang, Gang and Ding, Errui and Wang, Jingdong and Li, Zechao}, title = {VRP-SAM: SAM with Visual Reference Prompt}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23565-23574} }
Discriminability-Driven Channel Selection for Out-of-Distribution Detection-
[pdf]
[bibtex]@InProceedings{Yuan_2024_CVPR, author = {Yuan, Yue and He, Rundong and Dong, Yicong and Han, Zhongyi and Yin, Yilong}, title = {Discriminability-Driven Channel Selection for Out-of-Distribution Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26171-26180} }
ManiFPT: Defining and Analyzing Fingerprints of Generative Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2024_CVPR, author = {Song, Hae Jin and Khayatkhoei, Mahyar and AbdAlmageed, Wael}, title = {ManiFPT: Defining and Analyzing Fingerprints of Generative Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10791-10801} }
Real-time 3D-aware Portrait Video Relighting-
[pdf]
[supp]
[bibtex]@InProceedings{Cai_2024_CVPR, author = {Cai, Ziqi and Jiang, Kaiwen and Chen, Shu-Yu and Lai, Yu-Kun and Fu, Hongbo and Shi, Boxin and Gao, Lin}, title = {Real-time 3D-aware Portrait Video Relighting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6221-6231} }
3DGS-Avatar: Animatable Avatars via Deformable 3D Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Qian_2024_CVPR, author = {Qian, Zhiyin and Wang, Shaofei and Mihajlovic, Marko and Geiger, Andreas and Tang, Siyu}, title = {3DGS-Avatar: Animatable Avatars via Deformable 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5020-5030} }
Quilt-LLaVA: Visual Instruction Tuning by Extracting Localized Narratives from Open-Source Histopathology Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Seyfioglu_2024_CVPR, author = {Seyfioglu, Mehmet Saygin and Ikezogwo, Wisdom O. and Ghezloo, Fatemeh and Krishna, Ranjay and Shapiro, Linda}, title = {Quilt-LLaVA: Visual Instruction Tuning by Extracting Localized Narratives from Open-Source Histopathology Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13183-13192} }
Traffic Scene Parsing through the TSP6K Dataset-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2024_CVPR, author = {Jiang, Peng-Tao and Yang, Yuqi and Cao, Yang and Hou, Qibin and Cheng, Ming-Ming and Shen, Chunhua}, title = {Traffic Scene Parsing through the TSP6K Dataset}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21874-21885} }
Style Aligned Image Generation via Shared Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hertz_2024_CVPR, author = {Hertz, Amir and Voynov, Andrey and Fruchter, Shlomi and Cohen-Or, Daniel}, title = {Style Aligned Image Generation via Shared Attention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4775-4785} }
E-GPS: Explainable Geometry Problem Solving via Top-Down Solver and Bottom-Up Generator-
[pdf]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Wenjun and Zhang, Lingling and Liu, Jun and Tang, Xi and Wang, Yaxian and Wang, Shaowei and Wang, Qianying}, title = {E-GPS: Explainable Geometry Problem Solving via Top-Down Solver and Bottom-Up Generator}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13828-13837} }
Back to 3D: Few-Shot 3D Keypoint Detection with Back-Projected 2D Features-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wimmer_2024_CVPR, author = {Wimmer, Thomas and Wonka, Peter and Ovsjanikov, Maks}, title = {Back to 3D: Few-Shot 3D Keypoint Detection with Back-Projected 2D Features}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4154-4164} }
Fourier Priors-Guided Diffusion for Zero-Shot Joint Low-Light Enhancement and Deblurring-
[pdf]
[bibtex]@InProceedings{Lv_2024_CVPR, author = {Lv, Xiaoqian and Zhang, Shengping and Wang, Chenyang and Zheng, Yichen and Zhong, Bineng and Li, Chongyi and Nie, Liqiang}, title = {Fourier Priors-Guided Diffusion for Zero-Shot Joint Low-Light Enhancement and Deblurring}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25378-25388} }
Neural Markov Random Field for Stereo Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guan_2024_CVPR, author = {Guan, Tongfan and Wang, Chen and Liu, Yun-Hui}, title = {Neural Markov Random Field for Stereo Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5459-5469} }
Driving into the Future: Multiview Visual Forecasting and Planning with World Model for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Yuqi and He, Jiawei and Fan, Lue and Li, Hongxin and Chen, Yuntao and Zhang, Zhaoxiang}, title = {Driving into the Future: Multiview Visual Forecasting and Planning with World Model for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14749-14759} }
OpenESS: Event-based Semantic Scene Understanding with Open Vocabularies-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kong_2024_CVPR, author = {Kong, Lingdong and Liu, Youquan and Ng, Lai Xing and Cottereau, Benoit R. and Ooi, Wei Tsang}, title = {OpenESS: Event-based Semantic Scene Understanding with Open Vocabularies}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15686-15698} }
Do Vision and Language Encoders Represent the World Similarly?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Maniparambil_2024_CVPR, author = {Maniparambil, Mayug and Akshulakov, Raiymbek and Djilali, Yasser Abdelaziz Dahou and El Amine Seddik, Mohamed and Narayan, Sanath and Mangalam, Karttikeya and O'Connor, Noel E.}, title = {Do Vision and Language Encoders Represent the World Similarly?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14334-14343} }
MGMap: Mask-Guided Learning for Online Vectorized HD Map Construction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Xiaolu and Wang, Song and Li, Wentong and Yang, Ruizi and Chen, Junbo and Zhu, Jianke}, title = {MGMap: Mask-Guided Learning for Online Vectorized HD Map Construction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14812-14821} }
Scaling Up to Excellence: Practicing Model Scaling for Photo-Realistic Image Restoration In the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2024_CVPR, author = {Yu, Fanghua and Gu, Jinjin and Li, Zheyuan and Hu, Jinfan and Kong, Xiangtao and Wang, Xintao and He, Jingwen and Qiao, Yu and Dong, Chao}, title = {Scaling Up to Excellence: Practicing Model Scaling for Photo-Realistic Image Restoration In the Wild}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25669-25680} }
Q-Instruct: Improving Low-level Visual Abilities for Multi-modality Foundation Models-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Haoning and Zhang, Zicheng and Zhang, Erli and Chen, Chaofeng and Liao, Liang and Wang, Annan and Xu, Kaixin and Li, Chunyi and Hou, Jingwen and Zhai, Guangtao and Xue, Geng and Sun, Wenxiu and Yan, Qiong and Lin, Weisi}, title = {Q-Instruct: Improving Low-level Visual Abilities for Multi-modality Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25490-25500} }
PoseIRM: Enhance 3D Human Pose Estimation on Unseen Camera Settings via Invariant Risk Minimization-
[pdf]
[supp]
[bibtex]@InProceedings{Cai_2024_CVPR, author = {Cai, Yanlu and Zhang, Weizhong and Wu, Yuan and Jin, Cheng}, title = {PoseIRM: Enhance 3D Human Pose Estimation on Unseen Camera Settings via Invariant Risk Minimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2124-2133} }
Zero-Shot Structure-Preserving Diffusion Model for High Dynamic Range Tone Mapping-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2024_CVPR, author = {Zhu, Ruoxi and Xu, Shusong and Liu, Peiye and Li, Sicheng and Lu, Yanheng and Niu, Dimin and Liu, Zihao and Meng, Zihao and Li, Zhiyong and Chen, Xinhua and Fan, Yibo}, title = {Zero-Shot Structure-Preserving Diffusion Model for High Dynamic Range Tone Mapping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26130-26139} }
VidLA: Video-Language Alignment at Scale-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rizve_2024_CVPR, author = {Rizve, Mamshad Nayeem and Fei, Fan and Unnikrishnan, Jayakrishnan and Tran, Son and Yao, Benjamin Z. and Zeng, Belinda and Shah, Mubarak and Chilimbi, Trishul}, title = {VidLA: Video-Language Alignment at Scale}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14043-14055} }
VoCo: A Simple-yet-Effective Volume Contrastive Learning Framework for 3D Medical Image Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Linshan and Zhuang, Jiaxin and Chen, Hao}, title = {VoCo: A Simple-yet-Effective Volume Contrastive Learning Framework for 3D Medical Image Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22873-22882} }
CCEdit: Creative and Controllable Video Editing via Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2024_CVPR, author = {Feng, Ruoyu and Weng, Wenming and Wang, Yanhui and Yuan, Yuhui and Bao, Jianmin and Luo, Chong and Chen, Zhibo and Guo, Baining}, title = {CCEdit: Creative and Controllable Video Editing via Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6712-6722} }
IPoD: Implicit Field Learning with Point Diffusion for Generalizable 3D Object Reconstruction from Single RGB-D Images-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Yushuang and Shi, Luyue and Cai, Junhao and Yuan, Weihao and Qiu, Lingteng and Dong, Zilong and Bo, Liefeng and Cui, Shuguang and Han, Xiaoguang}, title = {IPoD: Implicit Field Learning with Point Diffusion for Generalizable 3D Object Reconstruction from Single RGB-D Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20432-20442} }
HAVE-FUN: Human Avatar Reconstruction from Few-Shot Unconstrained Images-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Xihe and Chen, Xingyu and Gao, Daiheng and Wang, Shaohui and Han, Xiaoguang and Wang, Baoyuan}, title = {HAVE-FUN: Human Avatar Reconstruction from Few-Shot Unconstrained Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {742-752} }
ERMVP: Communication-Efficient and Collaboration-Robust Multi-Vehicle Perception in Challenging Environments-
[pdf]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Jingyu and Yang, Kun and Wang, Yilei and Wang, Hanqi and Sun, Peng and Song, Liang}, title = {ERMVP: Communication-Efficient and Collaboration-Robust Multi-Vehicle Perception in Challenging Environments}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12575-12584} }
DiffMorpher: Unleashing the Capability of Diffusion Models for Image Morphing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Kaiwen and Zhou, Yifan and Xu, Xudong and Dai, Bo and Pan, Xingang}, title = {DiffMorpher: Unleashing the Capability of Diffusion Models for Image Morphing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7912-7921} }
Towards Real-World HDR Video Reconstruction: A Large-Scale Benchmark Dataset and A Two-Stage Alignment Network-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shu_2024_CVPR, author = {Shu, Yong and Shen, Liquan and Hu, Xiangyu and Li, Mengyao and Zhou, Zihao}, title = {Towards Real-World HDR Video Reconstruction: A Large-Scale Benchmark Dataset and A Two-Stage Alignment Network}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2879-2888} }
Efficient 3D Implicit Head Avatar with Mesh-anchored Hash Table Blendshapes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bai_2024_CVPR, author = {Bai, Ziqian and Tan, Feitong and Fanello, Sean and Pandey, Rohit and Dou, Mingsong and Liu, Shichen and Tan, Ping and Zhang, Yinda}, title = {Efficient 3D Implicit Head Avatar with Mesh-anchored Hash Table Blendshapes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1975-1984} }
PikeLPN: Mitigating Overlooked Inefficiencies of Low-Precision Neural Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Neseem_2024_CVPR, author = {Neseem, Marina and McCullough, Conor and Hsin, Randy and Leichner, Chas and Li, Shan and Chong, In Suk and Howard, Andrew and Lew, Lukasz and Reda, Sherief and Rautio, Ville-Mikko and Moro, Daniele}, title = {PikeLPN: Mitigating Overlooked Inefficiencies of Low-Precision Neural Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15996-16005} }
CurveCloudNet: Processing Point Clouds with 1D Structure-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Stearns_2024_CVPR, author = {Stearns, Colton and Fu, Alex and Liu, Jiateng and Park, Jeong Joon and Rempe, Davis and Paschalidou, Despoina and Guibas, Leonidas J.}, title = {CurveCloudNet: Processing Point Clouds with 1D Structure}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27981-27991} }
CAGE: Controllable Articulation GEneration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Jiayi and Tam, Hou In Ivan and Mahdavi-Amiri, Ali and Savva, Manolis}, title = {CAGE: Controllable Articulation GEneration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17880-17889} }
No Time to Train: Empowering Non-Parametric Networks for Few-shot 3D Scene Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2024_CVPR, author = {Zhu, Xiangyang and Zhang, Renrui and He, Bowei and Guo, Ziyu and Liu, Jiaming and Xiao, Han and Fu, Chaoyou and Dong, Hao and Gao, Peng}, title = {No Time to Train: Empowering Non-Parametric Networks for Few-shot 3D Scene Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3838-3847} }
PhysGaussian: Physics-Integrated 3D Gaussians for Generative Dynamics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2024_CVPR, author = {Xie, Tianyi and Zong, Zeshun and Qiu, Yuxing and Li, Xuan and Feng, Yutao and Yang, Yin and Jiang, Chenfanfu}, title = {PhysGaussian: Physics-Integrated 3D Gaussians for Generative Dynamics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4389-4398} }
Spatio-Temporal Turbulence Mitigation: A Translational Perspective-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Xingguang and Chimitt, Nicholas and Chi, Yiheng and Mao, Zhiyuan and Chan, Stanley H.}, title = {Spatio-Temporal Turbulence Mitigation: A Translational Perspective}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2889-2899} }
FocusMAE: Gallbladder Cancer Detection from Ultrasound Videos with Focused Masked Autoencoders-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Basu_2024_CVPR, author = {Basu, Soumen and Gupta, Mayuna and Madan, Chetan and Gupta, Pankaj and Arora, Chetan}, title = {FocusMAE: Gallbladder Cancer Detection from Ultrasound Videos with Focused Masked Autoencoders}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11715-11725} }
Grounded Text-to-Image Synthesis with Attention Refocusing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Phung_2024_CVPR, author = {Phung, Quynh and Ge, Songwei and Huang, Jia-Bin}, title = {Grounded Text-to-Image Synthesis with Attention Refocusing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7932-7942} }
OpenStreetView-5M: The Many Roads to Global Visual Geolocation-
[pdf]
[supp]
[bibtex]@InProceedings{Astruc_2024_CVPR, author = {Astruc, Guillaume and Dufour, Nicolas and Siglidis, Ioannis and Aronssohn, Constantin and Bouia, Nacim and Fu, Stephanie and Loiseau, Romain and Nguyen, Van Nguyen and Raude, Charles and Vincent, Elliot and Xu, Lintao and Zhou, Hongyu and Landrieu, Loic}, title = {OpenStreetView-5M: The Many Roads to Global Visual Geolocation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21967-21977} }
Visual Concept Connectome (VCC): Open World Concept Discovery and their Interlayer Connections in Deep Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kowal_2024_CVPR, author = {Kowal, Matthew and Wildes, Richard P. and Derpanis, Konstantinos G.}, title = {Visual Concept Connectome (VCC): Open World Concept Discovery and their Interlayer Connections in Deep Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10895-10905} }
IReNe: Instant Recoloring of Neural Radiance Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mazzucchelli_2024_CVPR, author = {Mazzucchelli, Alessio and Garcia-Garcia, Adrian and Garces, Elena and Rivas-Manzaneque, Fernando and Moreno-Noguer, Francesc and Penate-Sanchez, Adrian}, title = {IReNe: Instant Recoloring of Neural Radiance Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5937-5946} }
Class Tokens Infusion for Weakly Supervised Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Yoon_2024_CVPR, author = {Yoon, Sung-Hoon and Kwon, Hoyong and Kim, Hyeonseong and Yoon, Kuk-Jin}, title = {Class Tokens Infusion for Weakly Supervised Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3595-3605} }
FedHCA2: Towards Hetero-Client Federated Multi-Task Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2024_CVPR, author = {Lu, Yuxiang and Huang, Suizhi and Yang, Yuwen and Sirejiding, Shalayiding and Ding, Yue and Lu, Hongtao}, title = {FedHCA2: Towards Hetero-Client Federated Multi-Task Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5599-5609} }
Text-IF: Leveraging Semantic Text Guidance for Degradation-Aware and Interactive Image Fusion-
[pdf]
[supp]
[bibtex]@InProceedings{Yi_2024_CVPR, author = {Yi, Xunpeng and Xu, Han and Zhang, Hao and Tang, Linfeng and Ma, Jiayi}, title = {Text-IF: Leveraging Semantic Text Guidance for Degradation-Aware and Interactive Image Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27026-27035} }
GRAM: Global Reasoning for Multi-Page VQA-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Blau_2024_CVPR, author = {Blau, Tsachi and Fogel, Sharon and Ronen, Roi and Golts, Alona and Ganz, Roy and Ben Avraham, Elad and Aberdam, Aviad and Tsiper, Shahar and Litman, Ron}, title = {GRAM: Global Reasoning for Multi-Page VQA}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15598-15607} }
MS-DETR: Efficient DETR Training with Mixed Supervision-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Chuyang and Sun, Yifan and Wang, Wenhao and Chen, Qiang and Ding, Errui and Yang, Yi and Wang, Jingdong}, title = {MS-DETR: Efficient DETR Training with Mixed Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17027-17036} }
Learning to Produce Semi-dense Correspondences for Visual Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Giang_2024_CVPR, author = {Giang, Khang Truong and Song, Soohwan and Jo, Sungho}, title = {Learning to Produce Semi-dense Correspondences for Visual Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19468-19478} }
Amodal Ground Truth and Completion in the Wild-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhan_2024_CVPR, author = {Zhan, Guanqi and Zheng, Chuanxia and Xie, Weidi and Zisserman, Andrew}, title = {Amodal Ground Truth and Completion in the Wild}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28003-28013} }
Motion Diversification Networks-
[pdf]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Hee Jae and Ohn-Bar, Eshed}, title = {Motion Diversification Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1650-1660} }
Telling Left from Right: Identifying Geometry-Aware Semantic Correspondence-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Junyi and Herrmann, Charles and Hur, Junhwa and Chen, Eric and Jampani, Varun and Sun, Deqing and Yang, Ming-Hsuan}, title = {Telling Left from Right: Identifying Geometry-Aware Semantic Correspondence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3076-3085} }
NECA: Neural Customizable Human Avatar-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2024_CVPR, author = {Xiao, Junjin and Zhang, Qing and Xu, Zhan and Zheng, Wei-Shi}, title = {NECA: Neural Customizable Human Avatar}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20091-20101} }
BEVSpread: Spread Voxel Pooling for Bird's-Eye-View Representation in Vision-based Roadside 3D Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Wenjie and Lu, Yehao and Zheng, Guangcong and Zhan, Shuigen and Ye, Xiaoqing and Tan, Zichang and Wang, Jingdong and Wang, Gaoang and Li, Xi}, title = {BEVSpread: Spread Voxel Pooling for Bird's-Eye-View Representation in Vision-based Roadside 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14718-14727} }
Real-IAD: A Real-World Multi-View Dataset for Benchmarking Versatile Industrial Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Chengjie and Zhu, Wenbing and Gao, Bin-Bin and Gan, Zhenye and Zhang, Jiangning and Gu, Zhihao and Qian, Shuguang and Chen, Mingang and Ma, Lizhuang}, title = {Real-IAD: A Real-World Multi-View Dataset for Benchmarking Versatile Industrial Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22883-22892} }
PAIR Diffusion: A Comprehensive Multimodal Object-Level Image Editor-
[pdf]
[supp]
[bibtex]@InProceedings{Goel_2024_CVPR, author = {Goel, Vidit and Peruzzo, Elia and Jiang, Yifan and Xu, Dejia and Xu, Xingqian and Sebe, Nicu and Darrell, Trevor and Wang, Zhangyang and Shi, Humphrey}, title = {PAIR Diffusion: A Comprehensive Multimodal Object-Level Image Editor}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8609-8618} }
Boosting Adversarial Transferability by Block Shuffle and Rotation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Kunyu and He, Xuanran and Wang, Wenxuan and Wang, Xiaosen}, title = {Boosting Adversarial Transferability by Block Shuffle and Rotation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24336-24346} }
DriveWorld: 4D Pre-trained Scene Understanding via World Models for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Min_2024_CVPR, author = {Min, Chen and Zhao, Dawei and Xiao, Liang and Zhao, Jian and Xu, Xinli and Zhu, Zheng and Jin, Lei and Li, Jianshu and Guo, Yulan and Xing, Junliang and Jing, Liping and Nie, Yiming and Dai, Bin}, title = {DriveWorld: 4D Pre-trained Scene Understanding via World Models for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15522-15533} }
Bridging the Gap Between End-to-End and Two-Step Text Spotting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Mingxin and Li, Hongliang and Liu, Yuliang and Bai, Xiang and Jin, Lianwen}, title = {Bridging the Gap Between End-to-End and Two-Step Text Spotting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15608-15618} }
TokenCompose: Text-to-Image Diffusion with Token-level Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Zirui and Sha, Zhizhou and Ding, Zheng and Wang, Yilin and Tu, Zhuowen}, title = {TokenCompose: Text-to-Image Diffusion with Token-level Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8553-8564} }
SUGAR: Pre-training 3D Visual Representations for Robotics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Shizhe and Garcia, Ricardo and Laptev, Ivan and Schmid, Cordelia}, title = {SUGAR: Pre-training 3D Visual Representations for Robotics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18049-18060} }
LidaRF: Delving into Lidar for Neural Radiance Field on Street Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2024_CVPR, author = {Sun, Shanlin and Zhuang, Bingbing and Jiang, Ziyu and Liu, Buyu and Xie, Xiaohui and Chandraker, Manmohan}, title = {LidaRF: Delving into Lidar for Neural Radiance Field on Street Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19563-19572} }
PairAug: What Can Augmented Image-Text Pairs Do for Radiology?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2024_CVPR, author = {Xie, Yutong and Chen, Qi and Wang, Sinuo and To, Minh-Son and Lee, Iris and Khoo, Ee Win and Hendy, Kerolos and Koh, Daniel and Xia, Yong and Wu, Qi}, title = {PairAug: What Can Augmented Image-Text Pairs Do for Radiology?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11652-11661} }
FINER: Flexible Spectral-bias Tuning in Implicit NEural Representation by Variable-periodic Activation Functions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Zhen and Zhu, Hao and Zhang, Qi and Fu, Jingde and Deng, Weibing and Ma, Zhan and Guo, Yanwen and Cao, Xun}, title = {FINER: Flexible Spectral-bias Tuning in Implicit NEural Representation by Variable-periodic Activation Functions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2713-2722} }
Harnessing Large Language Models for Training-free Video Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zanella_2024_CVPR, author = {Zanella, Luca and Menapace, Willi and Mancini, Massimiliano and Wang, Yiming and Ricci, Elisa}, title = {Harnessing Large Language Models for Training-free Video Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18527-18536} }
TextCraftor: Your Text Encoder Can be Image Quality Controller-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Yanyu and Liu, Xian and Kag, Anil and Hu, Ju and Idelbayev, Yerlan and Sagar, Dhritiman and Wang, Yanzhi and Tulyakov, Sergey and Ren, Jian}, title = {TextCraftor: Your Text Encoder Can be Image Quality Controller}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7985-7995} }
FineParser: A Fine-grained Spatio-temporal Action Parser for Human-centric Action Quality Assessment-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Jinglin and Yin, Sibo and Zhao, Guohao and Wang, Zishuo and Peng, Yuxin}, title = {FineParser: A Fine-grained Spatio-temporal Action Parser for Human-centric Action Quality Assessment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14628-14637} }
Video Recognition in Portrait Mode-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2024_CVPR, author = {Han, Mingfei and Yang, Linjie and Jin, Xiaojie and Feng, Jiashi and Chang, Xiaojun and Wang, Heng}, title = {Video Recognition in Portrait Mode}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21831-21841} }
Selective Hourglass Mapping for Universal Image Restoration Based on Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2024_CVPR, author = {Zheng, Dian and Wu, Xiao-Ming and Yang, Shuzhou and Zhang, Jian and Hu, Jian-Fang and Zheng, Wei-Shi}, title = {Selective Hourglass Mapping for Universal Image Restoration Based on Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25445-25455} }
Language Models as Black-Box Optimizers for Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Shihong and Yu, Samuel and Lin, Zhiqiu and Pathak, Deepak and Ramanan, Deva}, title = {Language Models as Black-Box Optimizers for Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12687-12697} }
Exploring Orthogonality in Open World Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2024_CVPR, author = {Sun, Zhicheng and Li, Jinghan and Mu, Yadong}, title = {Exploring Orthogonality in Open World Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17302-17312} }
Mitigating Object Hallucinations in Large Vision-Language Models through Visual Contrastive Decoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Leng_2024_CVPR, author = {Leng, Sicong and Zhang, Hang and Chen, Guanzheng and Li, Xin and Lu, Shijian and Miao, Chunyan and Bing, Lidong}, title = {Mitigating Object Hallucinations in Large Vision-Language Models through Visual Contrastive Decoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13872-13882} }
IMPRINT: Generative Object Compositing by Learning Identity-Preserving Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2024_CVPR, author = {Song, Yizhi and Zhang, Zhifei and Lin, Zhe and Cohen, Scott and Price, Brian and Zhang, Jianming and Kim, Soo Ye and Zhang, He and Xiong, Wei and Aliaga, Daniel}, title = {IMPRINT: Generative Object Compositing by Learning Identity-Preserving Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8048-8058} }
Audio-Visual Segmentation via Unlabeled Frame Exploitation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Jinxiang and Liu, Yikun and Zhang, Fei and Ju, Chen and Zhang, Ya and Wang, Yanfeng}, title = {Audio-Visual Segmentation via Unlabeled Frame Exploitation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26328-26339} }
DriveTrack: A Benchmark for Long-Range Point Tracking in Real-World Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Balasingam_2024_CVPR, author = {Balasingam, Arjun and Chandler, Joseph and Li, Chenning and Zhang, Zhoutong and Balakrishnan, Hari}, title = {DriveTrack: A Benchmark for Long-Range Point Tracking in Real-World Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22488-22497} }
Infrared Adversarial Car Stickers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2024_CVPR, author = {Zhu, Xiaopei and Liu, Yuqiu and Hu, Zhanhao and Li, Jianmin and Hu, Xiaolin}, title = {Infrared Adversarial Car Stickers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24284-24293} }
Sculpt3D: Multi-View Consistent Text-to-3D Generation with Sparse 3D Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Cheng and Yang, Xiaofeng and Yang, Fan and Feng, Chengzeng and Fu, Zhoujie and Foo, Chuan-Sheng and Lin, Guosheng and Liu, Fayao}, title = {Sculpt3D: Multi-View Consistent Text-to-3D Generation with Sparse 3D Prior}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10228-10237} }
FreeMan: Towards Benchmarking 3D Human Pose Estimation under Real-World Conditions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Jiong and Yang, Fengyu and Li, Bingliang and Gou, Wenbo and Yan, Danqi and Zeng, Ailing and Gao, Yijun and Wang, Junle and Jing, Yanqing and Zhang, Ruimao}, title = {FreeMan: Towards Benchmarking 3D Human Pose Estimation under Real-World Conditions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21978-21988} }
ScanFormer: Referring Expression Comprehension by Iteratively Scanning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Su_2024_CVPR, author = {Su, Wei and Miao, Peihan and Dou, Huanzhang and Li, Xi}, title = {ScanFormer: Referring Expression Comprehension by Iteratively Scanning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13449-13458} }
Model Inversion Robustness: Can Transfer Learning Help?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ho_2024_CVPR, author = {Ho, Sy-Tuyen and Hao, Koh Jun and Chandrasegaran, Keshigeyan and Nguyen, Ngoc-Bao and Cheung, Ngai-Man}, title = {Model Inversion Robustness: Can Transfer Learning Help?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12183-12193} }
Portrait4D: Learning One-Shot 4D Head Avatar Synthesis using Synthetic Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2024_CVPR, author = {Deng, Yu and Wang, Duomin and Ren, Xiaohang and Chen, Xingyu and Wang, Baoyuan}, title = {Portrait4D: Learning One-Shot 4D Head Avatar Synthesis using Synthetic Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7119-7130} }
GP-NeRF: Generalized Perception NeRF for Context-Aware 3D Scene Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Hao and Zhang, Dingwen and Dai, Yalun and Liu, Nian and Cheng, Lechao and Li, Jingfeng and Wang, Jingdong and Han, Junwei}, title = {GP-NeRF: Generalized Perception NeRF for Context-Aware 3D Scene Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21708-21718} }
Polarization Wavefront Lidar: Learning Large Scene Reconstruction from Polarized Wavefronts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Scheuble_2024_CVPR, author = {Scheuble, Dominik and Lei, Chenyang and Baek, Seung-Hwan and Bijelic, Mario and Heide, Felix}, title = {Polarization Wavefront Lidar: Learning Large Scene Reconstruction from Polarized Wavefronts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21241-21250} }
GDA: Generalized Diffusion for Robust Test-time Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tsai_2024_CVPR, author = {Tsai, Yun-Yun and Chen, Fu-Chen and Chen, Albert Y. C. and Yang, Junfeng and Su, Che-Chun and Sun, Min and Kuo, Cheng-Hao}, title = {GDA: Generalized Diffusion for Robust Test-time Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23242-23251} }
ConvoFusion: Multi-Modal Conversational Diffusion for Co-Speech Gesture Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mughal_2024_CVPR, author = {Mughal, Muhammad Hamza and Dabral, Rishabh and Habibie, Ikhsanul and Donatelli, Lucia and Habermann, Marc and Theobalt, Christian}, title = {ConvoFusion: Multi-Modal Conversational Diffusion for Co-Speech Gesture Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1388-1398} }
RLHF-V: Towards Trustworthy MLLMs via Behavior Alignment from Fine-grained Correctional Human Feedback-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2024_CVPR, author = {Yu, Tianyu and Yao, Yuan and Zhang, Haoye and He, Taiwen and Han, Yifeng and Cui, Ganqu and Hu, Jinyi and Liu, Zhiyuan and Zheng, Hai-Tao and Sun, Maosong and Chua, Tat-Seng}, title = {RLHF-V: Towards Trustworthy MLLMs via Behavior Alignment from Fine-grained Correctional Human Feedback}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13807-13816} }
ZeroShape: Regression-based Zero-shot Shape Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Zixuan and Stojanov, Stefan and Thai, Anh and Jampani, Varun and Rehg, James M.}, title = {ZeroShape: Regression-based Zero-shot Shape Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10061-10071} }
Continual-MAE: Adaptive Distribution Masked Autoencoders for Continual Test-Time Adaptation-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Jiaming and Xu, Ran and Yang, Senqiao and Zhang, Renrui and Zhang, Qizhe and Chen, Zehui and Guo, Yandong and Zhang, Shanghang}, title = {Continual-MAE: Adaptive Distribution Masked Autoencoders for Continual Test-Time Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28653-28663} }
The STVchrono Dataset: Towards Continuous Change Recognition in Time-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2024_CVPR, author = {Sun, Yanjun and Qiu, Yue and Khan, Mariia and Matsuzawa, Fumiya and Iwata, Kenji}, title = {The STVchrono Dataset: Towards Continuous Change Recognition in Time}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14111-14120} }
SocialCircle: Learning the Angle-based Social Interaction Representation for Pedestrian Trajectory Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wong_2024_CVPR, author = {Wong, Conghao and Xia, Beihao and Zou, Ziqian and Wang, Yulong and You, Xinge}, title = {SocialCircle: Learning the Angle-based Social Interaction Representation for Pedestrian Trajectory Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19005-19015} }
Boosting Neural Representations for Videos with a Conditional Decoder-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Xinjie and Yang, Ren and He, Dailan and Ge, Xingtong and Xu, Tongda and Wang, Yan and Qin, Hongwei and Zhang, Jun}, title = {Boosting Neural Representations for Videos with a Conditional Decoder}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2556-2566} }
Dual-Enhanced Coreset Selection with Class-wise Collaboration for Online Blurry Class Incremental Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Luo_2024_CVPR, author = {Luo, Yutian and Zhao, Shiqi and Wu, Haoran and Lu, Zhiwu}, title = {Dual-Enhanced Coreset Selection with Class-wise Collaboration for Online Blurry Class Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23995-24004} }
From Audio to Photoreal Embodiment: Synthesizing Humans in Conversations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ng_2024_CVPR, author = {Ng, Evonne and Romero, Javier and Bagautdinov, Timur and Bai, Shaojie and Darrell, Trevor and Kanazawa, Angjoo and Richard, Alexander}, title = {From Audio to Photoreal Embodiment: Synthesizing Humans in Conversations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1001-1010} }
Single-View Scene Point Cloud Human Grasp Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Yan-Kang and Xing, Chengyi and Wei, Yi-Lin and Wu, Xiao-Ming and Zheng, Wei-Shi}, title = {Single-View Scene Point Cloud Human Grasp Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {831-841} }
One-step Diffusion with Distribution Matching Distillation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yin_2024_CVPR, author = {Yin, Tianwei and Gharbi, Micha\"el and Zhang, Richard and Shechtman, Eli and Durand, Fr\'edo and Freeman, William T. and Park, Taesung}, title = {One-step Diffusion with Distribution Matching Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6613-6623} }
Cyclic Learning for Binaural Audio Generation and Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Zhaojian and Zhao, Bin and Yuan, Yuan}, title = {Cyclic Learning for Binaural Audio Generation and Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26669-26678} }
Neighbor Relations Matter in Video Scene Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Tan_2024_CVPR, author = {Tan, Jiawei and Wang, Hongxing and Li, Jiaxin and Ou, Zhilong and Qian, Zhangbin}, title = {Neighbor Relations Matter in Video Scene Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18473-18482} }
Rethinking Human Motion Prediction with Symplectic Integral-
[pdf]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Haipeng and Lyu, Kedi and Liu, Zhenguang and Yin, Yifang and Yang, Xun and Lyu, Yingda}, title = {Rethinking Human Motion Prediction with Symplectic Integral}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2134-2143} }
Text-to-Image Diffusion Models are Great Sketch-Photo Matchmakers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Koley_2024_CVPR, author = {Koley, Subhadeep and Bhunia, Ayan Kumar and Sain, Aneeshan and Chowdhury, Pinaki Nath and Xiang, Tao and Song, Yi-Zhe}, title = {Text-to-Image Diffusion Models are Great Sketch-Photo Matchmakers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16826-16837} }
Mudslide: A Universal Nuclear Instance Segmentation Method-
[pdf]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Jun}, title = {Mudslide: A Universal Nuclear Instance Segmentation Method}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11673-11682} }
CPGA: Coding Priors-Guided Aggregation Network for Compressed Video Quality Enhancement-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhu_2024_CVPR, author = {Zhu, Qiang and Hao, Jinhua and Ding, Yukang and Liu, Yu and Mo, Qiao and Sun, Ming and Zhou, Chao and Zhu, Shuyuan}, title = {CPGA: Coding Priors-Guided Aggregation Network for Compressed Video Quality Enhancement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2964-2974} }
MicroCinema: A Divide-and-Conquer Approach for Text-to-Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Yanhui and Bao, Jianmin and Weng, Wenming and Feng, Ruoyu and Yin, Dacheng and Yang, Tao and Zhang, Jingxu and Dai, Qi and Zhao, Zhiyuan and Wang, Chunyu and Qiu, Kai and Yuan, Yuhui and Sun, Xiaoyan and Luo, Chong and Guo, Baining}, title = {MicroCinema: A Divide-and-Conquer Approach for Text-to-Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8414-8424} }
Learning Instance-Aware Correspondences for Robust Multi-Instance Point Cloud Registration in Cluttered Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2024_CVPR, author = {Yu, Zhiyuan and Qin, Zheng and Zheng, Lintao and Xu, Kai}, title = {Learning Instance-Aware Correspondences for Robust Multi-Instance Point Cloud Registration in Cluttered Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19605-19614} }
Structure Matters: Tackling the Semantic Discrepancy in Diffusion Models for Image Inpainting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Haipeng and Wang, Yang and Qian, Biao and Wang, Meng and Rui, Yong}, title = {Structure Matters: Tackling the Semantic Discrepancy in Diffusion Models for Image Inpainting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8038-8047} }
Modeling Multimodal Social Interactions: New Challenges and Baselines with Densely Aligned Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2024_CVPR, author = {Lee, Sangmin and Lai, Bolin and Ryan, Fiona and Boote, Bikram and Rehg, James M.}, title = {Modeling Multimodal Social Interactions: New Challenges and Baselines with Densely Aligned Representations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14585-14595} }
COCONut: Modernizing COCO Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2024_CVPR, author = {Deng, Xueqing and Yu, Qihang and Wang, Peng and Shen, Xiaohui and Chen, Liang-Chieh}, title = {COCONut: Modernizing COCO Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21863-21873} }
Semantic Line Combination Detector-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ko_2024_CVPR, author = {Ko, Jinwon and Jin, Dongkwon and Kim, Chang-Su}, title = {Semantic Line Combination Detector}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28066-28075} }
Prompt-Driven Dynamic Object-Centric Learning for Single Domain Generalization-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Deng and Wu, Aming and Wang, Yaowei and Han, Yahong}, title = {Prompt-Driven Dynamic Object-Centric Learning for Single Domain Generalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17606-17615} }
Dual Pose-invariant Embeddings: Learning Category and Object-specific Discriminative Representations for Recognition and Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sarkar_2024_CVPR, author = {Sarkar, Rohan and Kak, Avinash}, title = {Dual Pose-invariant Embeddings: Learning Category and Object-specific Discriminative Representations for Recognition and Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17077-17085} }
vid-TLDR: Training Free Token Merging for Light-weight Video Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{Choi_2024_CVPR, author = {Choi, Joonmyung and Lee, Sanghyeok and Chu, Jaewon and Choi, Minhyuk and Kim, Hyunwoo J.}, title = {vid-TLDR: Training Free Token Merging for Light-weight Video Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18771-18781} }
DRESS: Instructing Large Vision-Language Models to Align and Interact with Humans via Natural Language Feedback-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Yangyi and Sikka, Karan and Cogswell, Michael and Ji, Heng and Divakaran, Ajay}, title = {DRESS: Instructing Large Vision-Language Models to Align and Interact with Humans via Natural Language Feedback}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14239-14250} }
Makeup Prior Models for 3D Facial Makeup Estimation and Applications-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Xingchao and Taketomi, Takafumi and Endo, Yuki and Kanamori, Yoshihiro}, title = {Makeup Prior Models for 3D Facial Makeup Estimation and Applications}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2165-2176} }
Salience DETR: Enhancing Detection Transformer with Hierarchical Salience Filtering Refinement-
[pdf]
[arXiv]
[bibtex]@InProceedings{Hou_2024_CVPR, author = {Hou, Xiuquan and Liu, Meiqin and Zhang, Senlin and Wei, Ping and Chen, Badong}, title = {Salience DETR: Enhancing Detection Transformer with Hierarchical Salience Filtering Refinement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17574-17583} }
Towards More Unified In-context Visual Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sheng_2024_CVPR, author = {Sheng, Dianmo and Chen, Dongdong and Tan, Zhentao and Liu, Qiankun and Chu, Qi and Bao, Jianmin and Gong, Tao and Liu, Bin and Xu, Shengwei and Yu, Nenghai}, title = {Towards More Unified In-context Visual Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13362-13372} }
F3Loc: Fusion and Filtering for Floorplan Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Changan and Wang, Rui and Vogel, Christoph and Pollefeys, Marc}, title = {F3Loc: Fusion and Filtering for Floorplan Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18029-18038} }
ReconFusion: 3D Reconstruction with Diffusion Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Rundi and Mildenhall, Ben and Henzler, Philipp and Park, Keunhong and Gao, Ruiqi and Watson, Daniel and Srinivasan, Pratul P. and Verbin, Dor and Barron, Jonathan T. and Poole, Ben and Ho{\l}y\'nski, Aleksander}, title = {ReconFusion: 3D Reconstruction with Diffusion Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21551-21561} }
I'M HOI: Inertia-aware Monocular Capture of 3D Human-Object Interactions-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Chengfeng and Zhang, Juze and Du, Jiashen and Shan, Ziwei and Wang, Junye and Yu, Jingyi and Wang, Jingya and Xu, Lan}, title = {I'M HOI: Inertia-aware Monocular Capture of 3D Human-Object Interactions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {729-741} }
Dynamic Policy-Driven Adaptive Multi-Instance Learning for Whole Slide Image Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2024_CVPR, author = {Zheng, Tingting and Jiang, Kui and Yao, Hongxun}, title = {Dynamic Policy-Driven Adaptive Multi-Instance Learning for Whole Slide Image Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8028-8037} }
InternVL: Scaling up Vision Foundation Models and Aligning for Generic Visual-Linguistic Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Zhe and Wu, Jiannan and Wang, Wenhai and Su, Weijie and Chen, Guo and Xing, Sen and Zhong, Muyan and Zhang, Qinglong and Zhu, Xizhou and Lu, Lewei and Li, Bin and Luo, Ping and Lu, Tong and Qiao, Yu and Dai, Jifeng}, title = {InternVL: Scaling up Vision Foundation Models and Aligning for Generic Visual-Linguistic Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24185-24198} }
Multi-View Attentive Contextualization for Multi-View 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Xianpeng and Zheng, Ce and Qian, Ming and Xue, Nan and Chen, Chen and Zhang, Zhebin and Li, Chen and Wu, Tianfu}, title = {Multi-View Attentive Contextualization for Multi-View 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16688-16698} }
MemSAM: Taming Segment Anything Model for Echocardiography Video Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2024_CVPR, author = {Deng, Xiaolong and Wu, Huisi and Zeng, Runhao and Qin, Jing}, title = {MemSAM: Taming Segment Anything Model for Echocardiography Video Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9622-9631} }
LiDAR4D: Dynamic Neural Fields for Novel Space-time View LiDAR Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2024_CVPR, author = {Zheng, Zehan and Lu, Fan and Xue, Weiyi and Chen, Guang and Jiang, Changjun}, title = {LiDAR4D: Dynamic Neural Fields for Novel Space-time View LiDAR Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5145-5154} }
Exploiting Diffusion Prior for Generalizable Dense Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2024_CVPR, author = {Lee, Hsin-Ying and Tseng, Hung-Yu and Lee, Hsin-Ying and Yang, Ming-Hsuan}, title = {Exploiting Diffusion Prior for Generalizable Dense Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7861-7871} }
PI3D: Efficient Text-to-3D Generation with Pseudo-Image Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Ying-Tian and Guo, Yuan-Chen and Luo, Guan and Sun, Heyi and Yin, Wei and Zhang, Song-Hai}, title = {PI3D: Efficient Text-to-3D Generation with Pseudo-Image Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19915-19924} }
Orthogonal Adaptation for Modular Customization of Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Po_2024_CVPR, author = {Po, Ryan and Yang, Guandao and Aberman, Kfir and Wetzstein, Gordon}, title = {Orthogonal Adaptation for Modular Customization of Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7964-7973} }
pixelSplat: 3D Gaussian Splats from Image Pairs for Scalable Generalizable 3D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Charatan_2024_CVPR, author = {Charatan, David and Li, Sizhe Lester and Tagliasacchi, Andrea and Sitzmann, Vincent}, title = {pixelSplat: 3D Gaussian Splats from Image Pairs for Scalable Generalizable 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19457-19467} }
VBench: Comprehensive Benchmark Suite for Video Generative Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Ziqi and He, Yinan and Yu, Jiashuo and Zhang, Fan and Si, Chenyang and Jiang, Yuming and Zhang, Yuanhan and Wu, Tianxing and Jin, Qingyang and Chanpaisit, Nattapol and Wang, Yaohui and Chen, Xinyuan and Wang, Limin and Lin, Dahua and Qiao, Yu and Liu, Ziwei}, title = {VBench: Comprehensive Benchmark Suite for Video Generative Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21807-21818} }
Language-conditioned Detection Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{Cho_2024_CVPR, author = {Cho, Jang Hyun and Kr\"ahenb\"uhl, Philipp}, title = {Language-conditioned Detection Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16593-16603} }
Optimizing Diffusion Noise Can Serve As Universal Motion Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Karunratanakul_2024_CVPR, author = {Karunratanakul, Korrawe and Preechakul, Konpat and Aksan, Emre and Beeler, Thabo and Suwajanakorn, Supasorn and Tang, Siyu}, title = {Optimizing Diffusion Noise Can Serve As Universal Motion Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1334-1345} }
MAP: MAsk-Pruning for Source-Free Model Intellectual Property Protection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2024_CVPR, author = {Peng, Boyang and Qu, Sanqing and Wu, Yong and Zou, Tianpei and He, Lianghua and Knoll, Alois and Chen, Guang and Jiang, Changjun}, title = {MAP: MAsk-Pruning for Source-Free Model Intellectual Property Protection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23585-23594} }
Improving Single Domain-Generalized Object Detection: A Focus on Diversification and Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Danish_2024_CVPR, author = {Danish, Muhammad Sohail and Khan, Muhammad Haris and Munir, Muhammad Akhtar and Sarfraz, M. Saquib and Ali, Mohsen}, title = {Improving Single Domain-Generalized Object Detection: A Focus on Diversification and Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17732-17742} }
OVFoodSeg: Elevating Open-Vocabulary Food Image Segmentation via Image-Informed Textual Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Xiongwei and Yu, Sicheng and Lim, Ee-Peng and Ngo, Chong-Wah}, title = {OVFoodSeg: Elevating Open-Vocabulary Food Image Segmentation via Image-Informed Textual Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4144-4153} }
XFeat: Accelerated Features for Lightweight Image Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Potje_2024_CVPR, author = {Potje, Guilherme and Cadar, Felipe and Araujo, Andr\'e and Martins, Renato and Nascimento, Erickson R.}, title = {XFeat: Accelerated Features for Lightweight Image Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2682-2691} }
Visual Prompting for Generalized Few-shot Segmentation: A Multi-scale Approach-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hossain_2024_CVPR, author = {Hossain, Mir Rayat Imtiaz and Siam, Mennatullah and Sigal, Leonid and Little, James J.}, title = {Visual Prompting for Generalized Few-shot Segmentation: A Multi-scale Approach}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23470-23480} }
ARTrackV2: Prompting Autoregressive Tracker Where to Look and How to Describe-
[pdf]
[arXiv]
[bibtex]@InProceedings{Bai_2024_CVPR, author = {Bai, Yifan and Zhao, Zeyang and Gong, Yihong and Wei, Xing}, title = {ARTrackV2: Prompting Autoregressive Tracker Where to Look and How to Describe}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19048-19057} }
A Vision Check-up for Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sharma_2024_CVPR, author = {Sharma, Pratyusha and Shaham, Tamar Rott and Baradad, Manel and Fu, Stephanie and Rodriguez-Munoz, Adrian and Duggal, Shivam and Isola, Phillip and Torralba, Antonio}, title = {A Vision Check-up for Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14410-14419} }
Memory-based Adapters for Online 3D Scene Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Xiuwei and Xia, Chong and Wang, Ziwei and Zhao, Linqing and Duan, Yueqi and Zhou, Jie and Lu, Jiwen}, title = {Memory-based Adapters for Online 3D Scene Perception}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21604-21613} }
SyncMask: Synchronized Attentional Masking for Fashion-centric Vision-Language Pretraining-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2024_CVPR, author = {Song, Chull Hwan and Hwang, Taebaek and Yoon, Jooyoung and Choi, Shunghyun and Gu, Yeong Hyeon}, title = {SyncMask: Synchronized Attentional Masking for Fashion-centric Vision-Language Pretraining}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13948-13957} }
A Study of Dropout-Induced Modality Bias on Robustness to Missing Video Frames for Audio-Visual Speech Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dai_2024_CVPR, author = {Dai, Yusheng and Chen, Hang and Du, Jun and Wang, Ruoyu and Chen, Shihao and Wang, Haotian and Lee, Chin-Hui}, title = {A Study of Dropout-Induced Modality Bias on Robustness to Missing Video Frames for Audio-Visual Speech Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27445-27455} }
A Conditional Denoising Diffusion Probabilistic Model for Point Cloud Upsampling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qu_2024_CVPR, author = {Qu, Wentao and Shao, Yuantian and Meng, Lingwu and Huang, Xiaoshui and Xiao, Liang}, title = {A Conditional Denoising Diffusion Probabilistic Model for Point Cloud Upsampling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20786-20795} }
VideoRF: Rendering Dynamic Radiance Fields as 2D Feature Video Streams-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Liao and Yao, Kaixin and Guo, Chengcheng and Zhang, Zhirui and Hu, Qiang and Yu, Jingyi and Xu, Lan and Wu, Minye}, title = {VideoRF: Rendering Dynamic Radiance Fields as 2D Feature Video Streams}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {470-481} }
DPHMs: Diffusion Parametric Head Models for Depth-based Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2024_CVPR, author = {Tang, Jiapeng and Dai, Angela and Nie, Yinyu and Markhasin, Lev and Thies, Justus and Nie{\ss}ner, Matthias}, title = {DPHMs: Diffusion Parametric Head Models for Depth-based Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1111-1122} }
DetDiffusion: Synergizing Generative and Perceptive Models for Enhanced Data Generation and Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Yibo and Gao, Ruiyuan and Chen, Kai and Zhou, Kaiqiang and Cai, Yingjie and Hong, Lanqing and Li, Zhenguo and Jiang, Lihui and Yeung, Dit-Yan and Xu, Qiang and Zhang, Kai}, title = {DetDiffusion: Synergizing Generative and Perceptive Models for Enhanced Data Generation and Perception}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7246-7255} }
GAFusion: Adaptive Fusing LiDAR and Camera with Multiple Guidance for 3D Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Xiaotian and Fan, Baojie and Tian, Jiandong and Fan, Huijie}, title = {GAFusion: Adaptive Fusing LiDAR and Camera with Multiple Guidance for 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21209-21218} }
Perception-Oriented Video Frame Interpolation via Asymmetric Blending-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Guangyang and Tao, Xin and Li, Changlin and Wang, Wenyi and Liu, Xiaohong and Zheng, Qingqing}, title = {Perception-Oriented Video Frame Interpolation via Asymmetric Blending}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2753-2762} }
Countering Personalized Text-to-Image Generation with Influence Watermarks-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Hanwen and Sun, Zhicheng and Mu, Yadong}, title = {Countering Personalized Text-to-Image Generation with Influence Watermarks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12257-12267} }
DUDF: Differentiable Unsigned Distance Fields with Hyperbolic Scaling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fainstein_2024_CVPR, author = {Fainstein, Miguel and Siless, Viviana and Iarussi, Emmanuel}, title = {DUDF: Differentiable Unsigned Distance Fields with Hyperbolic Scaling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4484-4493} }
PromptAD: Learning Prompts with only Normal Samples for Few-Shot Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Xiaofan and Zhang, Zhizhong and Tan, Xin and Chen, Chengwei and Qu, Yanyun and Xie, Yuan and Ma, Lizhuang}, title = {PromptAD: Learning Prompts with only Normal Samples for Few-Shot Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16838-16848} }
Improving Graph Contrastive Learning via Adaptive Positive Sampling-
[pdf]
[supp]
[bibtex]@InProceedings{Zhuo_2024_CVPR, author = {Zhuo, Jiaming and Qin, Feiyang and Cui, Can and Fu, Kun and Niu, Bingxin and Wang, Mengzhu and Guo, Yuanfang and Wang, Chuan and Wang, Zhen and Cao, Xiaochun and Yang, Liang}, title = {Improving Graph Contrastive Learning via Adaptive Positive Sampling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23179-23187} }
UFC-Net: Unrolling Fixed-point Continuous Network for Deep Compressive Sensing-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Xiaoyang and Gan, Hongping}, title = {UFC-Net: Unrolling Fixed-point Continuous Network for Deep Compressive Sensing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25149-25159} }
ECoDepth: Effective Conditioning of Diffusion Models for Monocular Depth Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Patni_2024_CVPR, author = {Patni, Suraj and Agarwal, Aradhye and Arora, Chetan}, title = {ECoDepth: Effective Conditioning of Diffusion Models for Monocular Depth Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28285-28295} }
DL3DV-10K: A Large-Scale Scene Dataset for Deep Learning-based 3D Vision-
[pdf]
[supp]
[bibtex]@InProceedings{Ling_2024_CVPR, author = {Ling, Lu and Sheng, Yichen and Tu, Zhi and Zhao, Wentian and Xin, Cheng and Wan, Kun and Yu, Lantao and Guo, Qianyu and Yu, Zixun and Lu, Yawen and Li, Xuanmao and Sun, Xingpeng and Ashok, Rohan and Mukherjee, Aniruddha and Kang, Hao and Kong, Xiangrui and Hua, Gang and Zhang, Tianyi and Benes, Bedrich and Bera, Aniket}, title = {DL3DV-10K: A Large-Scale Scene Dataset for Deep Learning-based 3D Vision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22160-22169} }
2S-UDF: A Novel Two-stage UDF Learning Method for Robust Non-watertight Model Reconstruction from Multi-view Images-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2024_CVPR, author = {Deng, Junkai and Hou, Fei and Chen, Xuhui and Wang, Wencheng and He, Ying}, title = {2S-UDF: A Novel Two-stage UDF Learning Method for Robust Non-watertight Model Reconstruction from Multi-view Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5084-5093} }
DETRs Beat YOLOs on Real-time Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Yian and Lv, Wenyu and Xu, Shangliang and Wei, Jinman and Wang, Guanzhong and Dang, Qingqing and Liu, Yi and Chen, Jie}, title = {DETRs Beat YOLOs on Real-time Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16965-16974} }
UniVS: Unified and Universal Video Segmentation with Prompts as Queries-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Minghan and Li, Shuai and Zhang, Xindong and Zhang, Lei}, title = {UniVS: Unified and Universal Video Segmentation with Prompts as Queries}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3227-3238} }
Bilateral Adaptation for Human-Object Interaction Detection with Occlusion-Robustness-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Guangzhi and Guo, Yangyang and Xu, Ziwei and Kankanhalli, Mohan}, title = {Bilateral Adaptation for Human-Object Interaction Detection with Occlusion-Robustness}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27970-27980} }
An Asymmetric Augmented Self-Supervised Learning Method for Unsupervised Fine-Grained Image Hashing-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2024_CVPR, author = {Hu, Feiran and Zhang, Chenlin and Guo, Jiangliang and Wei, Xiu-Shen and Zhao, Lin and Xu, Anqi and Gao, Lingyan}, title = {An Asymmetric Augmented Self-Supervised Learning Method for Unsupervised Fine-Grained Image Hashing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17648-17657} }
Efficiently Assemble Normalization Layers and Regularization for Federated Domain Generalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Le_2024_CVPR, author = {Le, Khiem and Ho, Long and Do, Cuong and Le-Phuoc, Danh and Wong, Kok-Seng}, title = {Efficiently Assemble Normalization Layers and Regularization for Federated Domain Generalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6027-6036} }
Exploring Pose-Aware Human-Object Interaction via Hybrid Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Eastman Z Y and Li, Yali and Wang, Yuan and Wang, Shengjin}, title = {Exploring Pose-Aware Human-Object Interaction via Hybrid Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17815-17825} }
Depth Information Assisted Collaborative Mutual Promotion Network for Single Image Dehazing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Yafei and Zhou, Shen and Li, Huafeng}, title = {Depth Information Assisted Collaborative Mutual Promotion Network for Single Image Dehazing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2846-2855} }
Density-Adaptive Model Based on Motif Matrix for Multi-Agent Trajectory Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Wen_2024_CVPR, author = {Wen, Di and Xu, Haoran and He, Zhaocheng and Wu, Zhe and Tan, Guang and Peng, Peixi}, title = {Density-Adaptive Model Based on Motif Matrix for Multi-Agent Trajectory Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14822-14832} }
Contrastive Learning for DeepFake Classification and Localization via Multi-Label Ranking-
[pdf]
[supp]
[bibtex]@InProceedings{Hong_2024_CVPR, author = {Hong, Cheng-Yao and Hsu, Yen-Chi and Liu, Tyng-Luh}, title = {Contrastive Learning for DeepFake Classification and Localization via Multi-Label Ranking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17627-17637} }
Unlocking the Potential of Pre-trained Vision Transformers for Few-Shot Semantic Segmentation through Relationship Descriptors-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2024_CVPR, author = {Zhou, Ziqin and Xu, Hai-Ming and Shu, Yangyang and Liu, Lingqiao}, title = {Unlocking the Potential of Pre-trained Vision Transformers for Few-Shot Semantic Segmentation through Relationship Descriptors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3817-3827} }
CustomListener: Text-guided Responsive Interaction for User-friendly Listening Head Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Xi and Guo, Ying and Zhen, Cheng and Li, Tong and Ao, Yingying and Yan, Pengfei}, title = {CustomListener: Text-guided Responsive Interaction for User-friendly Listening Head Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2415-2424} }
Projecting Trackable Thermal Patterns for Dynamic Computer Vision-
[pdf]
[supp]
[bibtex]@InProceedings{Sheinin_2024_CVPR, author = {Sheinin, Mark and Sankaranarayanan, Aswin C. and Narasimhan, Srinivasa G.}, title = {Projecting Trackable Thermal Patterns for Dynamic Computer Vision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25223-25232} }
SG-PGM: Partial Graph Matching Network with Semantic Geometric Fusion for 3D Scene Graph Alignment and Its Downstream Tasks-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2024_CVPR, author = {Xie, Yaxu and Pagani, Alain and Stricker, Didier}, title = {SG-PGM: Partial Graph Matching Network with Semantic Geometric Fusion for 3D Scene Graph Alignment and Its Downstream Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28401-28411} }
Fun with Flags: Robust Principal Directions via Flag Manifolds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mankovich_2024_CVPR, author = {Mankovich, Nathan and Camps-Valls, Gustau and Birdal, Tolga}, title = {Fun with Flags: Robust Principal Directions via Flag Manifolds}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {330-340} }
Generating Non-Stationary Textures using Self-Rectification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR, author = {Zhou, Yang and Xiao, Rongjun and Lischinski, Dani and Cohen-Or, Daniel and Huang, Hui}, title = {Generating Non-Stationary Textures using Self-Rectification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7767-7776} }
SPU-PMD: Self-Supervised Point Cloud Upsampling via Progressive Mesh Deformation-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Yanzhe and Chen, Rong and Li, Yushi and Li, Yixi and Tan, Xuehou}, title = {SPU-PMD: Self-Supervised Point Cloud Upsampling via Progressive Mesh Deformation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5188-5197} }
Advancing Saliency Ranking with Human Fixations: Dataset, Models and Benchmarks-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2024_CVPR, author = {Deng, Bowen and Song, Siyang and French, Andrew P. and Schluppeck, Denis and Pound, Michael P.}, title = {Advancing Saliency Ranking with Human Fixations: Dataset, Models and Benchmarks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28348-28357} }
Snap Video: Scaled Spatiotemporal Transformers for Text-to-Video Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Menapace_2024_CVPR, author = {Menapace, Willi and Siarohin, Aliaksandr and Skorokhodov, Ivan and Deyneka, Ekaterina and Chen, Tsai-Shien and Kag, Anil and Fang, Yuwei and Stoliar, Aleksei and Ricci, Elisa and Ren, Jian and Tulyakov, Sergey}, title = {Snap Video: Scaled Spatiotemporal Transformers for Text-to-Video Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7038-7048} }
Unsupervised Deep Unrolling Networks for Phase Unwrapping-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Zhile and Quan, Yuhui and Ji, Hui}, title = {Unsupervised Deep Unrolling Networks for Phase Unwrapping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25182-25192} }
Federated Generalized Category Discovery-
[pdf]
[supp]
[bibtex]@InProceedings{Pu_2024_CVPR, author = {Pu, Nan and Li, Wenjing and Ji, Xingyuan and Qin, Yalan and Sebe, Nicu and Zhong, Zhun}, title = {Federated Generalized Category Discovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28741-28750} }
JointSQ: Joint Sparsification-Quantization for Distributed Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2024_CVPR, author = {Xie, Weiying and Li, Haowei and Ma, Jitao and Li, Yunsong and Lei, Jie and Liu, Donglai and Fang, Leyuan}, title = {JointSQ: Joint Sparsification-Quantization for Distributed Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5778-5787} }
A Unified Framework for Human-centric Point Cloud Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Yiteng and Ye, Kecheng and Han, Xiao and Ren, Yiming and Zhu, Xinge and Ma, Yuexin}, title = {A Unified Framework for Human-centric Point Cloud Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1155-1164} }
Edge-Aware 3D Instance Segmentation Network with Intelligent Semantic Prior-
[pdf]
[supp]
[bibtex]@InProceedings{Roh_2024_CVPR, author = {Roh, Wonseok and Jung, Hwanhee and Nam, Giljoo and Yeom, Jinseop and Park, Hyunje and Yoon, Sang Ho and Kim, Sangpil}, title = {Edge-Aware 3D Instance Segmentation Network with Intelligent Semantic Prior}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20644-20653} }
Coherence As Texture - Passive Textureless 3D Reconstruction by Self-interference-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Wei-Yu and Sankaranarayanan, Aswin C. and Levin, Anat and O'Toole, Matthew}, title = {Coherence As Texture - Passive Textureless 3D Reconstruction by Self-interference}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25058-25066} }
Enhancing the Power of OOD Detection via Sample-Aware Model Selection-
[pdf]
[supp]
[bibtex]@InProceedings{Xue_2024_CVPR, author = {Xue, Feng and He, Zi and Zhang, Yuan and Xie, Chuanlong and Li, Zhenguo and Tan, Falong}, title = {Enhancing the Power of OOD Detection via Sample-Aware Model Selection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17148-17157} }
Collaborative Semantic Occupancy Prediction with Hybrid Feature Fusion in Connected Automated Vehicles-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2024_CVPR, author = {Song, Rui and Liang, Chenwei and Cao, Hu and Yan, Zhiran and Zimmer, Walter and Gross, Markus and Festag, Andreas and Knoll, Alois}, title = {Collaborative Semantic Occupancy Prediction with Hybrid Feature Fusion in Connected Automated Vehicles}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17996-18006} }
Generative Multi-modal Models are Good Class Incremental Learners-
[pdf]
[arXiv]
[bibtex]@InProceedings{Cao_2024_CVPR, author = {Cao, Xusheng and Lu, Haori and Huang, Linlan and Liu, Xialei and Cheng, Ming-Ming}, title = {Generative Multi-modal Models are Good Class Incremental Learners}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28706-28717} }
Low-Resource Vision Challenges for Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Yunhua and Doughty, Hazel and Snoek, Cees G. M.}, title = {Low-Resource Vision Challenges for Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21956-21966} }
RGBD Objects in the Wild: Scaling Real-World 3D Object Learning from RGB-D Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Xia_2024_CVPR, author = {Xia, Hongchi and Fu, Yang and Liu, Sifei and Wang, Xiaolong}, title = {RGBD Objects in the Wild: Scaling Real-World 3D Object Learning from RGB-D Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22378-22389} }
Shadow-Enlightened Image Outpainting-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2024_CVPR, author = {Yu, Hang and Li, Ruilin and Xie, Shaorong and Qiu, Jiayan}, title = {Shadow-Enlightened Image Outpainting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7850-7860} }
Towards Generalizable Tumor Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Qi and Chen, Xiaoxi and Song, Haorui and Xiong, Zhiwei and Yuille, Alan and Wei, Chen and Zhou, Zongwei}, title = {Towards Generalizable Tumor Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11147-11158} }
Low-Res Leads the Way: Improving Generalization for Super-Resolution by Self-Supervised Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Haoyu and Li, Wenbo and Gu, Jinjin and Ren, Jingjing and Sun, Haoze and Zou, Xueyi and Zhang, Zhensong and Yan, Youliang and Zhu, Lei}, title = {Low-Res Leads the Way: Improving Generalization for Super-Resolution by Self-Supervised Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25857-25867} }
BOTH2Hands: Inferring 3D Hands from Both Text Prompts and Body Dynamics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Wenqian and Huang, Molin and Zhou, Yuxuan and Zhang, Juze and Yu, Jingyi and Wang, Jingya and Xu, Lan}, title = {BOTH2Hands: Inferring 3D Hands from Both Text Prompts and Body Dynamics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2393-2404} }
EpiDiff: Enhancing Multi-View Synthesis via Localized Epipolar-Constrained Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Zehuan and Wen, Hao and Dong, Junting and Wang, Yaohui and Li, Yangguang and Chen, Xinyuan and Cao, Yan-Pei and Liang, Ding and Qiao, Yu and Dai, Bo and Sheng, Lu}, title = {EpiDiff: Enhancing Multi-View Synthesis via Localized Epipolar-Constrained Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9784-9794} }
On the Faithfulness of Vision Transformer Explanations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Junyi and Kang, Weitai and Tang, Hao and Hong, Yuan and Yan, Yan}, title = {On the Faithfulness of Vision Transformer Explanations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10936-10945} }
Pixel-level Semantic Correspondence through Layout-aware Representation Learning and Multi-scale Matching Integration-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2024_CVPR, author = {Sun, Yixuan and Yin, Zhangyue and Wang, Haibo and Wang, Yan and Qiu, Xipeng and Ge, Weifeng and Zhang, Wenqiang}, title = {Pixel-level Semantic Correspondence through Layout-aware Representation Learning and Multi-scale Matching Integration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17047-17056} }
Learning Spatial Features from Audio-Visual Correspondence in Egocentric Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Majumder_2024_CVPR, author = {Majumder, Sagnik and Al-Halah, Ziad and Grauman, Kristen}, title = {Learning Spatial Features from Audio-Visual Correspondence in Egocentric Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27058-27068} }
DreamAvatar: Text-and-Shape Guided 3D Human Avatar Generation via Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2024_CVPR, author = {Cao, Yukang and Cao, Yan-Pei and Han, Kai and Shan, Ying and Wong, Kwan-Yee K.}, title = {DreamAvatar: Text-and-Shape Guided 3D Human Avatar Generation via Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {958-968} }
Dynamic Graph Representation with Knowledge-aware Attention for Histopathology Whole Slide Image Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Jiawen and Chen, Yuxuan and Chu, Hongbo and Sun, Qiehe and Guan, Tian and Han, Anjia and He, Yonghong}, title = {Dynamic Graph Representation with Knowledge-aware Attention for Histopathology Whole Slide Image Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11323-11332} }
Brain Decodes Deep Nets-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Huzheng and Gee, James and Shi, Jianbo}, title = {Brain Decodes Deep Nets}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23030-23040} }
Semantics Distortion and Style Matter: Towards Source-free UDA for Panoramic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2024_CVPR, author = {Zheng, Xu and Zhou, Pengyuan and Vasilakos, Athanasios V. and Wang, Lin}, title = {Semantics Distortion and Style Matter: Towards Source-free UDA for Panoramic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27885-27895} }
Bidirectional Autoregessive Diffusion Model for Dance Generation-
[pdf]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Canyu and Tang, Youbao and Zhang, Ning and Lin, Ruei-Sung and Han, Mei and Xiao, Jing and Wang, Song}, title = {Bidirectional Autoregessive Diffusion Model for Dance Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {687-696} }
Align Before Adapt: Leveraging Entity-to-Region Alignments for Generalizable Video Action Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Yifei and Chen, Dapeng and Liu, Ruijin and Zhou, Sai and Xue, Wenyuan and Peng, Wei}, title = {Align Before Adapt: Leveraging Entity-to-Region Alignments for Generalizable Video Action Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18688-18698} }
GOV-NeSF: Generalizable Open-Vocabulary Neural Semantic Fields-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Yunsong and Chen, Hanlin and Lee, Gim Hee}, title = {GOV-NeSF: Generalizable Open-Vocabulary Neural Semantic Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20443-20453} }
FRESCO: Spatial-Temporal Correspondence for Zero-Shot Video Translation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Shuai and Zhou, Yifan and Liu, Ziwei and Loy, Chen Change}, title = {FRESCO: Spatial-Temporal Correspondence for Zero-Shot Video Translation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8703-8712} }
Dual-Scale Transformer for Large-Scale Single-Pixel Imaging-
[pdf]
[arXiv]
[bibtex]@InProceedings{Qu_2024_CVPR, author = {Qu, Gang and Wang, Ping and Yuan, Xin}, title = {Dual-Scale Transformer for Large-Scale Single-Pixel Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25327-25337} }
Towards Robust 3D Object Detection with LiDAR and 4D Radar Fusion in Various Weather Conditions-
[pdf]
[supp]
[bibtex]@InProceedings{Chae_2024_CVPR, author = {Chae, Yujeong and Kim, Hyeonseong and Yoon, Kuk-Jin}, title = {Towards Robust 3D Object Detection with LiDAR and 4D Radar Fusion in Various Weather Conditions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15162-15172} }
Enhancing 3D Fidelity of Text-to-3D using Cross-View Correspondences-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Seungwook and Li, Kejie and Deng, Xueqing and Shi, Yichun and Cho, Minsu and Wang, Peng}, title = {Enhancing 3D Fidelity of Text-to-3D using Cross-View Correspondences}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10649-10658} }
Bezier Everywhere All at Once: Learning Drivable Lanes as Bezier Graphs-
[pdf]
[supp]
[bibtex]@InProceedings{Blayney_2024_CVPR, author = {Blayney, Hugh and Tian, Hanlin and Scott, Hamish and Goldbeck, Nils and Stetson, Chess and Angeloudis, Panagiotis}, title = {Bezier Everywhere All at Once: Learning Drivable Lanes as Bezier Graphs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15365-15374} }
SplattingAvatar: Realistic Real-Time Human Avatars with Mesh-Embedded Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shao_2024_CVPR, author = {Shao, Zhijing and Wang, Zhaolong and Li, Zhuang and Wang, Duotun and Lin, Xiangru and Zhang, Yu and Fan, Mingming and Wang, Zeyu}, title = {SplattingAvatar: Realistic Real-Time Human Avatars with Mesh-Embedded Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1606-1616} }
MoSAR: Monocular Semi-Supervised Model for Avatar Reconstruction using Differentiable Shading-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dib_2024_CVPR, author = {Dib, Abdallah and Hafemann, Luiz Gustavo and Got, Emeline and Anderson, Trevor and Fadaeinejad, Amin and Cruz, Rafael M. O. and Carbonneau, Marc-Andr\'e}, title = {MoSAR: Monocular Semi-Supervised Model for Avatar Reconstruction using Differentiable Shading}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1770-1780} }
Bridging Remote Sensors with Multisensor Geospatial Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2024_CVPR, author = {Han, Boran and Zhang, Shuai and Shi, Xingjian and Reichstein, Markus}, title = {Bridging Remote Sensors with Multisensor Geospatial Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27852-27862} }
Can I Trust Your Answer? Visually Grounded Video Question Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2024_CVPR, author = {Xiao, Junbin and Yao, Angela and Li, Yicong and Chua, Tat-Seng}, title = {Can I Trust Your Answer? Visually Grounded Video Question Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13204-13214} }
RankED: Addressing Imbalance and Uncertainty in Edge Detection Using Ranking-based Losses-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cetinkaya_2024_CVPR, author = {Cetinkaya, Bedrettin and Kalkan, Sinan and Akbas, Emre}, title = {RankED: Addressing Imbalance and Uncertainty in Edge Detection Using Ranking-based Losses}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3239-3249} }
DiffHuman: Probabilistic Photorealistic 3D Reconstruction of Humans-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sengupta_2024_CVPR, author = {Sengupta, Akash and Alldieck, Thiemo and Kolotouros, Nikos and Corona, Enric and Zanfir, Andrei and Sminchisescu, Cristian}, title = {DiffHuman: Probabilistic Photorealistic 3D Reconstruction of Humans}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1439-1449} }
SeeSR: Towards Semantics-Aware Real-World Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Rongyuan and Yang, Tao and Sun, Lingchen and Zhang, Zhengqiang and Li, Shuai and Zhang, Lei}, title = {SeeSR: Towards Semantics-Aware Real-World Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25456-25467} }
Permutation Equivariance of Transformers and Its Applications-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Hengyuan and Xiang, Liyao and Ye, Hangyu and Yao, Dixi and Chu, Pengzhi and Li, Baochun}, title = {Permutation Equivariance of Transformers and Its Applications}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5987-5996} }
Polos: Multimodal Metric Learning from Human Feedback for Image Captioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wada_2024_CVPR, author = {Wada, Yuiga and Kaneda, Kanta and Saito, Daichi and Sugiura, Komei}, title = {Polos: Multimodal Metric Learning from Human Feedback for Image Captioning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13559-13568} }
Detours for Navigating Instructional Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ashutosh_2024_CVPR, author = {Ashutosh, Kumar and Xue, Zihui and Nagarajan, Tushar and Grauman, Kristen}, title = {Detours for Navigating Instructional Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18804-18815} }
Discontinuity-preserving Normal Integration with Auxiliary Edges-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Hyomin and Jung, Yucheol and Lee, Seungyong}, title = {Discontinuity-preserving Normal Integration with Auxiliary Edges}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11915-11923} }
DrivingGaussian: Composite Gaussian Splatting for Surrounding Dynamic Autonomous Driving Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR, author = {Zhou, Xiaoyu and Lin, Zhiwei and Shan, Xiaojun and Wang, Yongtao and Sun, Deqing and Yang, Ming-Hsuan}, title = {DrivingGaussian: Composite Gaussian Splatting for Surrounding Dynamic Autonomous Driving Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21634-21643} }
Self-Supervised Multi-Object Tracking with Path Consistency-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2024_CVPR, author = {Lu, Zijia and Shuai, Bing and Chen, Yanbei and Xu, Zhenlin and Modolo, Davide}, title = {Self-Supervised Multi-Object Tracking with Path Consistency}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19016-19026} }
Unsupervised Keypoints from Pretrained Diffusion Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Hedlin_2024_CVPR, author = {Hedlin, Eric and Sharma, Gopal and Mahajan, Shweta and He, Xingzhe and Isack, Hossam and Kar, Abhishek and Rhodin, Helge and Tagliasacchi, Andrea and Yi, Kwang Moo}, title = {Unsupervised Keypoints from Pretrained Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22820-22830} }
Resolution Limit of Single-Photon LiDAR-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chan_2024_CVPR, author = {Chan, Stanley H. and Weerasooriya, Hashan K. and Zhang, Weijian and Abshire, Pamela and Gyongy, Istvan and Henderson, Robert K.}, title = {Resolution Limit of Single-Photon LiDAR}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25307-25316} }
Flatten Long-Range Loss Landscapes for Cross-Domain Few-Shot Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zou_2024_CVPR, author = {Zou, Yixiong and Liu, Yicong and Hu, Yiman and Li, Yuhua and Li, Ruixuan}, title = {Flatten Long-Range Loss Landscapes for Cross-Domain Few-Shot Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23575-23584} }
Improving Distant 3D Object Detection Using 2D Box Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Zetong and Yu, Zhiding and Choy, Chris and Wang, Renhao and Anandkumar, Anima and Alvarez, Jose M.}, title = {Improving Distant 3D Object Detection Using 2D Box Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14853-14863} }
HDQMF: Holographic Feature Decomposition Using Quantum Algorithms-
[pdf]
[supp]
[bibtex]@InProceedings{Poduval_2024_CVPR, author = {Poduval, Prathyush Prasanth and Zou, Zhuowen and Imani, Mohsen}, title = {HDQMF: Holographic Feature Decomposition Using Quantum Algorithms}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10978-10987} }
Diffusion-based Blind Text Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Yuzhe and Zhang, Jiawei and Li, Hao and Wang, Zhouxia and Hou, Luwei and Zou, Dongqing and Bian, Liheng}, title = {Diffusion-based Blind Text Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25827-25836} }
Consistent Prompting for Rehearsal-Free Continual Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2024_CVPR, author = {Gao, Zhanxin and Cen, Jun and Chang, Xiaobin}, title = {Consistent Prompting for Rehearsal-Free Continual Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28463-28473} }
UniPAD: A Universal Pre-training Paradigm for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Honghui and Zhang, Sha and Huang, Di and Wu, Xiaoyang and Zhu, Haoyi and He, Tong and Tang, Shixiang and Zhao, Hengshuang and Qiu, Qibo and Lin, Binbin and He, Xiaofei and Ouyang, Wanli}, title = {UniPAD: A Universal Pre-training Paradigm for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15238-15250} }
SeD: Semantic-Aware Discriminator for Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Bingchen and Li, Xin and Zhu, Hanxin and Jin, Yeying and Feng, Ruoyu and Zhang, Zhizheng and Chen, Zhibo}, title = {SeD: Semantic-Aware Discriminator for Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25784-25795} }
SocialCounterfactuals: Probing and Mitigating Intersectional Social Biases in Vision-Language Models with Counterfactual Examples-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Howard_2024_CVPR, author = {Howard, Phillip and Madasu, Avinash and Le, Tiep and Moreno, Gustavo Lujan and Bhiwandiwalla, Anahita and Lal, Vasudev}, title = {SocialCounterfactuals: Probing and Mitigating Intersectional Social Biases in Vision-Language Models with Counterfactual Examples}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11975-11985} }
SVDTree: Semantic Voxel Diffusion for Single Image Tree Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Yuan and Liu, Zhihao and Benes, Bedrich and Zhang, Xiaopeng and Guo, Jianwei}, title = {SVDTree: Semantic Voxel Diffusion for Single Image Tree Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4692-4702} }
Rethinking FID: Towards a Better Evaluation Metric for Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jayasumana_2024_CVPR, author = {Jayasumana, Sadeep and Ramalingam, Srikumar and Veit, Andreas and Glasner, Daniel and Chakrabarti, Ayan and Kumar, Sanjiv}, title = {Rethinking FID: Towards a Better Evaluation Metric for Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9307-9315} }
Efficient Privacy-Preserving Visual Localization Using 3D Ray Clouds-
[pdf]
[supp]
[bibtex]@InProceedings{Moon_2024_CVPR, author = {Moon, Heejoon and Lee, Chunghwan and Hong, Je Hyeong}, title = {Efficient Privacy-Preserving Visual Localization Using 3D Ray Clouds}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9773-9783} }
SuperPrimitive: Scene Reconstruction at a Primitive Level-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mazur_2024_CVPR, author = {Mazur, Kirill and Bae, Gwangbin and Davison, Andrew J.}, title = {SuperPrimitive: Scene Reconstruction at a Primitive Level}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4979-4989} }
ReCoRe: Regularized Contrastive Representation Learning of World Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Poudel_2024_CVPR, author = {Poudel, Rudra P.K. and Pandya, Harit and Liwicki, Stephan and Cipolla, Roberto}, title = {ReCoRe: Regularized Contrastive Representation Learning of World Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22904-22913} }
TFMQ-DM: Temporal Feature Maintenance Quantization for Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Yushi and Gong, Ruihao and Liu, Jing and Chen, Tianlong and Liu, Xianglong}, title = {TFMQ-DM: Temporal Feature Maintenance Quantization for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7362-7371} }
CNC-Net: Self-Supervised Learning for CNC Machining Operations-
[pdf]
[supp]
[bibtex]@InProceedings{Yavartanoo_2024_CVPR, author = {Yavartanoo, Mohsen and Hong, Sangmin and Neshatavar, Reyhaneh and Lee, Kyoung Mu}, title = {CNC-Net: Self-Supervised Learning for CNC Machining Operations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9816-9825} }
JRDB-PanoTrack: An Open-world Panoptic Segmentation and Tracking Robotic Dataset in Crowded Human Environments-
[pdf]
[supp]
[bibtex]@InProceedings{Le_2024_CVPR, author = {Le, Duy Tho and Gou, Chenhui and Datta, Stavya and Shi, Hengcan and Reid, Ian and Cai, Jianfei and Rezatofighi, Hamid}, title = {JRDB-PanoTrack: An Open-world Panoptic Segmentation and Tracking Robotic Dataset in Crowded Human Environments}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22325-22334} }
CONFORM: Contrast is All You Need for High-Fidelity Text-to-Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Meral_2024_CVPR, author = {Meral, Tuna Han Salih and Simsar, Enis and Tombari, Federico and Yanardag, Pinar}, title = {CONFORM: Contrast is All You Need for High-Fidelity Text-to-Image Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9005-9014} }
Self-Supervised Facial Representation Learning with Facial Region Awareness-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2024_CVPR, author = {Gao, Zheng and Patras, Ioannis}, title = {Self-Supervised Facial Representation Learning with Facial Region Awareness}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2081-2092} }
GaussianDreamer: Fast Generation from Text to 3D Gaussians by Bridging 2D and 3D Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yi_2024_CVPR, author = {Yi, Taoran and Fang, Jiemin and Wang, Junjie and Wu, Guanjun and Xie, Lingxi and Zhang, Xiaopeng and Liu, Wenyu and Tian, Qi and Wang, Xinggang}, title = {GaussianDreamer: Fast Generation from Text to 3D Gaussians by Bridging 2D and 3D Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6796-6807} }
Open-Vocabulary Attention Maps with Token Optimization for Semantic Segmentation in Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Marcos-Manchon_2024_CVPR, author = {Marcos-Manch\'on, Pablo and Alcover-Couso, Roberto and SanMiguel, Juan C. and Mart{\'\i}nez, Jos\'e M.}, title = {Open-Vocabulary Attention Maps with Token Optimization for Semantic Segmentation in Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9242-9252} }
OPERA: Alleviating Hallucination in Multi-Modal Large Language Models via Over-Trust Penalty and Retrospection-Allocation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Qidong and Dong, Xiaoyi and Zhang, Pan and Wang, Bin and He, Conghui and Wang, Jiaqi and Lin, Dahua and Zhang, Weiming and Yu, Nenghai}, title = {OPERA: Alleviating Hallucination in Multi-Modal Large Language Models via Over-Trust Penalty and Retrospection-Allocation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13418-13427} }
Volumetric Environment Representation for Vision-Language Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Rui and Wang, Wenguan and Yang, Yi}, title = {Volumetric Environment Representation for Vision-Language Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16317-16328} }
DreamComposer: Controllable 3D Object Generation via Multi-View Conditions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Yunhan and Huang, Yukun and Wu, Xiaoyang and Guo, Yuan-Chen and Zhang, Song-Hai and Zhao, Hengshuang and He, Tong and Liu, Xihui}, title = {DreamComposer: Controllable 3D Object Generation via Multi-View Conditions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8111-8120} }
Self-Calibrating Vicinal Risk Minimisation for Model Calibration-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Jiawei and Ye, Changkun and Cui, Ruikai and Barnes, Nick}, title = {Self-Calibrating Vicinal Risk Minimisation for Model Calibration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3335-3345} }
NeRFDeformer: NeRF Transformation from a Single View via 3D Scene Flows-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2024_CVPR, author = {Tang, Zhenggang and Ren, Zhongzheng and Zhao, Xiaoming and Wen, Bowen and Tremblay, Jonathan and Birchfield, Stan and Schwing, Alexander}, title = {NeRFDeformer: NeRF Transformation from a Single View via 3D Scene Flows}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10293-10303} }
LPSNet: End-to-End Human Pose and Shape Estimation with Lensless Imaging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ge_2024_CVPR, author = {Ge, Haoyang and Feng, Qiao and Jia, Hailong and Li, Xiongzheng and Yin, Xiangjun and Zhou, You and Yang, Jingyu and Li, Kun}, title = {LPSNet: End-to-End Human Pose and Shape Estimation with Lensless Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1471-1480} }
Embracing Unimodal Aleatoric Uncertainty for Robust Multimodal Fusion-
[pdf]
[bibtex]@InProceedings{Gao_2024_CVPR, author = {Gao, Zixian and Jiang, Xun and Xu, Xing and Shen, Fumin and Li, Yujie and Shen, Heng Tao}, title = {Embracing Unimodal Aleatoric Uncertainty for Robust Multimodal Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26876-26885} }
Unifying Correspondence Pose and NeRF for Generalized Pose-Free Novel View Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Hong_2024_CVPR, author = {Hong, Sunghwan and Jung, Jaewoo and Shin, Heeseong and Yang, Jiaolong and Kim, Seungryong and Luo, Chong}, title = {Unifying Correspondence Pose and NeRF for Generalized Pose-Free Novel View Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20196-20206} }
Draw Step by Step: Reconstructing CAD Construction Sequences from Point Clouds via Multimodal Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2024_CVPR, author = {Ma, Weijian and Chen, Shuaiqi and Lou, Yunzhong and Li, Xueyang and Zhou, Xiangdong}, title = {Draw Step by Step: Reconstructing CAD Construction Sequences from Point Clouds via Multimodal Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27154-27163} }
DiffusionTrack: Point Set Diffusion Model for Visual Object Tracking-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2024_CVPR, author = {Xie, Fei and Wang, Zhongdao and Ma, Chao}, title = {DiffusionTrack: Point Set Diffusion Model for Visual Object Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19113-19124} }
Towards a Simultaneous and Granular Identity-Expression Control in Personalized Face Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Renshuai and Ma, Bowen and Zhang, Wei and Hu, Zhipeng and Fan, Changjie and Lv, Tangjie and Ding, Yu and Cheng, Xuan}, title = {Towards a Simultaneous and Granular Identity-Expression Control in Personalized Face Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2114-2123} }
PEEKABOO: Interactive Video Generation via Masked-Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jain_2024_CVPR, author = {Jain, Yash and Nasery, Anshul and Vineet, Vibhav and Behl, Harkirat}, title = {PEEKABOO: Interactive Video Generation via Masked-Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8079-8088} }
Scaling Diffusion Models to Real-World 3D LiDAR Scene Completion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nunes_2024_CVPR, author = {Nunes, Lucas and Marcuzzi, Rodrigo and Mersch, Benedikt and Behley, Jens and Stachniss, Cyrill}, title = {Scaling Diffusion Models to Real-World 3D LiDAR Scene Completion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14770-14780} }
Discriminative Pattern Calibration Mechanism for Source-Free Domain Adaptation-
[pdf]
[bibtex]@InProceedings{Xia_2024_CVPR, author = {Xia, Haifeng and Xia, Siyu and Ding, Zhengming}, title = {Discriminative Pattern Calibration Mechanism for Source-Free Domain Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23648-23658} }
Deep Generative Model based Rate-Distortion for Image Downscaling Assessment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2024_CVPR, author = {Liang, Yuanbang and Garg, Bhavesh and Rosin, Paul and Qin, Yipeng}, title = {Deep Generative Model based Rate-Distortion for Image Downscaling Assessment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19363-19372} }
Physical Backdoor: Towards Temperature-based Backdoor Attacks in the Physical World-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2024_CVPR, author = {Yin, Wen and Lou, Jian and Zhou, Pan and Xie, Yulai and Feng, Dan and Sun, Yuhua and Zhang, Tailai and Sun, Lichao}, title = {Physical Backdoor: Towards Temperature-based Backdoor Attacks in the Physical World}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12733-12743} }
Make Me a BNN: A Simple Strategy for Estimating Bayesian Uncertainty from Pre-trained Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Franchi_2024_CVPR, author = {Franchi, Gianni and Laurent, Olivier and Leguery, Maxence and Bursuc, Andrei and Pilzer, Andrea and Yao, Angela}, title = {Make Me a BNN: A Simple Strategy for Estimating Bayesian Uncertainty from Pre-trained Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12194-12204} }
Language-only Training of Zero-shot Composed Image Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Gu_2024_CVPR, author = {Gu, Geonmo and Chun, Sanghyuk and Kim, Wonjae and Kang, Yoohoon and Yun, Sangdoo}, title = {Language-only Training of Zero-shot Composed Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13225-13234} }
EFHQ: Multi-purpose ExtremePose-Face-HQ dataset-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dao_2024_CVPR, author = {Dao, Trung Tuan and Vu, Duc Hong and Pham, Cuong and Tran, Anh}, title = {EFHQ: Multi-purpose ExtremePose-Face-HQ dataset}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22605-22615} }
Dynamic Cues-Assisted Transformer for Robust Point Cloud Registration-
[pdf]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Hong and Yan, Pei and Xiang, Sihe and Tan, Yihua}, title = {Dynamic Cues-Assisted Transformer for Robust Point Cloud Registration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21698-21707} }
Patch2Self2: Self-supervised Denoising on Coresets via Matrix Sketching-
[pdf]
[supp]
[bibtex]@InProceedings{Fadnavis_2024_CVPR, author = {Fadnavis, Shreyas and Chowdhury, Agniva and Batson, Joshua and Drineas, Petros and Garyfallidis, Eleftherios}, title = {Patch2Self2: Self-supervised Denoising on Coresets via Matrix Sketching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27641-27651} }
High-fidelity Person-centric Subject-to-Image Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Yibin and Zhang, Weizhong and Zheng, Jianwei and Jin, Cheng}, title = {High-fidelity Person-centric Subject-to-Image Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7675-7684} }
The Devil is in the Fine-Grained Details: Evaluating Open-Vocabulary Object Detectors for Fine-Grained Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bianchi_2024_CVPR, author = {Bianchi, Lorenzo and Carrara, Fabio and Messina, Nicola and Gennaro, Claudio and Falchi, Fabrizio}, title = {The Devil is in the Fine-Grained Details: Evaluating Open-Vocabulary Object Detectors for Fine-Grained Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22520-22529} }
Efficient and Effective Weakly-Supervised Action Segmentation via Action-Transition-Aware Boundary Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Angchi and Zheng, Wei-Shi}, title = {Efficient and Effective Weakly-Supervised Action Segmentation via Action-Transition-Aware Boundary Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18253-18262} }
Link-Context Learning for Multimodal LLMs-
[pdf]
[arXiv]
[bibtex]@InProceedings{Tai_2024_CVPR, author = {Tai, Yan and Fan, Weichen and Zhang, Zhao and Liu, Ziwei}, title = {Link-Context Learning for Multimodal LLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27176-27185} }
Pixel-Aligned Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Jiarui and Zhou, Xingyi and Yan, Shen and Gu, Xiuye and Arnab, Anurag and Sun, Chen and Wang, Xiaolong and Schmid, Cordelia}, title = {Pixel-Aligned Language Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13030-13039} }
JeDi: Joint-Image Diffusion Models for Finetuning-Free Personalized Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2024_CVPR, author = {Zeng, Yu and Patel, Vishal M. and Wang, Haochen and Huang, Xun and Wang, Ting-Chun and Liu, Ming-Yu and Balaji, Yogesh}, title = {JeDi: Joint-Image Diffusion Models for Finetuning-Free Personalized Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6786-6795} }
ConsistDreamer: 3D-Consistent 2D Diffusion for High-Fidelity Scene Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Jun-Kun and Rota Bul{\`o}, Samuel and M{\"u}ller, Norman and Porzi, Lorenzo and Kontschieder, Peter and Wang, Yu-Xiong}, title = {ConsistDreamer: 3D-Consistent 2D Diffusion for High-Fidelity Scene Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21071-21080} }
HandDiff: 3D Hand Pose Estimation with Diffusion on Image-Point Cloud-
[pdf]
[arXiv]
[bibtex]@InProceedings{Cheng_2024_CVPR, author = {Cheng, Wencan and Tang, Hao and Van Gool, Luc and Ko, Jong Hwan}, title = {HandDiff: 3D Hand Pose Estimation with Diffusion on Image-Point Cloud}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2274-2284} }
SNIDA: Unlocking Few-Shot Object Detection with Non-linear Semantic Decoupling Augmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Yanjie and Zou, Xu and Yan, Luxin and Zhong, Sheng and Zhou, Jiahuan}, title = {SNIDA: Unlocking Few-Shot Object Detection with Non-linear Semantic Decoupling Augmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12544-12553} }
On the Robustness of Large Multimodal Models Against Image Adversarial Attacks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cui_2024_CVPR, author = {Cui, Xuanming and Aparcedo, Alejandro and Jang, Young Kyun and Lim, Ser-Nam}, title = {On the Robustness of Large Multimodal Models Against Image Adversarial Attacks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24625-24634} }
SoundingActions: Learning How Actions Sound from Narrated Egocentric Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Changan and Ashutosh, Kumar and Girdhar, Rohit and Harwath, David and Grauman, Kristen}, title = {SoundingActions: Learning How Actions Sound from Narrated Egocentric Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27252-27262} }
Not All Voxels Are Equal: Hardness-Aware Semantic Scene Completion with Self-Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Song and Yu, Jiawei and Li, Wentong and Liu, Wenyu and Liu, Xiaolu and Chen, Junbo and Zhu, Jianke}, title = {Not All Voxels Are Equal: Hardness-Aware Semantic Scene Completion with Self-Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14792-14801} }
3D-LFM: Lifting Foundation Model-
[pdf]
[supp]
[bibtex]@InProceedings{Dabhi_2024_CVPR, author = {Dabhi, Mosam and Jeni, L{\'a}szl{\'o} A. and Lucey, Simon}, title = {3D-LFM: Lifting Foundation Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10466-10475} }
VP3D: Unleashing 2D Visual Prompt for Text-to-3D Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Yang and Pan, Yingwei and Yang, Haibo and Yao, Ting and Mei, Tao}, title = {VP3D: Unleashing 2D Visual Prompt for Text-to-3D Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4896-4905} }
MonoHair: High-Fidelity Hair Modeling from a Monocular Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Keyu and Yang, Lingchen and Kuang, Zhiyi and Feng, Yao and Han, Xutao and Shen, Yuefan and Fu, Hongbo and Zhou, Kun and Zheng, Youyi}, title = {MonoHair: High-Fidelity Hair Modeling from a Monocular Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24164-24173} }
Content-Style Decoupling for Unsupervised Makeup Transfer without Generating Pseudo Ground Truth-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2024_CVPR, author = {Sun, Zhaoyang and Xiong, Shengwu and Chen, Yaxiong and Rong, Yi}, title = {Content-Style Decoupling for Unsupervised Makeup Transfer without Generating Pseudo Ground Truth}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7601-7610} }
One Prompt Word is Enough to Boost Adversarial Robustness for Pre-trained Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Lin and Guan, Haoyan and Qiu, Jianing and Spratling, Michael}, title = {One Prompt Word is Enough to Boost Adversarial Robustness for Pre-trained Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24408-24419} }
A Versatile Framework for Continual Test-Time Domain Adaptation: Balancing Discriminability and Generalizability-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Xu and Chen, Xuan and Li, Moqi and Wei, Kun and Deng, Cheng}, title = {A Versatile Framework for Continual Test-Time Domain Adaptation: Balancing Discriminability and Generalizability}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23731-23740} }
Quantifying Uncertainty in Motion Prediction with Variational Bayesian Mixture-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2024_CVPR, author = {Lu, Juanwu and Cui, Can and Ma, Yunsheng and Bera, Aniket and Wang, Ziran}, title = {Quantifying Uncertainty in Motion Prediction with Variational Bayesian Mixture}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15428-15437} }
You Only Need Less Attention at Each Stage in Vision Transformers-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Shuoxi and Liu, Hanpeng and Lin, Stephen and He, Kun}, title = {You Only Need Less Attention at Each Stage in Vision Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6057-6066} }
Sieve: Multimodal Dataset Pruning using Image Captioning Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mahmoud_2024_CVPR, author = {Mahmoud, Anas and Elhoushi, Mostafa and Abbas, Amro and Yang, Yu and Ardalani, Newsha and Leather, Hugh and Morcos, Ari S.}, title = {Sieve: Multimodal Dataset Pruning using Image Captioning Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22423-22432} }
Generalizable Novel-View Synthesis using a Stereo Camera-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2024_CVPR, author = {Lee, Haechan and Jin, Wonjoon and Baek, Seung-Hwan and Cho, Sunghyun}, title = {Generalizable Novel-View Synthesis using a Stereo Camera}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4939-4948} }
Dynamic LiDAR Re-simulation using Compositional Neural Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Hanfeng and Zuo, Xingxing and Leutenegger, Stefan and Litany, Or and Schindler, Konrad and Huang, Shengyu}, title = {Dynamic LiDAR Re-simulation using Compositional Neural Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19988-19998} }
Explaining CLIP's Performance Disparities on Data from Blind/Low Vision Users-
[pdf]
[supp]
[bibtex]@InProceedings{Massiceti_2024_CVPR, author = {Massiceti, Daniela and Longden, Camilla and Slowik, Agnieszka and Wills, Samuel and Grayson, Martin and Morrison, Cecily}, title = {Explaining CLIP's Performance Disparities on Data from Blind/Low Vision Users}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12172-12182} }
AETTA: Label-Free Accuracy Estimation for Test-Time Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2024_CVPR, author = {Lee, Taeckyung and Chottananurak, Sorn and Gong, Taesik and Lee, Sung-Ju}, title = {AETTA: Label-Free Accuracy Estimation for Test-Time Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28643-28652} }
Digital Life Project: Autonomous 3D Characters with Social Intelligence-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2024_CVPR, author = {Cai, Zhongang and Jiang, Jianping and Qing, Zhongfei and Guo, Xinying and Zhang, Mingyuan and Lin, Zhengyu and Mei, Haiyi and Wei, Chen and Wang, Ruisi and Yin, Wanqi and Pan, Liang and Fan, Xiangyu and Du, Han and Gao, Peng and Yang, Zhitao and Gao, Yang and Li, Jiaqi and Ren, Tianxiang and Wei, Yukun and Wang, Xiaogang and Loy, Chen Change and Yang, Lei and Liu, Ziwei}, title = {Digital Life Project: Autonomous 3D Characters with Social Intelligence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {582-592} }
An Empirical Study of the Generalization Ability of Lidar 3D Object Detectors to Unseen Domains-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Eskandar_2024_CVPR, author = {Eskandar, George}, title = {An Empirical Study of the Generalization Ability of Lidar 3D Object Detectors to Unseen Domains}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23815-23825} }
Unsupervised Universal Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Niu_2024_CVPR, author = {Niu, Dantong and Wang, Xudong and Han, Xinyang and Lian, Long and Herzig, Roei and Darrell, Trevor}, title = {Unsupervised Universal Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22744-22754} }
Rethinking Prior Information Generation with CLIP for Few-Shot Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Jin and Zhang, Bingfeng and Pang, Jian and Chen, Honglong and Liu, Weifeng}, title = {Rethinking Prior Information Generation with CLIP for Few-Shot Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3941-3951} }
SingularTrajectory: Universal Trajectory Predictor Using Diffusion Model-
[pdf]
[arXiv]
[bibtex]@InProceedings{Bae_2024_CVPR, author = {Bae, Inhwan and Park, Young-Jae and Jeon, Hae-Gon}, title = {SingularTrajectory: Universal Trajectory Predictor Using Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17890-17901} }
Generating Handwritten Mathematical Expressions From Symbol Graphs: An End-to-End Pipeline-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Yu and Gao, Fei and Zhang, Yanguang and Qiao, Maoying and Wang, Nannan}, title = {Generating Handwritten Mathematical Expressions From Symbol Graphs: An End-to-End Pipeline}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15675-15685} }
A Closer Look at the Few-Shot Adaptation of Large Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Silva-Rodriguez_2024_CVPR, author = {Silva-Rodr{\'\i}guez, Julio and Hajimiri, Sina and Ben Ayed, Ismail and Dolz, Jose}, title = {A Closer Look at the Few-Shot Adaptation of Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23681-23690} }
Generative Rendering: Controllable 4D-Guided Video Generation with 2D Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2024_CVPR, author = {Cai, Shengqu and Ceylan, Duygu and Gadelha, Matheus and Huang, Chun-Hao Paul and Wang, Tuanfeng Yang and Wetzstein, Gordon}, title = {Generative Rendering: Controllable 4D-Guided Video Generation with 2D Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7611-7620} }
Relightable Gaussian Codec Avatars-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Saito_2024_CVPR, author = {Saito, Shunsuke and Schwartz, Gabriel and Simon, Tomas and Li, Junxuan and Nam, Giljoo}, title = {Relightable Gaussian Codec Avatars}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {130-141} }
Why Not Use Your Textbook? Knowledge-Enhanced Procedure Planning of Instructional Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nagasinghe_2024_CVPR, author = {Nagasinghe, Kumaranage Ravindu Yasas and Zhou, Honglu and Gunawardhana, Malitha and Min, Martin Renqiang and Harari, Daniel and Khan, Muhammad Haris}, title = {Why Not Use Your Textbook? Knowledge-Enhanced Procedure Planning of Instructional Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18816-18826} }
Global and Hierarchical Geometry Consistency Priors for Few-shot NeRFs in Indoor Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2024_CVPR, author = {Sun, Xiaotian and Xu, Qingshan and Yang, Xinjie and Zang, Yu and Wang, Cheng}, title = {Global and Hierarchical Geometry Consistency Priors for Few-shot NeRFs in Indoor Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20530-20539} }
FreeKD: Knowledge Distillation via Semantic Frequency Prompt-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Yuan and Huang, Tao and Liu, Jiaming and Jiang, Tao and Cheng, Kuan and Zhang, Shanghang}, title = {FreeKD: Knowledge Distillation via Semantic Frequency Prompt}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15931-15940} }
Can't Make an Omelette Without Breaking Some Eggs: Plausible Action Anticipation Using Large Video-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Mittal_2024_CVPR, author = {Mittal, Himangi and Agarwal, Nakul and Lo, Shao-Yuan and Lee, Kwonjoon}, title = {Can't Make an Omelette Without Breaking Some Eggs: Plausible Action Anticipation Using Large Video-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18580-18590} }
On the Estimation of Image-matching Uncertainty in Visual Place Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zaffar_2024_CVPR, author = {Zaffar, Mubariz and Nan, Liangliang and Kooij, Julian F. P.}, title = {On the Estimation of Image-matching Uncertainty in Visual Place Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17743-17753} }
Mask Grounding for Referring Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chng_2024_CVPR, author = {Chng, Yong Xien and Zheng, Henry and Han, Yizeng and Qiu, Xuchong and Huang, Gao}, title = {Mask Grounding for Referring Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26573-26583} }
Single-to-Dual-View Adaptation for Egocentric 3D Hand Pose Estimation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Ruicong and Ohkawa, Takehiko and Zhang, Mingfang and Sato, Yoichi}, title = {Single-to-Dual-View Adaptation for Egocentric 3D Hand Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {677-686} }
Time-Efficient Light-Field Acquisition Using Coded Aperture and Events-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Habuchi_2024_CVPR, author = {Habuchi, Shuji and Takahashi, Keita and Tsutake, Chihiro and Fujii, Toshiaki and Nagahara, Hajime}, title = {Time-Efficient Light-Field Acquisition Using Coded Aperture and Events}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24923-24933} }
EVS-assisted Joint Deblurring Rolling-Shutter Correction and Video Frame Interpolation through Sensor Inverse Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2024_CVPR, author = {Jiang, Rui and Tu, Fangwen and Long, Yixuan and Vaish, Aabhaas and Zhou, Bowen and Wang, Qinyi and Zhang, Wei and Fang, Yuntan and Capel, Luis Eduardo Garcia and Mu, Bo and Dai, Tiejun and Suess, Andreas}, title = {EVS-assisted Joint Deblurring Rolling-Shutter Correction and Video Frame Interpolation through Sensor Inverse Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25172-25181} }
Prompt-Enhanced Multiple Instance Learning for Weakly Supervised Video Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Junxi and Li, Liang and Su, Li and Zha, Zheng-jun and Huang, Qingming}, title = {Prompt-Enhanced Multiple Instance Learning for Weakly Supervised Video Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18319-18329} }
Animate Anyone: Consistent and Controllable Image-to-Video Synthesis for Character Animation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2024_CVPR, author = {Hu, Li}, title = {Animate Anyone: Consistent and Controllable Image-to-Video Synthesis for Character Animation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8153-8163} }
FreeCustom: Tuning-Free Customized Image Generation for Multi-Concept Composition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ding_2024_CVPR, author = {Ding, Ganggui and Zhao, Canyu and Wang, Wen and Yang, Zhen and Liu, Zide and Chen, Hao and Shen, Chunhua}, title = {FreeCustom: Tuning-Free Customized Image Generation for Multi-Concept Composition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9089-9098} }
Non-autoregressive Sequence-to-Sequence Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2024_CVPR, author = {Shi, Kunyu and Dong, Qi and Goncalves, Luis and Tu, Zhuowen and Soatto, Stefano}, title = {Non-autoregressive Sequence-to-Sequence Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13603-13612} }
MaskINT: Video Editing via Interpolative Non-autoregressive Masked Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2024_CVPR, author = {Ma, Haoyu and Mahdizadehaghdam, Shahin and Wu, Bichen and Fan, Zhipeng and Gu, Yuchao and Zhao, Wenliang and Shapira, Lior and Xie, Xiaohui}, title = {MaskINT: Video Editing via Interpolative Non-autoregressive Masked Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7403-7412} }
Active Prompt Learning in Vision Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bang_2024_CVPR, author = {Bang, Jihwan and Ahn, Sumyeong and Lee, Jae-Gil}, title = {Active Prompt Learning in Vision Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27004-27014} }
Learning Multi-Dimensional Human Preference for Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Sixian and Wang, Bohan and Wu, Junqiang and Li, Yan and Gao, Tingting and Zhang, Di and Wang, Zhongyuan}, title = {Learning Multi-Dimensional Human Preference for Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8018-8027} }
ViVid-1-to-3: Novel View Synthesis with Video Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Kwak_2024_CVPR, author = {Kwak, Jeong-gi and Dong, Erqun and Jin, Yuhe and Ko, Hanseok and Mahajan, Shweta and Yi, Kwang Moo}, title = {ViVid-1-to-3: Novel View Synthesis with Video Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6775-6785} }
Active Object Detection with Knowledge Aggregation and Distillation from Large Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Dejie and Liu, Yang}, title = {Active Object Detection with Knowledge Aggregation and Distillation from Large Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16624-16633} }
NICE: Neurogenesis Inspired Contextual Encoding for Replay-free Class Incremental Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Gurbuz_2024_CVPR, author = {Gurbuz, Mustafa Burak and Moorman, Jean Michael and Dovrolis, Constantine}, title = {NICE: Neurogenesis Inspired Contextual Encoding for Replay-free Class Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23659-23669} }
Generating Human Motion in 3D Scenes from Text Descriptions-
[pdf]
[arXiv]
[bibtex]@InProceedings{Cen_2024_CVPR, author = {Cen, Zhi and Pi, Huaijin and Peng, Sida and Shen, Zehong and Yang, Minghui and Zhu, Shuai and Bao, Hujun and Zhou, Xiaowei}, title = {Generating Human Motion in 3D Scenes from Text Descriptions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1855-1866} }
Weak-to-Strong 3D Object Detection with X-Ray Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Gambashidze_2024_CVPR, author = {Gambashidze, Alexander and Dadukin, Aleksandr and Golyadkin, Maxim and Razzhivina, Maria and Makarov, Ilya}, title = {Weak-to-Strong 3D Object Detection with X-Ray Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15055-15064} }
QDFormer: Towards Robust Audiovisual Segmentation in Complex Environments with Quantization-based Semantic Decomposition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Xiang and Wang, Jinglu and Xu, Xiaohao and Peng, Xiulian and Singh, Rita and Lu, Yan and Raj, Bhiksha}, title = {QDFormer: Towards Robust Audiovisual Segmentation in Complex Environments with Quantization-based Semantic Decomposition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3402-3413} }
Active Open-Vocabulary Recognition: Let Intelligent Moving Mitigate CLIP Limitations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2024_CVPR, author = {Fan, Lei and Zhou, Jianxiong and Xing, Xiaoying and Wu, Ying}, title = {Active Open-Vocabulary Recognition: Let Intelligent Moving Mitigate CLIP Limitations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16394-16403} }
Backdoor Defense via Test-Time Detecting and Repairing-
[pdf]
[supp]
[bibtex]@InProceedings{Guan_2024_CVPR, author = {Guan, Jiyang and Liang, Jian and He, Ran}, title = {Backdoor Defense via Test-Time Detecting and Repairing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24564-24573} }
Fast Adaptation for Human Pose Estimation via Meta-Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2024_CVPR, author = {Hu, Shengxiang and Sun, Huaijiang and Li, Bin and Wei, Dong and Li, Weiqing and Lu, Jianfeng}, title = {Fast Adaptation for Human Pose Estimation via Meta-Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1792-1801} }
Efficient Meshflow and Optical Flow Estimation from Event Cameras-
[pdf]
[bibtex]@InProceedings{Luo_2024_CVPR, author = {Luo, Xinglong and Luo, Ao and Wang, Zhengning and Lin, Chunyu and Zeng, Bing and Liu, Shuaicheng}, title = {Efficient Meshflow and Optical Flow Estimation from Event Cameras}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19198-19207} }
Visual Program Distillation: Distilling Tools and Programmatic Reasoning into Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2024_CVPR, author = {Hu, Yushi and Stretcu, Otilia and Lu, Chun-Ta and Viswanathan, Krishnamurthy and Hata, Kenji and Luo, Enming and Krishna, Ranjay and Fuxman, Ariel}, title = {Visual Program Distillation: Distilling Tools and Programmatic Reasoning into Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9590-9601} }
OneFormer3D: One Transformer for Unified Point Cloud Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kolodiazhnyi_2024_CVPR, author = {Kolodiazhnyi, Maxim and Vorontsova, Anna and Konushin, Anton and Rukhovich, Danila}, title = {OneFormer3D: One Transformer for Unified Point Cloud Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20943-20953} }
JRDB-Social: A Multifaceted Robotic Dataset for Understanding of Context and Dynamics of Human Interactions Within Social Groups-
[pdf]
[supp]
[bibtex]@InProceedings{Jahangard_2024_CVPR, author = {Jahangard, Simindokht and Cai, Zhixi and Wen, Shiki and Rezatofighi, Hamid}, title = {JRDB-Social: A Multifaceted Robotic Dataset for Understanding of Context and Dynamics of Human Interactions Within Social Groups}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22087-22097} }
A Backpack Full of Skills: Egocentric Video Understanding with Diverse Task Perspectives-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peirone_2024_CVPR, author = {Peirone, Simone Alberto and Pistilli, Francesca and Alliegro, Antonio and Averta, Giuseppe}, title = {A Backpack Full of Skills: Egocentric Video Understanding with Diverse Task Perspectives}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18275-18285} }
WOUAF: Weight Modulation for User Attribution and Fingerprinting in Text-to-Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Changhoon and Min, Kyle and Patel, Maitreya and Cheng, Sheng and Yang, Yezhou}, title = {WOUAF: Weight Modulation for User Attribution and Fingerprinting in Text-to-Image Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8974-8983} }
Visual In-Context Prompting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Feng and Jiang, Qing and Zhang, Hao and Ren, Tianhe and Liu, Shilong and Zou, Xueyan and Xu, Huaizhe and Li, Hongyang and Yang, Jianwei and Li, Chunyuan and Zhang, Lei and Gao, Jianfeng}, title = {Visual In-Context Prompting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12861-12871} }
Text-Conditioned Generative Model of 3D Strand-based Human Hairstyles-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sklyarova_2024_CVPR, author = {Sklyarova, Vanessa and Zakharov, Egor and Hilliges, Otmar and Black, Michael J. and Thies, Justus}, title = {Text-Conditioned Generative Model of 3D Strand-based Human Hairstyles}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4703-4712} }
GPT-4V(ision) is a Human-Aligned Evaluator for Text-to-3D Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Tong and Yang, Guandao and Li, Zhibing and Zhang, Kai and Liu, Ziwei and Guibas, Leonidas and Lin, Dahua and Wetzstein, Gordon}, title = {GPT-4V(ision) is a Human-Aligned Evaluator for Text-to-3D Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22227-22238} }
NTO3D: Neural Target Object 3D Reconstruction with Segment Anything-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2024_CVPR, author = {Wei, Xiaobao and Zhang, Renrui and Wu, Jiarui and Liu, Jiaming and Lu, Ming and Guo, Yandong and Zhang, Shanghang}, title = {NTO3D: Neural Target Object 3D Reconstruction with Segment Anything}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20352-20362} }
Instruct-ReID: A Multi-purpose Person Re-identification Task with Instructions-
[pdf]
[supp]
[bibtex]@InProceedings{He_2024_CVPR, author = {He, Weizhen and Deng, Yiheng and Tang, Shixiang and Chen, Qihao and Xie, Qingsong and Wang, Yizhou and Bai, Lei and Zhu, Feng and Zhao, Rui and Ouyang, Wanli and Qi, Donglian and Yan, Yunfeng}, title = {Instruct-ReID: A Multi-purpose Person Re-identification Task with Instructions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17521-17531} }
OmniMedVQA: A New Large-Scale Comprehensive Evaluation Benchmark for Medical LVLM-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2024_CVPR, author = {Hu, Yutao and Li, Tianbin and Lu, Quanfeng and Shao, Wenqi and He, Junjun and Qiao, Yu and Luo, Ping}, title = {OmniMedVQA: A New Large-Scale Comprehensive Evaluation Benchmark for Medical LVLM}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22170-22183} }
Skeleton-in-Context: Unified Skeleton Sequence Modeling with In-Context Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Xinshun and Fang, Zhongbin and Li, Xia and Li, Xiangtai and Chen, Chen and Liu, Mengyuan}, title = {Skeleton-in-Context: Unified Skeleton Sequence Modeling with In-Context Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2436-2446} }
DemoFusion: Democratising High-Resolution Image Generation With No $$$-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2024_CVPR, author = {Du, Ruoyi and Chang, Dongliang and Hospedales, Timothy and Song, Yi-Zhe and Ma, Zhanyu}, title = {DemoFusion: Democratising High-Resolution Image Generation With No \$\$\$}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6159-6168} }
IBD-SLAM: Learning Image-Based Depth Fusion for Generalizable SLAM-
[pdf]
[supp]
[bibtex]@InProceedings{Yin_2024_CVPR, author = {Yin, Minghao and Wu, Shangzhe and Han, Kai}, title = {IBD-SLAM: Learning Image-Based Depth Fusion for Generalizable SLAM}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10563-10573} }
CPLIP: Zero-Shot Learning for Histopathology with Comprehensive Vision-Language Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Javed_2024_CVPR, author = {Javed, Sajid and Mahmood, Arif and Ganapathi, Iyyakutti Iyappan and Dharejo, Fayaz Ali and Werghi, Naoufel and Bennamoun, Mohammed}, title = {CPLIP: Zero-Shot Learning for Histopathology with Comprehensive Vision-Language Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11450-11459} }
Total Selfie: Generating Full-Body Selfies-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Bowei and Curless, Brian and Kemelmacher-Shlizerman, Ira and Seitz, Steven M.}, title = {Total Selfie: Generating Full-Body Selfies}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6701-6711} }
Visual Programming for Zero-shot Open-Vocabulary 3D Visual Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2024_CVPR, author = {Yuan, Zhihao and Ren, Jinke and Feng, Chun-Mei and Zhao, Hengshuang and Cui, Shuguang and Li, Zhen}, title = {Visual Programming for Zero-shot Open-Vocabulary 3D Visual Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20623-20633} }
Learning Structure-from-Motion with Graph Attention Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Brynte_2024_CVPR, author = {Brynte, Lucas and Iglesias, Jos\'e Pedro and Olsson, Carl and Kahl, Fredrik}, title = {Learning Structure-from-Motion with Graph Attention Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4808-4817} }
Geometry Transfer for Stylizing Radiance Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jung_2024_CVPR, author = {Jung, Hyunyoung and Nam, Seonghyeon and Sarafianos, Nikolaos and Yoo, Sungjoo and Sorkine-Hornung, Alexander and Ranjan, Rakesh}, title = {Geometry Transfer for Stylizing Radiance Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8565-8575} }
Holoported Characters: Real-time Free-viewpoint Rendering of Humans from Sparse RGB Cameras-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shetty_2024_CVPR, author = {Shetty, Ashwath and Habermann, Marc and Sun, Guoxing and Luvizon, Diogo and Golyanik, Vladislav and Theobalt, Christian}, title = {Holoported Characters: Real-time Free-viewpoint Rendering of Humans from Sparse RGB Cameras}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1206-1215} }
SEAS: ShapE-Aligned Supervision for Person Re-Identification-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2024_CVPR, author = {Zhu, Haidong and Budhwant, Pranav and Zheng, Zhaoheng and Nevatia, Ram}, title = {SEAS: ShapE-Aligned Supervision for Person Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {164-174} }
Class Incremental Learning with Multi-Teacher Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Wen_2024_CVPR, author = {Wen, Haitao and Pan, Lili and Dai, Yu and Qiu, Heqian and Wang, Lanxiao and Wu, Qingbo and Li, Hongliang}, title = {Class Incremental Learning with Multi-Teacher Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28443-28452} }
Reg-PTQ: Regression-specialized Post-training Quantization for Fully Quantized Object Detector-
[pdf]
[supp]
[bibtex]@InProceedings{Ding_2024_CVPR, author = {Ding, Yifu and Feng, Weilun and Chen, Chuyan and Guo, Jinyang and Liu, Xianglong}, title = {Reg-PTQ: Regression-specialized Post-training Quantization for Fully Quantized Object Detector}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16174-16184} }
AMU-Tuning: Effective Logit Bias for CLIP-based Few-shot Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Tang_2024_CVPR, author = {Tang, Yuwei and Lin, Zhenyi and Wang, Qilong and Zhu, Pengfei and Hu, Qinghua}, title = {AMU-Tuning: Effective Logit Bias for CLIP-based Few-shot Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23323-23333} }
Real-World Mobile Image Denoising Dataset with Efficient Baselines-
[pdf]
[supp]
[bibtex]@InProceedings{Flepp_2024_CVPR, author = {Flepp, Roman and Ignatov, Andrey and Timofte, Radu and Van Gool, Luc}, title = {Real-World Mobile Image Denoising Dataset with Efficient Baselines}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22368-22377} }
Making Vision Transformers Truly Shift-Equivariant-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rojas-Gomez_2024_CVPR, author = {Rojas-Gomez, Renan A. and Lim, Teck-Yian and Do, Minh N. and Yeh, Raymond A.}, title = {Making Vision Transformers Truly Shift-Equivariant}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5568-5577} }
SpikeNeRF: Learning Neural Radiance Fields from Continuous Spike Stream-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2024_CVPR, author = {Zhu, Lin and Jia, Kangmin and Zhao, Yifan and Qi, Yunshan and Wang, Lizhi and Huang, Hua}, title = {SpikeNeRF: Learning Neural Radiance Fields from Continuous Spike Stream}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6285-6295} }
Action Scene Graphs for Long-Form Understanding of Egocentric Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rodin_2024_CVPR, author = {Rodin, Ivan and Furnari, Antonino and Min, Kyle and Tripathi, Subarna and Farinella, Giovanni Maria}, title = {Action Scene Graphs for Long-Form Understanding of Egocentric Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18622-18632} }
A Semi-supervised Nighttime Dehazing Baseline with Spatial-Frequency Aware and Realistic Brightness Constraint-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cong_2024_CVPR, author = {Cong, Xiaofeng and Gui, Jie and Zhang, Jing and Hou, Junming and Shen, Hao}, title = {A Semi-supervised Nighttime Dehazing Baseline with Spatial-Frequency Aware and Realistic Brightness Constraint}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2631-2640} }
De-confounded Data-free Knowledge Distillation for Handling Distribution Shifts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Yuzheng and Yang, Dingkang and Chen, Zhaoyu and Liu, Yang and Liu, Siao and Zhang, Wenqiang and Zhang, Lihua and Qi, Lizhe}, title = {De-confounded Data-free Knowledge Distillation for Handling Distribution Shifts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12615-12625} }
Fine-Grained Bipartite Concept Factorization for Clustering-
[pdf]
[bibtex]@InProceedings{Peng_2024_CVPR, author = {Peng, Chong and Zhang, Pengfei and Chen, Yongyong and Kang, Zhao and Chen, Chenglizhao and Cheng, Qiang}, title = {Fine-Grained Bipartite Concept Factorization for Clustering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26264-26274} }
Siamese Learning with Joint Alignment and Regression for Weakly-Supervised Video Paragraph Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tan_2024_CVPR, author = {Tan, Chaolei and Lai, Jianhuang and Zheng, Wei-Shi and Hu, Jian-Fang}, title = {Siamese Learning with Joint Alignment and Regression for Weakly-Supervised Video Paragraph Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13569-13580} }
Language-Driven Anchors for Zero-Shot Adversarial Robustness-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Xiao and Zhang, Wei and Liu, Yining and Hu, Zhanhao and Zhang, Bo and Hu, Xiaolin}, title = {Language-Driven Anchors for Zero-Shot Adversarial Robustness}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24686-24695} }
Deep Equilibrium Diffusion Restoration with Parallel Sampling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2024_CVPR, author = {Cao, Jiezhang and Shi, Yue and Zhang, Kai and Zhang, Yulun and Timofte, Radu and Van Gool, Luc}, title = {Deep Equilibrium Diffusion Restoration with Parallel Sampling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2824-2834} }
LEOD: Label-Efficient Object Detection for Event Cameras-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Ziyi and Gehrig, Mathias and Lyu, Qing and Liu, Xudong and Gilitschenski, Igor}, title = {LEOD: Label-Efficient Object Detection for Event Cameras}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16933-16943} }
Morphological Prototyping for Unsupervised Slide Representation Learning in Computational Pathology-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2024_CVPR, author = {Song, Andrew H. and Chen, Richard J. and Ding, Tong and Williamson, Drew F.K. and Jaume, Guillaume and Mahmood, Faisal}, title = {Morphological Prototyping for Unsupervised Slide Representation Learning in Computational Pathology}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11566-11578} }
Fooling Polarization-Based Vision using Locally Controllable Polarizing Projection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Zhuoxiao and Zhong, Zhihang and Nobuhara, Shohei and Nishino, Ko and Zheng, Yinqiang}, title = {Fooling Polarization-Based Vision using Locally Controllable Polarizing Projection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24706-24715} }
Dense Optical Tracking: Connecting the Dots-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Le_Moing_2024_CVPR, author = {Le Moing, Guillaume and Ponce, Jean and Schmid, Cordelia}, title = {Dense Optical Tracking: Connecting the Dots}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19187-19197} }
A Stealthy Wrongdoer: Feature-Oriented Reconstruction Attack against Split Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Xiaoyang and Yang, Mengda and Yi, Wenzhe and Li, Ziang and Wang, Juan and Hu, Hongxin and Zhuang, Yong and Liu, Yaxin}, title = {A Stealthy Wrongdoer: Feature-Oriented Reconstruction Attack against Split Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12130-12139} }
DiffAM: Diffusion-based Adversarial Makeup Transfer for Facial Privacy Protection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2024_CVPR, author = {Sun, Yuhao and Yu, Lingyun and Xie, Hongtao and Li, Jiaming and Zhang, Yongdong}, title = {DiffAM: Diffusion-based Adversarial Makeup Transfer for Facial Privacy Protection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24584-24594} }
SlowFormer: Adversarial Attack on Compute and Energy Consumption of Efficient Vision Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Navaneet_2024_CVPR, author = {Navaneet, K L and Koohpayegani, Soroush Abbasi and Sleiman, Essam and Pirsiavash, Hamed}, title = {SlowFormer: Adversarial Attack on Compute and Energy Consumption of Efficient Vision Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24786-24797} }
TULIP: Transformer for Upsampling of LiDAR Point Clouds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Bin and Pfreundschuh, Patrick and Siegwart, Roland and Hutter, Marco and Moghadam, Peyman and Patil, Vaishakh}, title = {TULIP: Transformer for Upsampling of LiDAR Point Clouds}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15354-15364} }
How to Configure Good In-Context Sequence for Visual Question Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Li and Peng, Jiawei and Chen, Huiyi and Gao, Chongyang and Yang, Xu}, title = {How to Configure Good In-Context Sequence for Visual Question Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26710-26720} }
Gaussian Shell Maps for Efficient 3D Human Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Abdal_2024_CVPR, author = {Abdal, Rameen and Yifan, Wang and Shi, Zifan and Xu, Yinghao and Po, Ryan and Kuang, Zhengfei and Chen, Qifeng and Yeung, Dit-Yan and Wetzstein, Gordon}, title = {Gaussian Shell Maps for Efficient 3D Human Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9441-9451} }
Defense Against Adversarial Attacks on No-Reference Image Quality Models with Gradient Norm Regularization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Yujia and Yang, Chenxi and Li, Dingquan and Ding, Jianhao and Jiang, Tingting}, title = {Defense Against Adversarial Attacks on No-Reference Image Quality Models with Gradient Norm Regularization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25554-25563} }
TACO: Benchmarking Generalizable Bimanual Tool-ACtion-Object Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Yun and Yang, Haolin and Si, Xu and Liu, Ling and Li, Zipeng and Zhang, Yuxiang and Liu, Yebin and Yi, Li}, title = {TACO: Benchmarking Generalizable Bimanual Tool-ACtion-Object Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21740-21751} }
MoST: Motion Style Transformer Between Diverse Action Contents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Boeun and Kim, Jungho and Chang, Hyung Jin and Choi, Jin Young}, title = {MoST: Motion Style Transformer Between Diverse Action Contents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1705-1714} }
Prompting Hard or Hardly Prompting: Prompt Inversion for Text-to-Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mahajan_2024_CVPR, author = {Mahajan, Shweta and Rahman, Tanzila and Yi, Kwang Moo and Sigal, Leonid}, title = {Prompting Hard or Hardly Prompting: Prompt Inversion for Text-to-Image Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6808-6817} }
Unmixing Before Fusion: A Generalized Paradigm for Multi-Source-based Hyperspectral Image Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2024_CVPR, author = {Yu, Yang and Pan, Erting and Wang, Xinya and Wu, Yuheng and Mei, Xiaoguang and Ma, Jiayi}, title = {Unmixing Before Fusion: A Generalized Paradigm for Multi-Source-based Hyperspectral Image Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9297-9306} }
AlignMiF: Geometry-Aligned Multimodal Implicit Field for LiDAR-Camera Joint Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tao_2024_CVPR, author = {Tao, Tang and Wang, Guangrun and Lao, Yixing and Chen, Peng and Liu, Jie and Lin, Liang and Yu, Kaicheng and Liang, Xiaodan}, title = {AlignMiF: Geometry-Aligned Multimodal Implicit Field for LiDAR-Camera Joint Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21230-21240} }
CoDi: Conditional Diffusion Distillation for Higher-Fidelity and Faster Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mei_2024_CVPR, author = {Mei, Kangfu and Delbracio, Mauricio and Talebi, Hossein and Tu, Zhengzhong and Patel, Vishal M. and Milanfar, Peyman}, title = {CoDi: Conditional Diffusion Distillation for Higher-Fidelity and Faster Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9048-9058} }
Improving Unsupervised Hierarchical Representation with Reinforcement Learning-
[pdf]
[supp]
[bibtex]@InProceedings{An_2024_CVPR, author = {An, Ruyi and Li, Yewen and He, Xu and Gu, Pengjie and Zhao, Mengchen and Li, Dong and Hao, Jianye and Wang, Chaojie and An, Bo and Zhou, Mingyuan}, title = {Improving Unsupervised Hierarchical Representation with Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22946-22956} }
HPL-ESS: Hybrid Pseudo-Labeling for Unsupervised Event-based Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Jing_2024_CVPR, author = {Jing, Linglin and Ding, Yiming and Gao, Yunpeng and Wang, Zhigang and Yan, Xu and Wang, Dong and Schaefer, Gerald and Fang, Hui and Zhao, Bin and Li, Xuelong}, title = {HPL-ESS: Hybrid Pseudo-Labeling for Unsupervised Event-based Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23128-23137} }
X-Adapter: Adding Universal Compatibility of Plugins for Upgraded Diffusion Model-
[pdf]
[supp]
[bibtex]@InProceedings{Ran_2024_CVPR, author = {Ran, Lingmin and Cun, Xiaodong and Liu, Jia-Wei and Zhao, Rui and Zijie, Song and Wang, Xintao and Keppo, Jussi and Shou, Mike Zheng}, title = {X-Adapter: Adding Universal Compatibility of Plugins for Upgraded Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8775-8784} }
Towards General Robustness Verification of MaxPool-based Convolutional Neural Networks via Tightening Linear Approximation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2024_CVPR, author = {Xiao, Yuan and Ma, Shiqing and Zhai, Juan and Fang, Chunrong and Jia, Jinyuan and Chen, Zhenyu}, title = {Towards General Robustness Verification of MaxPool-based Convolutional Neural Networks via Tightening Linear Approximation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24766-24775} }
BT-Adapter: Video Conversation is Feasible Without Video Instruction Tuning-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Ruyang and Li, Chen and Ge, Yixiao and Li, Thomas H. and Shan, Ying and Li, Ge}, title = {BT-Adapter: Video Conversation is Feasible Without Video Instruction Tuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13658-13667} }
CADTalk: An Algorithm and Benchmark for Semantic Commenting of CAD Programs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2024_CVPR, author = {Yuan, Haocheng and Xu, Jing and Pan, Hao and Bousseau, Adrien and Mitra, Niloy J. and Li, Changjian}, title = {CADTalk: An Algorithm and Benchmark for Semantic Commenting of CAD Programs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3753-3762} }
Learning to Rematch Mismatched Pairs for Robust Cross-Modal Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2024_CVPR, author = {Han, Haochen and Zheng, Qinghua and Dai, Guang and Luo, Minnan and Wang, Jingdong}, title = {Learning to Rematch Mismatched Pairs for Robust Cross-Modal Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26679-26688} }
Generate Subgoal Images before Act: Unlocking the Chain-of-Thought Reasoning in Diffusion Model for Robot Manipulation with Multimodal Prompts-
[pdf]
[supp]
[bibtex]@InProceedings{Ni_2024_CVPR, author = {Ni, Fei and Hao, Jianye and Wu, Shiguang and Kou, Longxin and Liu, Jiashun and Zheng, Yan and Wang, Bin and Zhuang, Yuzheng}, title = {Generate Subgoal Images before Act: Unlocking the Chain-of-Thought Reasoning in Diffusion Model for Robot Manipulation with Multimodal Prompts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13991-14000} }
Asymmetric Masked Distillation for Pre-Training Small Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Zhiyu and Huang, Bingkun and Xing, Sen and Wu, Gangshan and Qiao, Yu and Wang, Limin},
  title     = {Asymmetric Masked Distillation for Pre-Training Small Foundation Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18516--18526},
}
Inversion-Free Image Editing with Language-Guided Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Sihan and Huang, Yidong and Pan, Jiayi and Ma, Ziqiao and Chai, Joyce},
  title     = {Inversion-Free Image Editing with Language-Guided Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9452--9461},
}
HumMUSS: Human Motion Understanding using State Space Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mondal_2024_CVPR,
  author    = {Mondal, Arnab and Alletto, Stefano and Tome, Denis},
  title     = {{HumMUSS}: Human Motion Understanding using State Space Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2318--2330},
}
MP5: A Multi-modal Open-ended Embodied System in Minecraft via Active Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qin_2024_CVPR,
  author    = {Qin, Yiran and Zhou, Enshen and Liu, Qichang and Yin, Zhenfei and Sheng, Lu and Zhang, Ruimao and Qiao, Yu and Shao, Jing},
  title     = {{MP5}: A Multi-modal Open-ended Embodied System in {Minecraft} via Active Perception},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16307--16316},
}
Uncovering What, Why and How: A Comprehensive Benchmark for Causation Understanding of Video Anomaly-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2024_CVPR,
  author    = {Du, Hang and Zhang, Sicheng and Xie, Binzhu and Nan, Guoshun and Zhang, Jiayang and Xu, Junrui and Liu, Hangyu and Leng, Sicong and Liu, Jiangming and Fan, Hehe and Huang, Dajiu and Feng, Jing and Chen, Linli and Zhang, Can and Li, Xuhuan and Zhang, Hao and Chen, Jianhang and Cui, Qimei and Tao, Xiaofeng},
  title     = {Uncovering What, Why and How: A Comprehensive Benchmark for Causation Understanding of Video Anomaly},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18793--18803},
}
MiKASA: Multi-Key-Anchor & Scene-Aware Transformer for 3D Visual Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chang_2024_CVPR,
  author    = {Chang, Chun-Peng and Wang, Shaoxiang and Pagani, Alain and Stricker, Didier},
  title     = {{MiKASA}: Multi-Key-Anchor \& Scene-Aware Transformer for {3D} Visual Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14131--14140},
}
ZePT: Zero-Shot Pan-Tumor Segmentation via Query-Disentangling and Self-Prompting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2024_CVPR,
  author    = {Jiang, Yankai and Huang, Zhongzhen and Zhang, Rongzhao and Zhang, Xiaofan and Zhang, Shaoting},
  title     = {{ZePT}: Zero-Shot Pan-Tumor Segmentation via Query-Disentangling and Self-Prompting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11386--11397},
}
Task-Driven Exploration: Decoupling and Inter-Task Feedback for Joint Moment Retrieval and Highlight Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Jin and Wei, Ping and Li, Huan and Ren, Ziyang},
  title     = {Task-Driven Exploration: Decoupling and Inter-Task Feedback for Joint Moment Retrieval and Highlight Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18308--18318},
}
MobileCLIP: Fast Image-Text Models through Multi-Modal Reinforced Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vasu_2024_CVPR,
  author    = {Vasu, Pavan Kumar Anasosalu and Pouransari, Hadi and Faghri, Fartash and Vemulapalli, Raviteja and Tuzel, Oncel},
  title     = {{MobileCLIP}: Fast Image-Text Models through Multi-Modal Reinforced Training},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15963--15974},
}
Drag Your Noise: Interactive Point-based Editing via Diffusion Semantic Propagation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Haofeng and Xu, Chenshu and Yang, Yifei and Zeng, Lihua and He, Shengfeng},
  title     = {Drag Your Noise: Interactive Point-based Editing via Diffusion Semantic Propagation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6743--6752},
}
CDMAD: Class-Distribution-Mismatch-Aware Debiasing for Class-Imbalanced Semi-Supervised Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2024_CVPR,
  author    = {Lee, Hyuck and Kim, Heeyoung},
  title     = {{CDMAD}: Class-Distribution-Mismatch-Aware Debiasing for Class-Imbalanced Semi-Supervised Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {23891--23900},
}
VideoCon: Robust Video-Language Alignment via Contrast Captions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bansal_2024_CVPR,
  author    = {Bansal, Hritik and Bitton, Yonatan and Szpektor, Idan and Chang, Kai-Wei and Grover, Aditya},
  title     = {{VideoCon}: Robust Video-Language Alignment via Contrast Captions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13927--13937},
}
PanoPose: Self-supervised Relative Pose Estimation for Panoramic Images-
[pdf]
[supp]
[bibtex]@InProceedings{Tu_2024_CVPR,
  author    = {Tu, Diantao and Cui, Hainan and Zheng, Xianwei and Shen, Shuhan},
  title     = {{PanoPose}: Self-supervised Relative Pose Estimation for Panoramic Images},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {20009--20018},
}
ContextSeg: Sketch Semantic Segmentation by Querying the Context with Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Jiawei and Li, Changjian},
  title     = {{ContextSeg}: Sketch Semantic Segmentation by Querying the Context with Attention},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3679--3688},
}
Describing Differences in Image Sets with Natural Language-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dunlap_2024_CVPR,
  author    = {Dunlap, Lisa and Zhang, Yuhui and Wang, Xiaohan and Zhong, Ruiqi and Darrell, Trevor and Steinhardt, Jacob and Gonzalez, Joseph E. and Yeung-Levy, Serena},
  title     = {Describing Differences in Image Sets with Natural Language},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {24199--24208},
}
Discovering and Mitigating Visual Biases through Keyword Explanation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR,
  author    = {Kim, Younghyun and Mo, Sangwoo and Kim, Minkyu and Lee, Kyungmin and Lee, Jaeho and Shin, Jinwoo},
  title     = {Discovering and Mitigating Visual Biases through Keyword Explanation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11082--11092},
}
Robust Emotion Recognition in Context Debiasing-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Dingkang and Yang, Kun and Li, Mingcheng and Wang, Shunli and Wang, Shuaibing and Zhang, Lihua},
  title     = {Robust Emotion Recognition in Context Debiasing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12447--12457},
}
Fully Geometric Panoramic Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR,
  author    = {Kim, Junho and Jeong, Jiwon and Kim, Young Min},
  title     = {Fully Geometric Panoramic Localization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {20827--20837},
}
CAPE: CAM as a Probabilistic Ensemble for Enhanced DNN Interpretation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chowdhury_2024_CVPR,
  author    = {Chowdhury, Townim Faisal and Liao, Kewen and Phan, Vu Minh Hieu and To, Minh-Son and Xie, Yutong and Hung, Kevin and Ross, David and van den Hengel, Anton and Verjans, Johan W. and Liao, Zhibin},
  title     = {{CAPE}: {CAM} as a Probabilistic Ensemble for Enhanced {DNN} Interpretation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11072--11081},
}
NeRF Director: Revisiting View Selection in Neural Volume Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2024_CVPR,
  author    = {Xiao, Wenhui and Santa Cruz, Rodrigo and Ahmedt-Aristizabal, David and Salvado, Olivier and Fookes, Clinton and Lebrat, Leo},
  title     = {{NeRF} Director: Revisiting View Selection in Neural Volume Rendering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {20742--20751},
}
Taming the Tail in Class-Conditional GANs: Knowledge Sharing via Unconditional Training at Lower Resolutions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Khorram_2024_CVPR,
  author    = {Khorram, Saeed and Jiang, Mingqi and Shahbazi, Mohamad and Danesh, Mohamad H. and Li, Fuxin},
  title     = {Taming the Tail in Class-Conditional {GANs}: Knowledge Sharing via Unconditional Training at Lower Resolutions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7580--7590},
}
VideoSwap: Customized Video Subject Swapping with Interactive Semantic Point Correspondence-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gu_2024_CVPR,
  author    = {Gu, Yuchao and Zhou, Yipin and Wu, Bichen and Yu, Licheng and Liu, Jia-Wei and Zhao, Rui and Wu, Jay Zhangjie and Zhang, David Junhao and Shou, Mike Zheng and Tang, Kevin},
  title     = {{VideoSwap}: Customized Video Subject Swapping with Interactive Semantic Point Correspondence},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7621--7630},
}
SonicVisionLM: Playing Sound with Vision Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2024_CVPR,
  author    = {Xie, Zhifeng and Yu, Shengye and He, Qile and Li, Mengtian},
  title     = {{SonicVisionLM}: Playing Sound with Vision Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {26866--26875},
}
Multi-Space Alignments Towards Universal LiDAR Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Youquan and Kong, Lingdong and Wu, Xiaoyang and Chen, Runnan and Li, Xin and Pan, Liang and Liu, Ziwei and Ma, Yuexin},
  title     = {Multi-Space Alignments Towards Universal {LiDAR} Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14648--14661},
}
DiffuScene: Denoising Diffusion Models for Generative Indoor Scene Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2024_CVPR,
  author    = {Tang, Jiapeng and Nie, Yinyu and Markhasin, Lev and Dai, Angela and Thies, Justus and Nie{\ss}ner, Matthias},
  title     = {{DiffuScene}: Denoising Diffusion Models for Generative Indoor Scene Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {20507--20518},
}
Hierarchical Histogram Threshold Segmentation - Auto-terminating High-detail Oversegmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Chang_2024_CVPR,
  author    = {Chang, Thomas V. and Seibt, Simon and von Rymon Lipinski, Bartosz},
  title     = {Hierarchical Histogram Threshold Segmentation - Auto-terminating High-detail Oversegmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3195--3204},
}
Once for Both: Single Stage of Importance and Sparsity Search for Vision Transformer Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2024_CVPR,
  author    = {Ye, Hancheng and Yu, Chong and Ye, Peng and Xia, Renqiu and Tang, Yansong and Lu, Jiwen and Chen, Tao and Zhang, Bo},
  title     = {Once for Both: Single Stage of Importance and Sparsity Search for Vision Transformer Compression},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5578--5588},
}
As-Plausible-As-Possible: Plausibility-Aware Mesh Deformation Using 2D Diffusion Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Yoo_2024_CVPR,
  author    = {Yoo, Seungwoo and Kim, Kunho and Kim, Vladimir G. and Sung, Minhyuk},
  title     = {As-Plausible-As-Possible: Plausibility-Aware Mesh Deformation Using {2D} Diffusion Priors},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4315--4324},
}
MCNet: Rethinking the Core Ingredients for Accurate and Efficient Homography Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2024_CVPR,
  author    = {Zhu, Haokai and Cao, Si-Yuan and Hu, Jianxin and Zuo, Sitong and Yu, Beinan and Ying, Jiacheng and Li, Junwei and Shen, Hui-Liang},
  title     = {{MCNet}: Rethinking the Core Ingredients for Accurate and Efficient Homography Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {25932--25941},
}
ECLIPSE: Efficient Continual Learning in Panoptic Segmentation with Visual Prompt Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR,
  author    = {Kim, Beomyoung and Yu, Joonsang and Hwang, Sung Ju},
  title     = {{ECLIPSE}: Efficient Continual Learning in Panoptic Segmentation with Visual Prompt Tuning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3346--3356},
}
Boosting Continual Learning of Vision-Language Models via Mixture-of-Experts Adapters-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2024_CVPR,
  author    = {Yu, Jiazuo and Zhuge, Yunzhi and Zhang, Lu and Hu, Ping and Wang, Dong and Lu, Huchuan and He, You},
  title     = {Boosting Continual Learning of Vision-Language Models via Mixture-of-Experts Adapters},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {23219--23230},
}
MaGGIe: Masked Guided Gradual Human Instance Matting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huynh_2024_CVPR,
  author    = {Huynh, Chuong and Oh, Seoung Wug and Shrivastava, Abhinav and Lee, Joon-Young},
  title     = {{MaGGIe}: Masked Guided Gradual Human Instance Matting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3870--3879},
}
FlowDiffuser: Advancing Optical Flow Estimation with Diffusion Models-
[pdf]
[bibtex]@InProceedings{Luo_2024_CVPR,
  author    = {Luo, Ao and Li, Xin and Yang, Fan and Liu, Jiangyu and Fan, Haoqiang and Liu, Shuaicheng},
  title     = {{FlowDiffuser}: Advancing Optical Flow Estimation with Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {19167--19176},
}
Benchmarking Implicit Neural Representation and Geometric Rendering in Real-Time RGB-D SLAM-
[pdf]
[supp]
[bibtex]@InProceedings{Hua_2024_CVPR,
  author    = {Hua, Tongyan and Wang, Lin},
  title     = {Benchmarking Implicit Neural Representation and Geometric Rendering in Real-Time {RGB-D} {SLAM}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {21346--21356},
}
Free3D: Consistent Novel View Synthesis without 3D Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2024_CVPR,
  author    = {Zheng, Chuanxia and Vedaldi, Andrea},
  title     = {{Free3D}: Consistent Novel View Synthesis without {3D} Representation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9720--9731},
}
SuperSVG: Superpixel-based Scalable Vector Graphics Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2024_CVPR,
  author    = {Hu, Teng and Yi, Ran and Qian, Baihong and Zhang, Jiangning and Rosin, Paul L. and Lai, Yu-Kun},
  title     = {{SuperSVG}: Superpixel-based Scalable Vector Graphics Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {24892--24901},
}
AV2AV: Direct Audio-Visual Speech to Audio-Visual Speech Translation with Unified Audio-Visual Speech Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2024_CVPR,
  author    = {Choi, Jeongsoo and Park, Se Jin and Kim, Minsu and Ro, Yong Man},
  title     = {{AV2AV}: Direct Audio-Visual Speech to Audio-Visual Speech Translation with Unified Audio-Visual Speech Representation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {27325--27337},
}
Towards the Uncharted: Density-Descending Feature Perturbation for Semi-supervised Semantic Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Xiaoyang and Bai, Huihui and Yu, Limin and Zhao, Yao and Xiao, Jimin},
  title     = {Towards the Uncharted: Density-Descending Feature Perturbation for Semi-supervised Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3303--3312},
}
WALT3D: Generating Realistic Training Data from Time-Lapse Imagery for Reconstructing Dynamic Objects Under Occlusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vuong_2024_CVPR,
  author    = {Vuong, Khiem and Reddy, N Dinesh and Tamburo, Robert and Narasimhan, Srinivasa G.},
  title     = {{WALT3D}: Generating Realistic Training Data from Time-Lapse Imagery for Reconstructing Dynamic Objects Under Occlusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9514--9524},
}
RTMO: Towards High-Performance One-Stage Real-Time Multi-Person Pose Estimation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lu_2024_CVPR,
  author    = {Lu, Peng and Jiang, Tao and Li, Yining and Li, Xiangtai and Chen, Kai and Yang, Wenming},
  title     = {{RTMO}: Towards High-Performance One-Stage Real-Time Multi-Person Pose Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1491--1500},
}
Contrastive Mean-Shift Learning for Generalized Category Discovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2024_CVPR,
  author    = {Choi, Sua and Kang, Dahyun and Cho, Minsu},
  title     = {Contrastive Mean-Shift Learning for Generalized Category Discovery},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {23094--23104},
}
Towards Language-Driven Video Inpainting via Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Jianzong and Li, Xiangtai and Si, Chenyang and Zhou, Shangchen and Yang, Jingkang and Zhang, Jiangning and Li, Yining and Chen, Kai and Tong, Yunhai and Liu, Ziwei and Loy, Chen Change},
  title     = {Towards Language-Driven Video Inpainting via Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12501--12511},
}
WaveFace: Authentic Face Restoration with Efficient Frequency Recovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Miao_2024_CVPR,
  author    = {Miao, Yunqi and Deng, Jiankang and Han, Jungong},
  title     = {{WaveFace}: Authentic Face Restoration with Efficient Frequency Recovery},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6583--6592},
}
CLIP-KD: An Empirical Study of CLIP Model Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Chuanguang and An, Zhulin and Huang, Libo and Bi, Junyu and Yu, Xinqiang and Yang, Han and Diao, Boyu and Xu, Yongjun},
  title     = {{CLIP-KD}: An Empirical Study of {CLIP} Model Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15952--15962},
}
UltrAvatar: A Realistic Animatable 3D Avatar Diffusion Model with Authenticity Guided Textures-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Mingyuan and Hyder, Rakib and Xuan, Ziwei and Qi, Guojun},
  title     = {{UltrAvatar}: A Realistic Animatable {3D} Avatar Diffusion Model with Authenticity Guided Textures},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1238--1248},
}
OneTracker: Unifying Visual Object Tracking with Foundation Models and Efficient Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hong_2024_CVPR,
  author    = {Hong, Lingyi and Yan, Shilin and Zhang, Renrui and Li, Wanyun and Zhou, Xinyu and Guo, Pinxue and Jiang, Kaixun and Chen, Yiting and Li, Jinglun and Chen, Zhaoyu and Zhang, Wenqiang},
  title     = {{OneTracker}: Unifying Visual Object Tracking with Foundation Models and Efficient Tuning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {19079--19091},
}
SC-Tune: Unleashing Self-Consistent Referential Comprehension in Large Vision Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Yue_2024_CVPR,
  author    = {Yue, Tongtian and Cheng, Jie and Guo, Longteng and Dai, Xingyuan and Zhao, Zijia and He, Xingjian and Xiong, Gang and Lv, Yisheng and Liu, Jing},
  title     = {{SC-Tune}: Unleashing Self-Consistent Referential Comprehension in Large Vision Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13073--13083},
}
Improving Depth Completion via Depth Feature Upsampling-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Yufei and Zhang, Ge and Wang, Shaoqian and Li, Bo and Liu, Qi and Hui, Le and Dai, Yuchao},
  title     = {Improving Depth Completion via Depth Feature Upsampling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {21104--21113},
}
NeRSP: Neural 3D Reconstruction for Reflective Objects with Sparse Polarized Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2024_CVPR,
  author    = {Han, Yufei and Guo, Heng and Fukai, Koki and Santo, Hiroaki and Shi, Boxin and Okura, Fumio and Ma, Zhanyu and Jia, Yunpeng},
  title     = {{NeRSP}: Neural {3D} Reconstruction for Reflective Objects with Sparse Polarized Images},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11821--11830},
}
Retrieval-Augmented Embodied Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2024_CVPR,
  author    = {Zhu, Yichen and Ou, Zhicai and Mou, Xiaofeng and Tang, Jian},
  title     = {Retrieval-Augmented Embodied Agents},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17985--17995},
}
SAFDNet: A Simple and Effective Network for Fully Sparse 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Gang and Chen, Junnan and Gao, Guohuan and Li, Jianmin and Liu, Si and Hu, Xiaolin},
  title     = {{SAFDNet}: A Simple and Effective Network for Fully Sparse {3D} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14477--14486},
}
Attention-Propagation Network for Egocentric Heatmap to 3D Pose Lifting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2024_CVPR,
  author    = {Kang, Taeho and Lee, Youngki},
  title     = {Attention-Propagation Network for Egocentric Heatmap to {3D} Pose Lifting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {842--851},
}
OmniMotionGPT: Animal Motion Generation with Limited Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Zhangsihao and Zhou, Mingyuan and Shan, Mengyi and Wen, Bingbing and Xuan, Ziwei and Hill, Mitch and Bai, Junjie and Qi, Guo-Jun and Wang, Yalin},
  title     = {{OmniMotionGPT}: Animal Motion Generation with Limited Data},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1249--1259},
}
SNI-SLAM: Semantic Neural Implicit SLAM-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2024_CVPR,
  author    = {Zhu, Siting and Wang, Guangming and Blum, Hermann and Liu, Jiuming and Song, Liang and Pollefeys, Marc and Wang, Hesheng},
  title     = {{SNI-SLAM}: Semantic Neural Implicit {SLAM}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {21167--21177},
}
InstanceDiffusion: Instance-level Control for Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Xudong and Darrell, Trevor and Rambhatla, Sai Saketh and Girdhar, Rohit and Misra, Ishan},
  title     = {{InstanceDiffusion}: Instance-level Control for Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6232--6242},
}
Unifying Top-down and Bottom-up Scanpath Prediction Using Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Zhibo and Mondal, Sounak and Ahn, Seoyoung and Xue, Ruoyu and Zelinsky, Gregory and Hoai, Minh and Samaras, Dimitris},
  title     = {Unifying Top-down and Bottom-up Scanpath Prediction Using Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1683--1693},
}
HINTED: Hard Instance Enhanced Detector with Mixed-Density Feature Fusion for Sparsely-Supervised 3D Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Xia_2024_CVPR,
  author    = {Xia, Qiming and Ye, Wei and Wu, Hai and Zhao, Shijia and Xing, Leyuan and Huang, Xun and Deng, Jinhao and Li, Xin and Wen, Chenglu and Wang, Cheng},
  title     = {{HINTED}: Hard Instance Enhanced Detector with Mixed-Density Feature Fusion for Sparsely-Supervised {3D} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15321--15330},
}
Structured Gradient-based Interpretations via Norm-Regularized Adversarial Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gong_2024_CVPR,
  author    = {Gong, Shizhan and Dou, Qi and Farnia, Farzan},
  title     = {Structured Gradient-based Interpretations via Norm-Regularized Adversarial Training},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11009--11018},
}
Building a Strong Pre-Training Baseline for Universal 3D Large-Scale Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Haoming and Zhang, Zhizhong and Qu, Yanyun and Zhang, Ruixin and Tan, Xin and Xie, Yuan},
  title     = {Building a Strong Pre-Training Baseline for Universal {3D} Large-Scale Perception},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {19925--19935},
}
DS-NeRV: Implicit Neural Video Representation with Decomposed Static and Dynamic Codes-
[pdf]
[supp]
[bibtex]@InProceedings{Yan_2024_CVPR,
  author    = {Yan, Hao and Ke, Zhihui and Zhou, Xiaobo and Qiu, Tie and Shi, Xidong and Jiang, Dadong},
  title     = {{DS-NeRV}: Implicit Neural Video Representation with Decomposed Static and Dynamic Codes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {23019--23029},
}
3D-Aware Face Editing via Warping-Guided Latent Direction Learning-
[pdf]
[bibtex]@InProceedings{Cheng_2024_CVPR,
  author    = {Cheng, Yuhao and Chen, Zhuo and Ren, Xingyu and Zhu, Wenhan and Xu, Zhengqin and Xu, Di and Yang, Changpeng and Yan, Yichao},
  title     = {{3D-Aware} Face Editing via Warping-Guided Latent Direction Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {916--926},
}
3DFIRES: Few Image 3D REconstruction for Scenes with Hidden Surfaces-
[pdf]
[arXiv]
[bibtex]@InProceedings{Jin_2024_CVPR,
  author    = {Jin, Linyi and Kulkarni, Nilesh and Fouhey, David F.},
  title     = {{3DFIRES}: Few Image {3D} {REconstruction} for Scenes with Hidden Surfaces},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9742--9751},
}
CAT-Seg: Cost Aggregation for Open-Vocabulary Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Cho_2024_CVPR,
  author    = {Cho, Seokju and Shin, Heeseong and Hong, Sunghwan and Arnab, Anurag and Seo, Paul Hongsuck and Kim, Seungryong},
  title     = {{CAT-Seg}: Cost Aggregation for Open-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4113--4123},
}
Focus on Your Instruction: Fine-grained and Multi-instruction Image Editing by Attention Modulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2024_CVPR,
  author    = {Guo, Qin and Lin, Tianwei},
  title     = {Focus on Your Instruction: Fine-grained and Multi-instruction Image Editing by Attention Modulation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6986--6996},
}
SDSTrack: Self-Distillation Symmetric Adapter Learning for Multi-Modal Visual Object Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hou_2024_CVPR,
  author    = {Hou, Xiaojun and Xing, Jiazheng and Qian, Yijie and Guo, Yaowei and Xin, Shuo and Chen, Junhao and Tang, Kai and Wang, Mengmeng and Jiang, Zhengkai and Liu, Liang and Liu, Yong},
  title     = {{SDSTrack}: Self-Distillation Symmetric Adapter Learning for Multi-Modal Visual Object Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {26551--26561},
}
MCPNet: An Interpretable Classifier via Multi-Level Concept Prototypes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Bor-Shiun and Wang, Chien-Yi and Chiu, Wei-Chen},
  title     = {{MCPNet}: An Interpretable Classifier via Multi-Level Concept Prototypes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10885--10894},
}
Semantic Shield: Defending Vision-Language Models Against Backdooring and Poisoning via Fine-grained Knowledge Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Ishmam_2024_CVPR, author = {Ishmam, Alvi Md and Thomas, Christopher}, title = {Semantic Shield: Defending Vision-Language Models Against Backdooring and Poisoning via Fine-grained Knowledge Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24820-24830} }
AvatarGPT: All-in-One Framework for Motion Understanding, Planning, Generation and Beyond-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR, author = {Zhou, Zixiang and Wan, Yu and Wang, Baoyuan}, title = {AvatarGPT: All-in-One Framework for Motion Understanding, Planning, Generation and Beyond}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1357-1366} }
Rethinking the Up-Sampling Operations in CNN-based Generative Network for Generalizable Deepfake Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Tan_2024_CVPR, author = {Tan, Chuangchuang and Zhao, Yao and Wei, Shikui and Gu, Guanghua and Liu, Ping and Wei, Yunchao}, title = {Rethinking the Up-Sampling Operations in CNN-based Generative Network for Generalizable Deepfake Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28130-28139} }
Co-Speech Gesture Video Generation via Motion-Decoupled Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2024_CVPR, author = {He, Xu and Huang, Qiaochu and Zhang, Zhensong and Lin, Zhiwei and Wu, Zhiyong and Yang, Sicheng and Li, Minglei and Chen, Zhiyi and Xu, Songcen and Wu, Xiaofei}, title = {Co-Speech Gesture Video Generation via Motion-Decoupled Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2263-2273} }
CDFormer: When Degradation Prediction Embraces Diffusion Model for Blind Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Qingguo and Zhuang, Chenyi and Gao, Pan and Qin, Jie}, title = {CDFormer: When Degradation Prediction Embraces Diffusion Model for Blind Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7455-7464} }
HumanRef: Single Image to 3D Human Generation via Reference-Guided Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Jingbo and Li, Xiaoyu and Zhang, Qi and Cao, Yanpei and Shan, Ying and Liao, Jing}, title = {HumanRef: Single Image to 3D Human Generation via Reference-Guided Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1844-1854} }
GlitchBench: Can Large Multimodal Models Detect Video Game Glitches?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Taesiri_2024_CVPR, author = {Taesiri, Mohammad Reza and Feng, Tianjun and Bezemer, Cor-Paul and Nguyen, Anh}, title = {GlitchBench: Can Large Multimodal Models Detect Video Game Glitches?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22444-22455} }
Rethinking Interactive Image Segmentation with Low Latency, High Quality, and Diverse Prompts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Qin and Cho, Jaemin and Bansal, Mohit and Niethammer, Marc}, title = {Rethinking Interactive Image Segmentation with Low Latency, High Quality, and Diverse Prompts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3773-3782} }
ALGM: Adaptive Local-then-Global Token Merging for Efficient Semantic Segmentation with Plain Vision Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Norouzi_2024_CVPR, author = {Norouzi, Narges and Orlova, Svetlana and de Geus, Daan and Dubbelman, Gijs}, title = {ALGM: Adaptive Local-then-Global Token Merging for Efficient Semantic Segmentation with Plain Vision Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15773-15782} }
DITTO: Dual and Integrated Latent Topologies for Implicit 3D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shim_2024_CVPR, author = {Shim, Jaehyeok and Joo, Kyungdon}, title = {DITTO: Dual and Integrated Latent Topologies for Implicit 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5396-5405} }
Single-Model and Any-Modality for Video Object Tracking-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Zongwei and Zheng, Jilai and Ren, Xiangxuan and Vasluianu, Florin-Alexandru and Ma, Chao and Paudel, Danda Pani and Van Gool, Luc and Timofte, Radu}, title = {Single-Model and Any-Modality for Video Object Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19156-19166} }
FlowTrack: Revisiting Optical Flow for Long-Range Dense Tracking-
[pdf]
[bibtex]@InProceedings{Cho_2024_CVPR, author = {Cho, Seokju and Huang, Jiahui and Kim, Seungryong and Lee, Joon-Young}, title = {FlowTrack: Revisiting Optical Flow for Long-Range Dense Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19268-19277} }
HIT: Estimating Internal Human Implicit Tissues from the Body Surface-
[pdf]
[supp]
[bibtex]@InProceedings{Keller_2024_CVPR, author = {Keller, Marilyn and Arora, Vaibhav and Dakri, Abdelmouttaleb and Chandhok, Shivam and Machann, J\"urgen and Fritsche, Andreas and Black, Michael J. and Pujades, Sergi}, title = {HIT: Estimating Internal Human Implicit Tissues from the Body Surface}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3480-3490} }
DanceCamera3D: 3D Camera Movement Synthesis with Music and Dance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Zixuan and Jia, Jia and Sun, Shikun and Wu, Haozhe and Han, Rong and Li, Zhenyu and Tang, Di and Zhou, Jiaqing and Luo, Jiebo}, title = {DanceCamera3D: 3D Camera Movement Synthesis with Music and Dance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7892-7901} }
Synthesize, Diagnose, and Optimize: Towards Fine-Grained Vision-Language Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2024_CVPR, author = {Peng, Wujian and Xie, Sicheng and You, Zuyao and Lan, Shiyi and Wu, Zuxuan}, title = {Synthesize, Diagnose, and Optimize: Towards Fine-Grained Vision-Language Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13279-13288} }
Density-guided Translator Boosts Synthetic-to-Real Unsupervised Domain Adaptive Segmentation of 3D Point Clouds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2024_CVPR, author = {Yuan, Zhimin and Zeng, Wankang and Su, Yanfei and Liu, Weiquan and Cheng, Ming and Guo, Yulan and Wang, Cheng}, title = {Density-guided Translator Boosts Synthetic-to-Real Unsupervised Domain Adaptive Segmentation of 3D Point Clouds}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23303-23312} }
Cross Initialization for Face Personalization of Text-to-Image Models-
[pdf]
[supp]
[bibtex]@InProceedings{Pang_2024_CVPR, author = {Pang, Lianyu and Yin, Jian and Xie, Haoran and Wang, Qiping and Li, Qing and Mao, Xudong}, title = {Cross Initialization for Face Personalization of Text-to-Image Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8393-8403} }
LEDITS++: Limitless Image Editing using Text-to-Image Models-
[pdf]
[supp]
[bibtex]@InProceedings{Brack_2024_CVPR, author = {Brack, Manuel and Friedrich, Felix and Kornmeier, Katharina and Tsaban, Linoy and Schramowski, Patrick and Kersting, Kristian and Passos, Apolinario}, title = {LEDITS++: Limitless Image Editing using Text-to-Image Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8861-8870} }
Video Interpolation with Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jain_2024_CVPR, author = {Jain, Siddhant and Watson, Daniel and Tabellion, Eric and Ho{\l}y\'nski, Aleksander and Poole, Ben and Kontkanen, Janne}, title = {Video Interpolation with Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7341-7351} }
WildlifeMapper: Aerial Image Analysis for Multi-Species Detection and Identification-
[pdf]
[supp]
[bibtex]@InProceedings{Kumar_2024_CVPR, author = {Kumar, Satish and Zhang, Bowen and Gudavalli, Chandrakanth and Levenson, Connor and Hughey, Lacey and Stabach, Jared A. and Amoke, Irene and Ojwang, Gordon and Mukeka, Joseph and Mwiu, Stephen and Ogutu, Joseph and Frederick, Howard and Manjunath, B.S.}, title = {WildlifeMapper: Aerial Image Analysis for Multi-Species Detection and Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12594-12604} }
Learning Adaptive Spatial Coherent Correlations for Speech-Preserving Facial Expression Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Tianshui and Lin, Jianman and Yang, Zhijing and Qing, Chunmei and Lin, Liang}, title = {Learning Adaptive Spatial Coherent Correlations for Speech-Preserving Facial Expression Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7267-7276} }
Tune-An-Ellipse: CLIP Has Potential to Find What You Want-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2024_CVPR, author = {Xie, Jinheng and Deng, Songhe and Li, Bing and Liu, Haozhe and Huang, Yawen and Zheng, Yefeng and Schmidhuber, J\"urgen and Ghanem, Bernard and Shen, Linlin and Shou, Mike Zheng}, title = {Tune-An-Ellipse: CLIP Has Potential to Find What You Want}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13723-13732} }
Neural Spline Fields for Burst Image Fusion and Layer Separation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chugunov_2024_CVPR, author = {Chugunov, Ilya and Shustin, David and Yan, Ruyu and Lei, Chenyang and Heide, Felix}, title = {Neural Spline Fields for Burst Image Fusion and Layer Separation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25763-25773} }
WHAM: Reconstructing World-grounded Humans with Accurate 3D Motion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shin_2024_CVPR, author = {Shin, Soyong and Kim, Juyong and Halilaj, Eni and Black, Michael J.}, title = {WHAM: Reconstructing World-grounded Humans with Accurate 3D Motion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2070-2080} }
NAPGuard: Towards Detecting Naturalistic Adversarial Patches-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Siyang and Wang, Jiakai and Zhao, Jiejie and Wang, Yazhe and Liu, Xianglong}, title = {NAPGuard: Towards Detecting Naturalistic Adversarial Patches}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24367-24376} }
DiffPerformer: Iterative Learning of Consistent Latent Guidance for Diffusion-based Human Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Chenyang and Zheng, Zerong and Yu, Tao and Lv, Xiaoqian and Zhong, Bineng and Zhang, Shengping and Nie, Liqiang}, title = {DiffPerformer: Iterative Learning of Consistent Latent Guidance for Diffusion-based Human Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6169-6179} }
Unified Language-driven Zero-shot Domain Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Senqiao and Tian, Zhuotao and Jiang, Li and Jia, Jiaya}, title = {Unified Language-driven Zero-shot Domain Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23407-23415} }
Category-Level Multi-Part Multi-Joint 3D Shape Assembly-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Yichen and Mo, Kaichun and Duan, Yueqi and Wang, He and Zhang, Jiequan and Shao, Lin}, title = {Category-Level Multi-Part Multi-Joint 3D Shape Assembly}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3281-3291} }
Equivariant Multi-Modality Image Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Zixiang and Bai, Haowen and Zhang, Jiangshe and Zhang, Yulun and Zhang, Kai and Xu, Shuang and Chen, Dongdong and Timofte, Radu and Van Gool, Luc}, title = {Equivariant Multi-Modality Image Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25912-25921} }
NeLF-Pro: Neural Light Field Probes for Multi-Scale Novel View Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{You_2024_CVPR, author = {You, Zinuo and Geiger, Andreas and Chen, Anpei}, title = {NeLF-Pro: Neural Light Field Probes for Multi-Scale Novel View Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19833-19843} }
One-Shot Open Affordance Learning with Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Gen and Sun, Deqing and Sevilla-Lara, Laura and Jampani, Varun}, title = {One-Shot Open Affordance Learning with Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3086-3096} }
Don't Look into the Dark: Latent Codes for Pluralistic Image Inpainting-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Haiwei and Zhao, Yajie}, title = {Don't Look into the Dark: Latent Codes for Pluralistic Image Inpainting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7591-7600} }
Incremental Nuclei Segmentation from Histopathological Images via Future-class Awareness and Compatibility-inspired Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Huyong and Wu, Huisi and Qin, Jing}, title = {Incremental Nuclei Segmentation from Histopathological Images via Future-class Awareness and Compatibility-inspired Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11408-11417} }
DiffEditor: Boosting Accuracy and Flexibility on Diffusion-based Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mou_2024_CVPR, author = {Mou, Chong and Wang, Xintao and Song, Jiechong and Shan, Ying and Zhang, Jian}, title = {DiffEditor: Boosting Accuracy and Flexibility on Diffusion-based Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8488-8497} }
Solving Masked Jigsaw Puzzles with Diffusion Vision Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Jinyang and Teshome, Wondmgezahu and Ghimire, Sandesh and Sznaier, Mario and Camps, Octavia}, title = {Solving Masked Jigsaw Puzzles with Diffusion Vision Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23009-23018} }
InstructVideo: Instructing Video Diffusion Models with Human Feedback-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2024_CVPR, author = {Yuan, Hangjie and Zhang, Shiwei and Wang, Xiang and Wei, Yujie and Feng, Tao and Pan, Yining and Zhang, Yingya and Liu, Ziwei and Albanie, Samuel and Ni, Dong}, title = {InstructVideo: Instructing Video Diffusion Models with Human Feedback}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6463-6474} }
Fully Exploiting Every Real Sample: SuperPixel Sample Gradient Model Stealing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Yunlong and Deng, Xiaoheng and Liu, Yijing and Pei, Xinjun and Xia, Jiazhi and Chen, Wei}, title = {Fully Exploiting Every Real Sample: SuperPixel Sample Gradient Model Stealing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24316-24325} }
Progressive Divide-and-Conquer via Subsampling Decomposition for Accelerated MRI-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Chong and Guo, Lanqing and Wang, Yufei and Cheng, Hao and Yu, Yi and Wen, Bihan}, title = {Progressive Divide-and-Conquer via Subsampling Decomposition for Accelerated MRI}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25128-25137} }
DiffMOT: A Real-time Diffusion-based Multiple Object Tracker with Non-linear Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lv_2024_CVPR, author = {Lv, Weiyi and Huang, Yuhang and Zhang, Ning and Lin, Ruei-Sung and Han, Mei and Zeng, Dan}, title = {DiffMOT: A Real-time Diffusion-based Multiple Object Tracker with Non-linear Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19321-19330} }
MV-Adapter: Multimodal Video Transfer Learning for Video Text Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Jin_2024_CVPR, author = {Jin, Xiaojie and Zhang, Bowen and Gong, Weibo and Xu, Kai and Deng, Xueqing and Wang, Peng and Zhang, Zhao and Shen, Xiaohui and Feng, Jiashi}, title = {MV-Adapter: Multimodal Video Transfer Learning for Video Text Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27144-27153} }
Rethinking Multi-view Representation Learning via Distilled Disentangling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ke_2024_CVPR, author = {Ke, Guanzhou and Wang, Bo and Wang, Xiaoli and He, Shengfeng}, title = {Rethinking Multi-view Representation Learning via Distilled Disentangling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26774-26783} }
Just Add π! Pose Induced Video Transformers for Understanding Activities of Daily Living-
[pdf]
[supp]
[bibtex]@InProceedings{Reilly_2024_CVPR, author = {Reilly, Dominick and Das, Srijan}, title = {Just Add {$\pi$}! Pose Induced Video Transformers for Understanding Activities of Daily Living}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18340-18350} }
ViLa-MIL: Dual-scale Vision-Language Multiple Instance Learning for Whole Slide Image Classification-
[pdf]
[supp]
[bibtex]@InProceedings{Shi_2024_CVPR, author = {Shi, Jiangbo and Li, Chen and Gong, Tieliang and Zheng, Yefeng and Fu, Huazhu}, title = {ViLa-MIL: Dual-scale Vision-Language Multiple Instance Learning for Whole Slide Image Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11248-11258} }
Targeted Representation Alignment for Open-World Semi-Supervised Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Xiao_2024_CVPR, author = {Xiao, Ruixuan and Feng, Lei and Tang, Kai and Zhao, Junbo and Li, Yixuan and Chen, Gang and Wang, Haobo}, title = {Targeted Representation Alignment for Open-World Semi-Supervised Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23072-23082} }
Efficient Solution of Point-Line Absolute Pose-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hruby_2024_CVPR, author = {Hruby, Petr and Duff, Timothy and Pollefeys, Marc}, title = {Efficient Solution of Point-Line Absolute Pose}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21316-21325} }
Text-to-3D using Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Zilong and Wang, Feng and Wang, Yikai and Liu, Huaping}, title = {Text-to-3D using Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21401-21412} }
CapsFusion: Rethinking Image-Text Data at Scale-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2024_CVPR, author = {Yu, Qiying and Sun, Quan and Zhang, Xiaosong and Cui, Yufeng and Zhang, Fan and Cao, Yue and Wang, Xinlong and Liu, Jingjing}, title = {CapsFusion: Rethinking Image-Text Data at Scale}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14022-14032} }
On the Content Bias in Frechet Video Distance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ge_2024_CVPR, author = {Ge, Songwei and Mahapatra, Aniruddha and Parmar, Gaurav and Zhu, Jun-Yan and Huang, Jia-Bin}, title = {On the Content Bias in Frechet Video Distance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7277-7288} }
Tumor Micro-environment Interactions Guided Graph Learning for Survival Analysis of Human Cancers from Whole-slide Pathological Images-
[pdf]
[bibtex]@InProceedings{Shao_2024_CVPR, author = {Shao, Wei and Shi, YangYang and Zhang, Daoqiang and Zhou, JunJie and Wan, Peng}, title = {Tumor Micro-environment Interactions Guided Graph Learning for Survival Analysis of Human Cancers from Whole-slide Pathological Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11694-11703} }
Towards Generalizable Multi-Object Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qin_2024_CVPR, author = {Qin, Zheng and Wang, Le and Zhou, Sanping and Fu, Panpan and Hua, Gang and Tang, Wei}, title = {Towards Generalizable Multi-Object Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18995-19004} }
POPDG: Popular 3D Dance Generation with PopDanceSet-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2024_CVPR, author = {Luo, Zhenye and Ren, Min and Hu, Xuecai and Huang, Yongzhen and Yao, Li}, title = {POPDG: Popular 3D Dance Generation with PopDanceSet}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26984-26993} }
Image Neural Field Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Yinbo and Wang, Oliver and Zhang, Richard and Shechtman, Eli and Wang, Xiaolong and Gharbi, Michael}, title = {Image Neural Field Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8007-8017} }
Discriminative Probing and Tuning for Text-to-Image Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Qu_2024_CVPR, author = {Qu, Leigang and Wang, Wenjie and Li, Yongqi and Zhang, Hanwang and Nie, Liqiang and Chua, Tat-Seng}, title = {Discriminative Probing and Tuning for Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7434-7444} }
Slice3D: Multi-Slice Occlusion-Revealing Single View 3D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Yizhi and Lira, Wallace and Wang, Wenqi and Mahdavi-Amiri, Ali and Zhang, Hao}, title = {Slice3D: Multi-Slice Occlusion-Revealing Single View 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9881-9891} }
Towards More Accurate Diffusion Model Acceleration with A Timestep Tuner-
[pdf]
[supp]
[bibtex]@InProceedings{Xia_2024_CVPR, author = {Xia, Mengfei and Shen, Yujun and Lei, Changsong and Zhou, Yu and Zhao, Deli and Yi, Ran and Wang, Wenping and Liu, Yong-Jin}, title = {Towards More Accurate Diffusion Model Acceleration with A Timestep Tuner}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5736-5745} }
Rethinking Generalizable Face Anti-spoofing via Hierarchical Prototype-guided Distribution Refinement in Hyperbolic Space-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2024_CVPR, author = {Hu, Chengyang and Zhang, Ke-Yue and Yao, Taiping and Ding, Shouhong and Ma, Lizhuang}, title = {Rethinking Generalizable Face Anti-spoofing via Hierarchical Prototype-guided Distribution Refinement in Hyperbolic Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1032-1041} }
IIRP-Net: Iterative Inference Residual Pyramid Network for Enhanced Image Registration-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2024_CVPR, author = {Ma, Tai and Zhang, Suwei and Li, Jiafeng and Wen, Ying}, title = {IIRP-Net: Iterative Inference Residual Pyramid Network for Enhanced Image Registration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11546-11555} }
Learning without Exact Guidance: Updating Large-scale High-resolution Land Cover Maps from Low-resolution Historical Labels-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Zhuohong and He, Wei and Li, Jiepan and Lu, Fangxiao and Zhang, Hongyan}, title = {Learning without Exact Guidance: Updating Large-scale High-resolution Land Cover Maps from Low-resolution Historical Labels}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27717-27727} }
GenesisTex: Adapting Image Denoising Diffusion to Texture Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2024_CVPR, author = {Gao, Chenjian and Jiang, Boyan and Li, Xinghui and Zhang, Yingpeng and Yu, Qian}, title = {GenesisTex: Adapting Image Denoising Diffusion to Texture Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4620-4629} }
TTA-EVF: Test-Time Adaptation for Event-based Video Frame Interpolation via Reliable Pixel and Sample Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Cho_2024_CVPR, author = {Cho, Hoonhee and Kim, Taewoo and Jeong, Yuhwan and Yoon, Kuk-Jin}, title = {TTA-EVF: Test-Time Adaptation for Event-based Video Frame Interpolation via Reliable Pixel and Sample Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25701-25711} }
Image-to-Image Matching via Foundation Models: A New Perspective for Open-Vocabulary Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Yuan and Sun, Rui and Luo, Naisong and Pan, Yuwen and Zhang, Tianzhu}, title = {Image-to-Image Matching via Foundation Models: A New Perspective for Open-Vocabulary Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3952-3963} }
BigGait: Learning Gait Representation You Want by Large Vision Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2024_CVPR, author = {Ye, Dingqiang and Fan, Chao and Ma, Jingzhe and Liu, Xiaoming and Yu, Shiqi}, title = {BigGait: Learning Gait Representation You Want by Large Vision Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {200-210} }
BEVNeXt: Reviving Dense BEV Frameworks for 3D Object Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Zhenxin and Lan, Shiyi and Alvarez, Jose M. and Wu, Zuxuan}, title = {BEVNeXt: Reviving Dense BEV Frameworks for 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20113-20123} }
SNIFFER: Multimodal Large Language Model for Explainable Out-of-Context Misinformation Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qi_2024_CVPR, author = {Qi, Peng and Yan, Zehong and Hsu, Wynne and Lee, Mong Li}, title = {SNIFFER: Multimodal Large Language Model for Explainable Out-of-Context Misinformation Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13052-13062} }
Beyond Seen Primitive Concepts and Attribute-Object Compositional Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Saini_2024_CVPR, author = {Saini, Nirat and Pham, Khoi and Shrivastava, Abhinav}, title = {Beyond Seen Primitive Concepts and Attribute-Object Compositional Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14466-14476} }
Unleashing Network Potentials for Semantic Scene Completion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Fengyun and Sun, Qianru and Zhang, Dong and Tang, Jinhui}, title = {Unleashing Network Potentials for Semantic Scene Completion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10314-10323} }
HOIST-Former: Hand-held Objects Identification, Segmentation, and Tracking in the Wild-
[pdf]
[bibtex]@InProceedings{Narasimhaswamy_2024_CVPR, author = {Narasimhaswamy, Supreeth and Nguyen, Huy Anh and Huang, Lihan and Hoai, Minh}, title = {HOIST-Former: Hand-held Objects Identification, Segmentation, and Tracking in the Wild}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2351-2361} }
Contextrast: Contextual Contrastive Learning for Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sung_2024_CVPR,
    author    = {Sung, Changki and Kim, Wanhee and An, Jungho and Lee, Wooju and Lim, Hyungtae and Myung, Hyun},
    title     = {Contextrast: Contextual Contrastive Learning for Semantic Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {3732--3742}
}
Learning Occupancy for Monocular 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2024_CVPR,
    author    = {Peng, Liang and Xu, Junkai and Cheng, Haoran and Yang, Zheng and Wu, Xiaopei and Qian, Wei and Wang, Wenxiao and Wu, Boxi and Cai, Deng},
    title     = {Learning Occupancy for Monocular {3D} Object Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {10281--10292}
}
LAA-Net: Localized Artifact Attention Network for Quality-Agnostic and Generalizable Deepfake Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Nguyen_2024_CVPR,
    author    = {Nguyen, Dat and Mejri, Nesryne and Singh, Inder Pal and Kuleshova, Polina and Astrid, Marcella and Kacem, Anis and Ghorbel, Enjie and Aouada, Djamila},
    title     = {{LAA-Net}: Localized Artifact Attention Network for Quality-Agnostic and Generalizable Deepfake Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {17395--17405}
}
LEAD: Learning Decomposition for Source-free Universal Domain Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qu_2024_CVPR,
    author    = {Qu, Sanqing and Zou, Tianpei and He, Lianghua and R{\"o}hrbein, Florian and Knoll, Alois and Chen, Guang and Jiang, Changjun},
    title     = {{LEAD}: Learning Decomposition for Source-free Universal Domain Adaptation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {23334--23343}
}
AUEditNet: Dual-Branch Facial Action Unit Intensity Manipulation with Implicit Disentanglement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2024_CVPR,
    author    = {Jin, Shiwei and Wang, Zhen and Wang, Lei and Liu, Peng and Bi, Ning and Nguyen, Truong},
    title     = {{AUEditNet}: Dual-Branch Facial Action Unit Intensity Manipulation with Implicit Disentanglement},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {2104--2113}
}
BodyMAP - Jointly Predicting Body Mesh and 3D Applied Pressure Map for People in Bed-
[pdf]
[supp]
[bibtex]@InProceedings{Tandon_2024_CVPR,
    author    = {Tandon, Abhishek and Goyal, Anujraaj and Clever, Henry M. and Erickson, Zackory},
    title     = {{BodyMAP} - Jointly Predicting Body Mesh and {3D} Applied Pressure Map for People in Bed},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {2480--2489}
}
OneLLM: One Framework to Align All Modalities with Language-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2024_CVPR,
    author    = {Han, Jiaming and Gong, Kaixiong and Zhang, Yiyuan and Wang, Jiaqi and Zhang, Kaipeng and Lin, Dahua and Qiao, Yu and Gao, Peng and Yue, Xiangyu},
    title     = {{OneLLM}: One Framework to Align All Modalities with Language},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {26584--26595}
}
PAD: Patch-Agnostic Defense against Adversarial Patch Attacks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jing_2024_CVPR,
    author    = {Jing, Lihua and Wang, Rui and Ren, Wenqi and Dong, Xin and Zou, Cong},
    title     = {{PAD}: Patch-Agnostic Defense against Adversarial Patch Attacks},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {24472--24481}
}
MULAN: A Multi Layer Annotated Dataset for Controllable Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tudosiu_2024_CVPR,
    author    = {Tudosiu, Petru-Daniel and Yang, Yongxin and Zhang, Shifeng and Chen, Fei and McDonagh, Steven and Lampouras, Gerasimos and Iacobacci, Ignacio and Parisot, Sarah},
    title     = {{MULAN}: A Multi Layer Annotated Dataset for Controllable Text-to-Image Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {22413--22422}
}
Rotation-Agnostic Image Representation Learning for Digital Pathology-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Alfasly_2024_CVPR,
    author    = {Alfasly, Saghir and Shafique, Abubakr and Nejat, Peyman and Khan, Jibran and Alsaafin, Areej and Alabtah, Ghazal and Tizhoosh, H. R.},
    title     = {Rotation-Agnostic Image Representation Learning for Digital Pathology},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {11683--11693}
}
Unbiased Faster R-CNN for Single-source Domain Generalized Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2024_CVPR,
    author    = {Liu, Yajing and Zhou, Shijun and Liu, Xiyao and Hao, Chunhui and Fan, Baojie and Tian, Jiandong},
    title     = {Unbiased {Faster R-CNN} for Single-source Domain Generalized Object Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {28838--28847}
}
Super-Resolution Reconstruction from Bayer-Pattern Spike Streams-
[pdf]
[supp]
[bibtex]@InProceedings{Dong_2024_CVPR,
    author    = {Dong, Yanchen and Xiong, Ruiqin and Zhang, Jian and Yu, Zhaofei and Fan, Xiaopeng and Zhu, Shuyuan and Huang, Tiejun},
    title     = {Super-Resolution Reconstruction from {Bayer}-Pattern Spike Streams},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {24871--24880}
}
EASE-DETR: Easing the Competition among Object Queries-
[pdf]
[bibtex]@InProceedings{Gao_2024_CVPR,
    author    = {Gao, Yulu and Sun, Yifan and Ding, Xudong and Zhao, Chuyang and Liu, Si},
    title     = {{EASE-DETR}: Easing the Competition among Object Queries},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {17282--17291}
}
KPConvX: Modernizing Kernel Point Convolution with Kernel Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Thomas_2024_CVPR,
    author    = {Thomas, Hugues and Tsai, Yao-Hung Hubert and Barfoot, Timothy D. and Zhang, Jian},
    title     = {{KPConvX}: Modernizing Kernel Point Convolution with Kernel Attention},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {5525--5535}
}
Clockwork Diffusion: Efficient Generation With Model-Step Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Habibian_2024_CVPR,
    author    = {Habibian, Amirhossein and Ghodrati, Amir and Fathima, Noor and Sautiere, Guillaume and Garrepalli, Risheek and Porikli, Fatih and Petersen, Jens},
    title     = {Clockwork Diffusion: Efficient Generation With Model-Step Distillation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {8352--8361}
}
Pick-or-Mix: Dynamic Channel Sampling for ConvNets-
[pdf]
[supp]
[bibtex]@InProceedings{Kumar_2024_CVPR,
    author    = {Kumar, Ashish and Kim, Daneul and Park, Jaesik and Behera, Laxmidhar},
    title     = {Pick-or-Mix: Dynamic Channel Sampling for {ConvNets}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {5873--5882}
}
Self-Discovering Interpretable Diffusion Latent Directions for Responsible Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR,
    author    = {Li, Hang and Shen, Chengzhi and Torr, Philip and Tresp, Volker and Gu, Jindong},
    title     = {Self-Discovering Interpretable Diffusion Latent Directions for Responsible Text-to-Image Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {12006--12016}
}
HiLo: Detailed and Robust 3D Clothed Human Reconstruction with High-and Low-Frequency Information of Parametric Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR,
    author    = {Yang, Yifan and Liu, Dong and Zhang, Shuhai and Deng, Zeshuai and Huang, Zixiong and Tan, Mingkui},
    title     = {{HiLo}: Detailed and Robust {3D} Clothed Human Reconstruction with High-and Low-Frequency Information of Parametric Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {10671--10681}
}
Promptable Behaviors: Personalizing Multi-Objective Rewards from Human Preferences-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hwang_2024_CVPR,
    author    = {Hwang, Minyoung and Weihs, Luca and Park, Chanwoo and Lee, Kimin and Kembhavi, Aniruddha and Ehsani, Kiana},
    title     = {Promptable Behaviors: Personalizing Multi-Objective Rewards from Human Preferences},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {16216--16226}
}
Stationary Representations: Optimally Approximating Compatibility and Implications for Improved Model Replacements-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Biondi_2024_CVPR,
    author    = {Biondi, Niccol{\`o} and Pernici, Federico and Ricci, Simone and Del Bimbo, Alberto},
    title     = {Stationary Representations: Optimally Approximating Compatibility and Implications for Improved Model Replacements},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {28793--28804}
}
Towards Calibrated Multi-label Deep Neural Networks-
[pdf]
[supp]
[bibtex]@InProceedings{Cheng_2024_CVPR,
    author    = {Cheng, Jiacheng and Vasconcelos, Nuno},
    title     = {Towards Calibrated Multi-label Deep Neural Networks},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {27589--27599}
}
SceneTex: High-Quality Texture Synthesis for Indoor Scenes via Diffusion Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR,
    author    = {Chen, Dave Zhenyu and Li, Haoxuan and Lee, Hsin-Ying and Tulyakov, Sergey and Nie{\ss}ner, Matthias},
    title     = {{SceneTex}: High-Quality Texture Synthesis for Indoor Scenes via Diffusion Priors},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {21081--21091}
}
Neural Underwater Scene Representation-
[pdf]
[supp]
[bibtex]@InProceedings{Tang_2024_CVPR,
    author    = {Tang, Yunkai and Zhu, Chengxuan and Wan, Renjie and Xu, Chao and Shi, Boxin},
    title     = {Neural Underwater Scene Representation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {11780--11789}
}
Progress-Aware Online Action Segmentation for Egocentric Procedural Task Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Shen_2024_CVPR,
    author    = {Shen, Yuhan and Elhamifar, Ehsan},
    title     = {Progress-Aware Online Action Segmentation for Egocentric Procedural Task Videos},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {18186--18197}
}
TUMTraf V2X Cooperative Perception Dataset-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zimmer_2024_CVPR,
    author    = {Zimmer, Walter and Wardana, Gerhard Arya and Sritharan, Suren and Zhou, Xingcheng and Song, Rui and Knoll, Alois C.},
    title     = {{TUMTraf} {V2X} Cooperative Perception Dataset},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {22668--22677}
}
Constrained Layout Generation with Factor Graphs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dupty_2024_CVPR,
    author    = {Dupty, Mohammed Haroon and Dong, Yanfei and Leng, Sicong and Fu, Guoji and Goh, Yong Liang and Lu, Wei and Lee, Wee Sun},
    title     = {Constrained Layout Generation with Factor Graphs},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {12851--12860}
}
SLICE: Stabilized LIME for Consistent Explanations for Image Classification-
[pdf]
[supp]
[bibtex]@InProceedings{Bora_2024_CVPR,
    author    = {Bora, Revoti Prasad and Terh{\"o}rst, Philipp and Veldhuis, Raymond and Ramachandra, Raghavendra and Raja, Kiran},
    title     = {{SLICE}: Stabilized {LIME} for Consistent Explanations for Image Classification},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {10988--10996}
}
Anomaly Heterogeneity Learning for Open-set Supervised Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2024_CVPR,
    author    = {Zhu, Jiawen and Ding, Choubo and Tian, Yu and Pang, Guansong},
    title     = {Anomaly Heterogeneity Learning for Open-set Supervised Anomaly Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {17616--17626}
}
SPECAT: SPatial-spEctral Cumulative-Attention Transformer for High-Resolution Hyperspectral Image Reconstruction-
[pdf]
[bibtex]@InProceedings{Yao_2024_CVPR,
    author    = {Yao, Zhiyang and Liu, Shuyang and Yuan, Xiaoyun and Fang, Lu},
    title     = {{SPECAT}: {SPatial-spEctral} Cumulative-Attention Transformer for High-Resolution Hyperspectral Image Reconstruction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {25368--25377}
}
Attentive Illumination Decomposition Model for Multi-Illuminant White Balancing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR,
    author    = {Kim, Dongyoung and Kim, Jinwoo and Yu, Junsang and Kim, Seon Joo},
    title     = {Attentive Illumination Decomposition Model for Multi-Illuminant White Balancing},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {25512--25521}
}
Efficient Stitchable Task Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2024_CVPR,
    author    = {He, Haoyu and Pan, Zizheng and Liu, Jing and Cai, Jianfei and Zhuang, Bohan},
    title     = {Efficient Stitchable Task Adaptation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {28555--28565}
}
Image Processing GNN: Breaking Rigidity in Super-Resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Tian_2024_CVPR,
    author    = {Tian, Yuchuan and Chen, Hanting and Xu, Chao and Wang, Yunhe},
    title     = {Image Processing {GNN}: Breaking Rigidity in Super-Resolution},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {24108--24117}
}
Revisiting Counterfactual Problems in Referring Expression Comprehension-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2024_CVPR,
    author    = {Yu, Zhihan and Li, Ruifan},
    title     = {Revisiting Counterfactual Problems in Referring Expression Comprehension},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {13438--13448}
}
DyBluRF: Dynamic Neural Radiance Fields from Blurry Monocular Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2024_CVPR,
    author    = {Sun, Huiqiang and Li, Xingyi and Shen, Liao and Ye, Xinyi and Xian, Ke and Cao, Zhiguo},
    title     = {{DyBluRF}: Dynamic Neural Radiance Fields from Blurry Monocular Video},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {7517--7527}
}
Compressed 3D Gaussian Splatting for Accelerated Novel View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Niedermayr_2024_CVPR,
    author    = {Niedermayr, Simon and Stumpfegger, Josef and Westermann, R{\"u}diger},
    title     = {Compressed {3D} {Gaussian} Splatting for Accelerated Novel View Synthesis},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {10349--10358}
}
Separating the "Chirp" from the "Chat": Self-supervised Visual Grounding of Sound and Language-
[pdf]
[arXiv]
[bibtex]@InProceedings{Hamilton_2024_CVPR,
    author    = {Hamilton, Mark and Zisserman, Andrew and Hershey, John R. and Freeman, William T.},
    title     = {Separating the ``Chirp'' from the ``Chat'': Self-supervised Visual Grounding of Sound and Language},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {13117--13127}
}
Towards Generalizing to Unseen Domains with Few Labels-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Galappaththige_2024_CVPR,
    author    = {Galappaththige, Chamuditha Jayanga and Baliah, Sanoojan and Gunawardhana, Malitha and Khan, Muhammad Haris},
    title     = {Towards Generalizing to Unseen Domains with Few Labels},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {23691--23700}
}
MA-LMM: Memory-Augmented Large Multimodal Model for Long-Term Video Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{He_2024_CVPR,
    author    = {He, Bo and Li, Hengduo and Jang, Young Kyun and Jia, Menglin and Cao, Xuefei and Shah, Ashish and Shrivastava, Abhinav and Lim, Ser-Nam},
    title     = {{MA-LMM}: Memory-Augmented Large Multimodal Model for Long-Term Video Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {13504--13514}
}
AAMDM: Accelerated Auto-regressive Motion Diffusion Model-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR,
    author    = {Li, Tianyu and Qiao, Calvin and Ren, Guanqiao and Yin, KangKang and Ha, Sehoon},
    title     = {{AAMDM}: Accelerated Auto-regressive Motion Diffusion Model},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {1813--1823}
}
Towards Understanding Cross and Self-Attention in Stable Diffusion for Text-Guided Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR,
    author    = {Liu, Bingyan and Wang, Chengyu and Cao, Tingfeng and Jia, Kui and Huang, Jun},
    title     = {Towards Understanding Cross and Self-Attention in {Stable Diffusion} for Text-Guided Image Editing},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {7817--7826}
}
Dr2Net: Dynamic Reversible Dual-Residual Networks for Memory-Efficient Finetuning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2024_CVPR,
    author    = {Zhao, Chen and Liu, Shuming and Mangalam, Karttikeya and Qian, Guocheng and Zohra, Fatimah and Alghannam, Abdulmohsen and Malik, Jitendra and Ghanem, Bernard},
    title     = {{Dr2Net}: Dynamic Reversible Dual-Residual Networks for Memory-Efficient Finetuning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {15835--15844}
}
PNeRV: Enhancing Spatial Consistency via Pyramidal Neural Representation for Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR,
    author    = {Zhao, Qi and Asif, M. Salman and Ma, Zhan},
    title     = {{PNeRV}: Enhancing Spatial Consistency via Pyramidal Neural Representation for Videos},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {19103--19112}
}
LTGC: Long-tail Recognition via Leveraging LLMs-driven Generated Content-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR,
    author    = {Zhao, Qihao and Dai, Yalun and Li, Hao and Hu, Wei and Zhang, Fan and Liu, Jun},
    title     = {{LTGC}: Long-tail Recognition via Leveraging {LLMs}-driven Generated Content},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {19510--19520}
}
DiverGen: Improving Instance Segmentation by Learning Wider Data Distribution with More Diverse Generative Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2024_CVPR,
    author    = {Fan, Chengxiang and Zhu, Muzhi and Chen, Hao and Liu, Yang and Wu, Weijia and Zhang, Huaqi and Shen, Chunhua},
    title     = {{DiverGen}: Improving Instance Segmentation by Learning Wider Data Distribution with More Diverse Generative Data},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {3986--3995}
}
Neural Refinement for Absolute Pose Regression with Feature Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR,
    author    = {Chen, Shuai and Bhalgat, Yash and Li, Xinghui and Bian, Jia-Wang and Li, Kejie and Wang, Zirui and Prisacariu, Victor Adrian},
    title     = {Neural Refinement for Absolute Pose Regression with Feature Synthesis},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {20987--20996}
}
Learning Disentangled Identifiers for Action-Customized Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR,
    author    = {Huang, Siteng and Gong, Biao and Feng, Yutong and Chen, Xi and Fu, Yuqian and Liu, Yu and Wang, Donglin},
    title     = {Learning Disentangled Identifiers for Action-Customized Text-to-Image Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {7797--7806}
}
Automatic Controllable Colorization via Imagination-
[pdf]
[arXiv]
[bibtex]@InProceedings{Cong_2024_CVPR,
    author    = {Cong, Xiaoyan and Wu, Yue and Chen, Qifeng and Lei, Chenyang},
    title     = {Automatic Controllable Colorization via Imagination},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {2609--2619}
}
Point Transformer V3: Simpler Faster Stronger-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR,
    author    = {Wu, Xiaoyang and Jiang, Li and Wang, Peng-Shuai and Liu, Zhijian and Liu, Xihui and Qiao, Yu and Ouyang, Wanli and He, Tong and Zhao, Hengshuang},
    title     = {{Point Transformer V3}: Simpler Faster Stronger},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {4840--4851}
}
DiffCast: A Unified Framework via Residual Diffusion for Precipitation Nowcasting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2024_CVPR,
    author    = {Yu, Demin and Li, Xutao and Ye, Yunming and Zhang, Baoquan and Luo, Chuyao and Dai, Kuai and Wang, Rui and Chen, Xunlai},
    title     = {{DiffCast}: A Unified Framework via Residual Diffusion for Precipitation Nowcasting},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {27758--27767}
}
Ego-Exo4D: Understanding Skilled Human Activity from First- and Third-Person Perspectives-
[pdf]
[supp]
[bibtex]@InProceedings{Grauman_2024_CVPR,
    author    = {Grauman, Kristen and Westbury, Andrew and Torresani, Lorenzo and Kitani, Kris and Malik, Jitendra and Afouras, Triantafyllos and Ashutosh, Kumar and Baiyya, Vijay and Bansal, Siddhant and Boote, Bikram and Byrne, Eugene and Chavis, Zach and Chen, Joya and Cheng, Feng and Chu, Fu-Jen and Crane, Sean and Dasgupta, Avijit and Dong, Jing and Escobar, Maria and Forigua, Cristhian and Gebreselasie, Abrham and Haresh, Sanjay and Huang, Jing and Islam, Md Mohaiminul and Jain, Suyog and Khirodkar, Rawal and Kukreja, Devansh and Liang, Kevin J and Liu, Jia-Wei and Majumder, Sagnik and Mao, Yongsen and Martin, Miguel and Mavroudi, Effrosyni and Nagarajan, Tushar and Ragusa, Francesco and Ramakrishnan, Santhosh Kumar and Seminara, Luigi and Somayazulu, Arjun and Song, Yale and Su, Shan and Xue, Zihui and Zhang, Edward and Zhang, Jinxu and Castillo, Angela and Chen, Changan and Fu, Xinzhu and Furuta, Ryosuke and Gonzalez, Cristina and Gupta, Prince and Hu, Jiabo and Huang, Yifei and Huang, Yiming and Khoo, Weslie and Kumar, Anush and Kuo, Robert and Lakhavani, Sach and Liu, Miao and Luo, Mi and Luo, Zhengyi and Meredith, Brighid and Miller, Austin and Oguntola, Oluwatumininu and Pan, Xiaqing and Peng, Penny and Pramanick, Shraman and Ramazanova, Merey and Ryan, Fiona and Shan, Wei and Somasundaram, Kiran and Song, Chenan and Southerland, Audrey and Tateno, Masatoshi and Wang, Huiyu and Wang, Yuchen and Yagi, Takuma and Yan, Mingfei and Yang, Xitong and Yu, Zecheng and Zha, Shengxin Cindy and Zhao, Chen and Zhao, Ziwei and Zhu, Zhifan and Zhuo, Jeff and Arbelaez, Pablo and Bertasius, Gedas and Damen, Dima and Engel, Jakob and Farinella, Giovanni Maria and Furnari, Antonino and Ghanem, Bernard and Hoffman, Judy and Jawahar, C. V. and Newcombe, Richard and Park, Hyun Soo and Rehg, James M. and Sato, Yoichi and Savva, Manolis and Shi, Jianbo and Shou, Mike Zheng and Wray, Michael},
    title     = {{Ego-Exo4D}: Understanding Skilled Human Activity from First- and Third-Person Perspectives},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {19383--19400}
}
Point Cloud Pre-training with Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2024_CVPR,
    author    = {Zheng, Xiao and Huang, Xiaoshui and Mei, Guofeng and Hou, Yuenan and Lyu, Zhaoyang and Dai, Bo and Ouyang, Wanli and Gong, Yongshun},
    title     = {Point Cloud Pre-training with Diffusion Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {22935--22945}
}
Mask4Align: Aligned Entity Prompting with Color Masks for Multi-Entity Localization Problems-
[pdf]
[bibtex]@InProceedings{Zhang_2024_CVPR,
    author    = {Zhang, Haoquan and Huang, Ronggang and Xie, Yi and Zhang, Huaidong},
    title     = {{Mask4Align}: Aligned Entity Prompting with Color Masks for Multi-Entity Localization Problems},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {13373--13383}
}
RCL: Reliable Continual Learning for Unified Failure Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2024_CVPR,
    author    = {Zhu, Fei and Cheng, Zhen and Zhang, Xu-Yao and Liu, Cheng-Lin and Zhang, Zhaoxiang},
    title     = {{RCL}: Reliable Continual Learning for Unified Failure Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {12140--12150}
}
Referring Image Editing: Object-level Image Editing via Referring Expressions-
[pdf]
[bibtex]@InProceedings{Liu_2024_CVPR,
    author    = {Liu, Chang and Li, Xiangtai and Ding, Henghui},
    title     = {Referring Image Editing: Object-level Image Editing via Referring Expressions},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {13128--13138}
}
CAMixerSR: Only Details Need More "Attention"-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
    author    = {Wang, Yan and Liu, Yi and Zhao, Shijie and Li, Junlin and Zhang, Li},
    title     = {{CAMixerSR}: Only Details Need More ``Attention''},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {25837--25846}
}
Towards Backward-Compatible Continual Learning of Image Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Duan_2024_CVPR,
    author    = {Duan, Zhihao and Lu, Ming and Yang, Justin and He, Jiangpeng and Ma, Zhan and Zhu, Fengqing},
    title     = {Towards Backward-Compatible Continual Learning of Image Compression},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {25564--25573}
}
Latent Modulated Function for Computational Optimal Continuous Image Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2024_CVPR,
    author    = {He, Zongyao and Jin, Zhi},
    title     = {Latent Modulated Function for Computational Optimal Continuous Image Representation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {26026--26035}
}
Unsupervised Video Domain Adaptation with Masked Pre-Training and Collaborative Self-Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Reddy_2024_CVPR,
    author    = {Reddy, Arun and Paul, William and Rivera, Corban and Shah, Ketul and de Melo, Celso M. and Chellappa, Rama},
    title     = {Unsupervised Video Domain Adaptation with Masked Pre-Training and Collaborative Self-Training},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {18919--18929}
}
UniDepth: Universal Monocular Metric Depth Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Piccinelli_2024_CVPR,
    author    = {Piccinelli, Luigi and Yang, Yung-Hsu and Sakaridis, Christos and Segu, Mattia and Li, Siyuan and Van Gool, Luc and Yu, Fisher},
    title     = {{UniDepth}: Universal Monocular Metric Depth Estimation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {10106--10116}
}
EMOPortraits: Emotion-enhanced Multimodal One-shot Head Avatars-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Drobyshev_2024_CVPR,
    author    = {Drobyshev, Nikita and Casademunt, Antoni Bigata and Vougioukas, Konstantinos and Landgraf, Zoe and Petridis, Stavros and Pantic, Maja},
    title     = {{EMOPortraits}: Emotion-enhanced Multimodal One-shot Head Avatars},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {8498--8507}
}
NeuRAD: Neural Rendering for Autonomous Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Tonderski_2024_CVPR,
    author    = {Tonderski, Adam and Lindstr{\"o}m, Carl and Hess, Georg and Ljungbergh, William and Svensson, Lennart and Petersson, Christoffer},
    title     = {{NeuRAD}: Neural Rendering for Autonomous Driving},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {14895--14904}
}
VideoCutLER: Surprisingly Simple Unsupervised Video Instance Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
    author    = {Wang, Xudong and Misra, Ishan and Zeng, Ziyun and Girdhar, Rohit and Darrell, Trevor},
    title     = {{VideoCutLER}: Surprisingly Simple Unsupervised Video Instance Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {22755--22764}
}
Bootstrapping Chest CT Image Understanding by Distilling Knowledge from X-ray Expert Models-
[pdf]
[supp]
[bibtex]@InProceedings{Cao_2024_CVPR,
    author    = {Cao, Weiwei and Zhang, Jianpeng and Xia, Yingda and Mok, Tony C. W. and Li, Zi and Ye, Xianghua and Lu, Le and Zheng, Jian and Tang, Yuxing and Zhang, Ling},
    title     = {Bootstrapping Chest {CT} Image Understanding by Distilling Knowledge from {X-ray} Expert Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {11238--11247}
}
Magic Tokens: Select Diverse Tokens for Multi-modal Object Re-Identification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Pingping and Wang, Yuhao and Liu, Yang and Tu, Zhengzheng and Lu, Huchuan},
  title     = {Magic Tokens: Select Diverse Tokens for Multi-modal Object Re-Identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17117--17126},
}
Open3DIS: Open-Vocabulary 3D Instance Segmentation with 2D Mask Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2024_CVPR,
  author    = {Nguyen, Phuc and Ngo, Tuan Duc and Kalogerakis, Evangelos and Gan, Chuang and Tran, Anh and Pham, Cuong and Nguyen, Khoi},
  title     = {{Open3DIS}: Open-Vocabulary {3D} Instance Segmentation with {2D} Mask Guidance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4018--4028},
}
SignGraph: A Sign Sequence is Worth Graphs of Nodes-
[pdf]
[supp]
[bibtex]@InProceedings{Gan_2024_CVPR,
  author    = {Gan, Shiwei and Yin, Yafeng and Jiang, Zhiwei and Wen, Hongkai and Xie, Lei and Lu, Sanglu},
  title     = {{SignGraph}: A Sign Sequence is Worth Graphs of Nodes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13470--13479},
}
ControlRoom3D: Room Generation using Semantic Proxy Rooms-
[pdf]
[supp]
[bibtex]@InProceedings{Schult_2024_CVPR,
  author    = {Schult, Jonas and Tsai, Sam and H{\"o}llein, Lukas and Wu, Bichen and Wang, Jialiang and Ma, Chih-Yao and Li, Kunpeng and Wang, Xiaofang and Wimbauer, Felix and He, Zijian and Zhang, Peizhao and Leibe, Bastian and Vajda, Peter and Hou, Ji},
  title     = {{ControlRoom3D}: Room Generation using Semantic Proxy Rooms},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6201--6210},
}
DeconfuseTrack: Dealing with Confusion for Multi-Object Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Cheng and Han, Shoudong and He, Mengyu and Zheng, Wenbo and Wei, Yuhao},
  title     = {{DeconfuseTrack}: Dealing with Confusion for Multi-Object Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {19290--19299},
}
PAPR in Motion: Seamless Point-level 3D Scene Interpolation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2024_CVPR,
  author    = {Peng, Shichong and Zhang, Yanshu and Li, Ke},
  title     = {{PAPR} in Motion: Seamless Point-level {3D} Scene Interpolation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {21007--21016},
}
Causal Mode Multiplexer: A Novel Framework for Unbiased Multispectral Pedestrian Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR,
  author    = {Kim, Taeheon and Shin, Sebin and Yu, Youngjoon and Kim, Hak Gu and Ro, Yong Man},
  title     = {Causal Mode Multiplexer: A Novel Framework for Unbiased Multispectral Pedestrian Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {26784--26793},
}
HIMap: HybrId Representation Learning for End-to-end Vectorized HD Map Construction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Yi and Zhang, Hui and Yu, Jiaqian and Yang, Yifan and Jung, Sangil and Park, Seung-In and Yoo, ByungIn},
  title     = {{HIMap}: {HybrId} Representation Learning for End-to-end Vectorized {HD} Map Construction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15396--15406},
}
LTA-PCS: Learnable Task-Agnostic Point Cloud Sampling-
[pdf]
[bibtex]@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Jiaheng and Li, Jianhao and Wang, Kaisiyuan and Guo, Hongcheng and Yang, Jian and Peng, Junran and Xu, Ke and Liu, Xianglong and Guo, Jinyang},
  title     = {{LTA-PCS}: Learnable Task-Agnostic Point Cloud Sampling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {28035--28045},
}
Non-Rigid Structure-from-Motion: Temporally-Smooth Procrustean Alignment and Spatially-Variant Deformation Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Shi_2024_CVPR,
  author    = {Shi, Jiawei and Deng, Hui and Dai, Yuchao},
  title     = {Non-Rigid Structure-from-Motion: Temporally-Smooth {Procrustean} Alignment and Spatially-Variant Deformation Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {21446--21455},
}
ShapeMatcher: Self-Supervised Joint Shape Canonicalization Segmentation Retrieval and Deformation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Di_2024_CVPR,
  author    = {Di, Yan and Zhang, Chenyangguang and Wang, Chaowei and Zhang, Ruida and Zhai, Guangyao and Li, Yanyan and Fu, Bowen and Ji, Xiangyang and Gao, Shan},
  title     = {{ShapeMatcher}: Self-Supervised Joint Shape Canonicalization Segmentation Retrieval and Deformation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {21017--21028},
}
UniPTS: A Unified Framework for Proficient Post-Training Sparsity-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xie_2024_CVPR,
  author    = {Xie, Jingjing and Zhang, Yuxin and Lin, Mingbao and Cao, Liujuan and Ji, Rongrong},
  title     = {{UniPTS}: A Unified Framework for Proficient Post-Training Sparsity},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5746--5755},
}
HumanNorm: Learning Normal Diffusion Model for High-quality and Realistic 3D Human Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Xin and Shao, Ruizhi and Zhang, Qi and Zhang, Hongwen and Feng, Ying and Liu, Yebin and Wang, Qing},
  title     = {{HumanNorm}: Learning Normal Diffusion Model for High-quality and Realistic {3D} Human Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4568--4577},
}
Unleashing Unlabeled Data: A Paradigm for Cross-View Geo-Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Guopeng and Qian, Ming and Xia, Gui-Song},
  title     = {Unleashing Unlabeled Data: A Paradigm for Cross-View Geo-Localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16719--16729},
}
Global Latent Neural Rendering-
[pdf]
[arXiv]
[bibtex]@InProceedings{Tanay_2024_CVPR,
  author    = {Tanay, Thomas and Maggioni, Matteo},
  title     = {Global Latent Neural Rendering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {19723--19733},
}
PanoOcc: Unified Occupancy Representation for Camera-based 3D Panoptic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Yuqi and Chen, Yuntao and Liao, Xingyu and Fan, Lue and Zhang, Zhaoxiang},
  title     = {{PanoOcc}: Unified Occupancy Representation for Camera-based {3D} Panoptic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17158--17168},
}
Sparse Views Near Light: A Practical Paradigm for Uncalibrated Point-light Photometric Stereo-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Brahimi_2024_CVPR,
  author    = {Brahimi, Mohammed and Haefner, Bjoern and Ye, Zhenzhang and Goldluecke, Bastian and Cremers, Daniel},
  title     = {Sparse Views Near Light: A Practical Paradigm for Uncalibrated Point-light Photometric Stereo},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11862--11872},
}
Meta-Point Learning and Refining for Category-Agnostic Pose Estimation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Junjie and Yan, Jiebin and Fang, Yuming and Niu, Li},
  title     = {Meta-Point Learning and Refining for Category-Agnostic Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {23534--23543},
}
Cross-view and Cross-pose Completion for 3D Human Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Armando_2024_CVPR,
  author    = {Armando, Matthieu and Galaaoui, Salma and Baradel, Fabien and Lucas, Thomas and Leroy, Vincent and Br{\'e}gier, Romain and Weinzaepfel, Philippe and Rogez, Gr{\'e}gory},
  title     = {Cross-view and Cross-pose Completion for {3D} Human Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1512--1523},
}
Batch Normalization Alleviates the Spectral Bias in Coordinate Networks-
[pdf]
[supp]
[bibtex]@InProceedings{Cai_2024_CVPR,
  author    = {Cai, Zhicheng and Zhu, Hao and Shen, Qiu and Wang, Xinran and Cao, Xun},
  title     = {Batch Normalization Alleviates the Spectral Bias in Coordinate Networks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {25160--25171},
}
Efficient Scene Recovery Using Luminous Flux Prior-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Zhongyu and Zhang, Lei},
  title     = {Efficient Scene Recovery Using Luminous Flux Prior},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2743--2752},
}
LQMFormer: Language-aware Query Mask Transformer for Referring Image Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Shah_2024_CVPR,
  author    = {Shah, Nisarg A. and VS, Vibashan and Patel, Vishal M.},
  title     = {{LQMFormer}: Language-aware Query Mask Transformer for Referring Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12903--12913},
}
Customize your NeRF: Adaptive Source Driven 3D Scene Editing via Local-Global Iterative Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2024_CVPR,
  author    = {He, Runze and Huang, Shaofei and Nie, Xuecheng and Hui, Tianrui and Liu, Luoqi and Dai, Jiao and Han, Jizhong and Li, Guanbin and Liu, Si},
  title     = {Customize your {NeRF}: Adaptive Source Driven {3D} Scene Editing via Local-Global Iterative Training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6966--6975},
}
SplaTAM: Splat Track & Map 3D Gaussians for Dense RGB-D SLAM-
[pdf]
[supp]
[bibtex]@InProceedings{Keetha_2024_CVPR,
  author    = {Keetha, Nikhil and Karhade, Jay and Jatavallabhula, Krishna Murthy and Yang, Gengshan and Scherer, Sebastian and Ramanan, Deva and Luiten, Jonathon},
  title     = {{SplaTAM}: Splat Track \& Map {3D} {Gaussians} for Dense {RGB-D} {SLAM}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {21357--21366},
}
Instance-based Max-margin for Practical Few-shot Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fu_2024_CVPR,
  author    = {Fu, Minghao and Zhu, Ke},
  title     = {Instance-based Max-margin for Practical Few-shot Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {28674--28683},
}
Spherical Mask: Coarse-to-Fine 3D Point Cloud Instance Segmentation with Spherical Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shin_2024_CVPR,
  author    = {Shin, Sangyun and Zhou, Kaichen and Vankadari, Madhu and Markham, Andrew and Trigoni, Niki},
  title     = {Spherical Mask: Coarse-to-Fine {3D} Point Cloud Instance Segmentation with Spherical Representation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4060--4069},
}
Omni-Q: Omni-Directional Scene Understanding for Unsupervised Visual Grounding-
[pdf]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Sai and Lin, Yutian and Wu, Yu},
  title     = {{Omni-Q}: Omni-Directional Scene Understanding for Unsupervised Visual Grounding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14261--14270},
}
VISTA-LLAMA: Reducing Hallucination in Video Language Models via Equal Distance to Visual Tokens-
[pdf]
[bibtex]@InProceedings{Ma_2024_CVPR,
  author    = {Ma, Fan and Jin, Xiaojie and Wang, Heng and Xian, Yuchen and Feng, Jiashi and Yang, Yi},
  title     = {{VISTA-LLAMA}: Reducing Hallucination in Video Language Models via Equal Distance to Visual Tokens},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13151--13160},
}
FSRT: Facial Scene Representation Transformer for Face Reenactment from Factorized Appearance Head-pose and Facial Expression Features-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rochow_2024_CVPR,
  author    = {Rochow, Andre and Schwarz, Max and Behnke, Sven},
  title     = {{FSRT}: Facial Scene Representation Transformer for Face Reenactment from Factorized Appearance Head-pose and Facial Expression Features},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7716--7726},
}
Efficient Multitask Dense Predictor via Binarization-
[pdf]
[arXiv]
[bibtex]@InProceedings{Shang_2024_CVPR,
  author    = {Shang, Yuzhang and Xu, Dan and Liu, Gaowen and Kompella, Ramana Rao and Yan, Yan},
  title     = {Efficient Multitask Dense Predictor via Binarization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15899--15908},
}
TetraSphere: A Neural Descriptor for O(3)-Invariant Point Cloud Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Melnyk_2024_CVPR,
  author    = {Melnyk, Pavlo and Robinson, Andreas and Felsberg, Michael and Wadenb{\"a}ck, M{\r{a}}rten},
  title     = {{TetraSphere}: A Neural Descriptor for {O(3)}-Invariant Point Cloud Analysis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5620--5630},
}
ZeroRF: Fast Sparse View 360° Reconstruction with Zero Pretraining-
[pdf]
[supp]
[bibtex]@InProceedings{Shi_2024_CVPR,
  author    = {Shi, Ruoxi and Wei, Xinyue and Wang, Cheng and Su, Hao},
  title     = {{ZeroRF}: Fast Sparse View {$360^{\circ}$} Reconstruction with Zero Pretraining},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {21114--21124},
}
RCooper: A Real-world Large-scale Dataset for Roadside Cooperative Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hao_2024_CVPR,
  author    = {Hao, Ruiyang and Fan, Siqi and Dai, Yingru and Zhang, Zhenlin and Li, Chenxi and Wang, Yuntian and Yu, Haibao and Yang, Wenxian and Yuan, Jirui and Nie, Zaiqing},
  title     = {{RCooper}: A Real-world Large-scale Dataset for Roadside Cooperative Perception},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {22347--22357},
}
TutteNet: Injective 3D Deformations by Composition of 2D Mesh Deformations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2024_CVPR,
  author    = {Sun, Bo and Groueix, Thibault and Song, Chen and Huang, Qixing and Aigerman, Noam},
  title     = {{TutteNet}: Injective {3D} Deformations by Composition of {2D} Mesh Deformations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {21378--21389},
}
WANDR: Intention-guided Human Motion Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Diomataris_2024_CVPR,
  author    = {Diomataris, Markos and Athanasiou, Nikos and Taheri, Omid and Wang, Xi and Hilliges, Otmar and Black, Michael J.},
  title     = {{WANDR}: Intention-guided Human Motion Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {927--936},
}
Jointly Training and Pruning CNNs via Learnable Agent Guidance and Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ganjdanesh_2024_CVPR,
  author    = {Ganjdanesh, Alireza and Gao, Shangqian and Huang, Heng},
  title     = {Jointly Training and Pruning {CNNs} via Learnable Agent Guidance and Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16058--16069},
}
Estimating Noisy Class Posterior with Part-level Labels for Noisy Label Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Rui and Shi, Bin and Ruan, Jianfei and Pan, Tianze and Dong, Bo},
  title     = {Estimating Noisy Class Posterior with Part-level Labels for Noisy Label Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {22809--22819},
}
Leveraging Vision-Language Models for Improving Domain Generalization in Image Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Addepalli_2024_CVPR,
  author    = {Addepalli, Sravanti and Asokan, Ashish Ramayee and Sharma, Lakshay and Babu, R. Venkatesh},
  title     = {Leveraging Vision-Language Models for Improving Domain Generalization in Image Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {23922--23932},
}
Diffusion-EDFs: Bi-equivariant Denoising Generative Modeling on SE(3) for Visual Robotic Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Ryu_2024_CVPR,
  author    = {Ryu, Hyunwoo and Kim, Jiwoo and An, Hyunseok and Chang, Junwoo and Seo, Joohwan and Kim, Taehan and Kim, Yubin and Hwang, Chaewon and Choi, Jongeun and Horowitz, Roberto},
  title     = {{Diffusion-EDFs}: Bi-equivariant Denoising Generative Modeling on {SE(3)} for Visual Robotic Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18007--18018},
}
Prompt Learning via Meta-Regularization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2024_CVPR,
  author    = {Park, Jinyoung and Ko, Juyeon and Kim, Hyunwoo J.},
  title     = {Prompt Learning via Meta-Regularization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {26940--26950},
}
Contrasting Intra-Modal and Ranking Cross-Modal Hard Negatives to Enhance Visio-Linguistic Compositional Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Le and Awal, Rabiul and Agrawal, Aishwarya},
  title     = {Contrasting Intra-Modal and Ranking Cross-Modal Hard Negatives to Enhance Visio-Linguistic Compositional Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13774--13784},
}
CMA: A Chromaticity Map Adapter for Robust Detection of Screen-Recapture Document Images-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Changsheng and Lin, Liangwei and Chen, Yongqi and Li, Bin and Zeng, Jishen and Huang, Jiwu},
  title     = {{CMA}: A Chromaticity Map Adapter for Robust Detection of Screen-Recapture Document Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15577--15586},
}
Embodied Multi-Modal Agent trained by an LLM from a Parallel TextWorld-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Yijun and Zhou, Tianyi and Li, Kanxue and Tao, Dapeng and Li, Lusong and Shen, Li and He, Xiaodong and Jiang, Jing and Shi, Yuhui},
  title     = {Embodied Multi-Modal Agent trained by an {LLM} from a Parallel {TextWorld}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {26275--26285},
}
VA3: Virtually Assured Amplification Attack on Probabilistic Copyright Protection for Text-to-Image Generative Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Xiang and Shen, Qianli and Kawaguchi, Kenji},
  title     = {{VA3}: Virtually Assured Amplification Attack on Probabilistic Copyright Protection for Text-to-Image Generative Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12363--12373},
}
Point-VOS: Pointing Up Video Object Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Mahadevan_2024_CVPR,
  author    = {Mahadevan, Sabarinath and Zulfikar, Idil Esen and Voigtlaender, Paul and Leibe, Bastian},
  title     = {{Point-VOS}: Pointing Up Video Object Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {22217--22226},
}
Intriguing Properties of Diffusion Models: An Empirical Study of the Natural Attack Capability in Text-to-Image Generative Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sato_2024_CVPR,
  author    = {Sato, Takami and Yue, Justin and Chen, Nanze and Wang, Ningfei and Chen, Qi Alfred},
  title     = {Intriguing Properties of Diffusion Models: An Empirical Study of the Natural Attack Capability in Text-to-Image Generative Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {24635--24644},
}
GroupContrast: Semantic-aware Self-supervised Representation Learning for 3D Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Chengyao and Jiang, Li and Wu, Xiaoyang and Tian, Zhuotao and Peng, Bohao and Zhao, Hengshuang and Jia, Jiaya},
  title     = {{GroupContrast}: Semantic-aware Self-supervised Representation Learning for {3D} Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4917--4928},
}
HouseCat6D - A Large-Scale Multi-Modal Category Level 6D Object Perception Dataset with Household Objects in Realistic Scenarios-
[pdf]
[supp]
[bibtex]@InProceedings{Jung_2024_CVPR,
  author    = {Jung, HyunJun and Wu, Shun-Cheng and Ruhkamp, Patrick and Zhai, Guangyao and Schieber, Hannah and Rizzoli, Giulia and Wang, Pengyuan and Zhao, Hongcheng and Garattoni, Lorenzo and Meier, Sven and Roth, Daniel and Navab, Nassir and Busam, Benjamin},
  title     = {{HouseCat6D} - A Large-Scale Multi-Modal Category Level {6D} Object Perception Dataset with Household Objects in Realistic Scenarios},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {22498--22508},
}
Privacy-Preserving Face Recognition Using Trainable Feature Subtraction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mi_2024_CVPR,
  author    = {Mi, Yuxi and Zhong, Zhizhou and Huang, Yuge and Ji, Jiazhen and Xu, Jianqing and Wang, Jun and Wang, Shaoming and Ding, Shouhong and Zhou, Shuigeng},
  title     = {Privacy-Preserving Face Recognition Using Trainable Feature Subtraction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {297--307},
}
Towards Co-Evaluation of Cameras HDR and Algorithms for Industrial-Grade 6DoF Pose Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Kalra_2024_CVPR,
  author    = {Kalra, Agastya and Stoppi, Guy and Marin, Dmitrii and Taamazyan, Vage and Shandilya, Aarrushi and Agarwal, Rishav and Boykov, Anton and Chong, Tze Hao and Stark, Michael},
  title     = {Towards Co-Evaluation of Cameras {HDR} and Algorithms for Industrial-Grade {6DoF} Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {22691--22701},
}
Learning Visual Prompt for Gait Recognition-
[pdf]
[bibtex]@InProceedings{Ma_2024_CVPR,
  author    = {Ma, Kang and Fu, Ying and Cao, Chunshui and Hou, Saihui and Huang, Yongzhen and Zheng, Dezhi},
  title     = {Learning Visual Prompt for Gait Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {593--603},
}
MLP Can Be A Good Transformer Learner-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2024_CVPR,
  author    = {Lin, Sihao and Lyu, Pumeng and Liu, Dongrui and Tang, Tao and Liang, Xiaodan and Song, Andy and Chang, Xiaojun},
  title     = {{MLP} Can Be A Good Transformer Learner},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {19489--19498},
}
GraphDreamer: Compositional 3D Scene Synthesis from Scene Graphs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2024_CVPR,
  author    = {Gao, Gege and Liu, Weiyang and Chen, Anpei and Geiger, Andreas and Sch{\"o}lkopf, Bernhard},
  title     = {{GraphDreamer}: Compositional {3D} Scene Synthesis from Scene Graphs},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {21295--21304},
}
Visual-Augmented Dynamic Semantic Prototype for Generative Zero-Shot Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Hou_2024_CVPR,
  author    = {Hou, Wenjin and Chen, Shiming and Chen, Shuhuang and Hong, Ziming and Wang, Yan and Feng, Xuetao and Khan, Salman and Khan, Fahad Shahbaz and You, Xinge},
  title     = {Visual-Augmented Dynamic Semantic Prototype for Generative Zero-Shot Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {23627--23637},
}
Dynamic Prompt Optimizing for Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mo_2024_CVPR,
  author    = {Mo, Wenyi and Zhang, Tianyu and Bai, Yalong and Su, Bing and Wen, Ji-Rong and Yang, Qing},
  title     = {Dynamic Prompt Optimizing for Text-to-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {26627--26636},
}
SC-GS: Sparse-Controlled Gaussian Splatting for Editable Dynamic Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Yi-Hua and Sun, Yang-Tian and Yang, Ziyi and Lyu, Xiaoyang and Cao, Yan-Pei and Qi, Xiaojuan},
  title     = {{SC-GS}: Sparse-Controlled {Gaussian} Splatting for Editable Dynamic Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4220--4230},
}
360Loc: A Dataset and Benchmark for Omnidirectional Visual Localization with Cross-device Queries-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Huajian and Liu, Changkun and Zhu, Yipeng and Cheng, Hui and Braud, Tristan and Yeung, Sai-Kit},
  title     = {{360Loc}: A Dataset and Benchmark for Omnidirectional Visual Localization with Cross-device Queries},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {22314--22324},
}
Domain Gap Embeddings for Generative Dataset Augmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Yinong Oliver and Chung, Younjoon and Wu, Chen Henry and {De la Torre}, Fernando},
  title     = {Domain Gap Embeddings for Generative Dataset Augmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {28684--28694},
}
Geometrically-driven Aggregation for Zero-shot 3D Point Cloud Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mei_2024_CVPR,
  author    = {Mei, Guofeng and Riz, Luigi and Wang, Yiming and Poiesi, Fabio},
  title     = {Geometrically-driven Aggregation for Zero-shot {3D} Point Cloud Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {27896--27905},
}
Learning to Rank Patches for Unbiased Image Redundancy Reduction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2024_CVPR,
  author    = {Luo, Yang and Chen, Zhineng and Zhou, Peng and Wu, Zuxuan and Gao, Xieping and Jiang, Yu-Gang},
  title     = {Learning to Rank Patches for Unbiased Image Redundancy Reduction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {22831--22840},
}
Going Beyond Multi-Task Dense Prediction with Synergy Embedding Models-
[pdf]
[bibtex]@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Huimin and Huang, Yawen and Lin, Lanfen and Tong, Ruofeng and Chen, Yen-Wei and Zheng, Hao and Li, Yuexiang and Zheng, Yefeng},
  title     = {Going Beyond Multi-Task Dense Prediction with Synergy Embedding Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {28181--28190},
}
Disentangled Pre-training for Human-Object Interaction Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Zhuolong and Li, Xingao and Ding, Changxing and Xu, Xiangmin},
  title     = {Disentangled Pre-training for Human-Object Interaction Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {28191--28201},
}
Light the Night: A Multi-Condition Diffusion Framework for Unpaired Low-Light Enhancement in Autonomous Driving-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Jinlong and Li, Baolu and Tu, Zhengzhong and Liu, Xinyu and Guo, Qing and Juefei-Xu, Felix and Xu, Runsheng and Yu, Hongkai},
  title     = {Light the Night: A Multi-Condition Diffusion Framework for Unpaired Low-Light Enhancement in Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15205--15215},
}
MetaCloak: Preventing Unauthorized Subject-driven Text-to-image Diffusion-based Synthesis via Meta-learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Yixin and Fan, Chenrui and Dai, Yutong and Chen, Xun and Zhou, Pan and Sun, Lichao},
  title     = {{MetaCloak}: Preventing Unauthorized Subject-driven Text-to-image Diffusion-based Synthesis via Meta-learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {24219--24228},
}
Neural Modes: Self-supervised Learning of Nonlinear Modal Subspaces-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Jiahong and Du, Yinwei and Coros, Stelian and Thomaszewski, Bernhard},
  title     = {Neural Modes: Self-supervised Learning of Nonlinear Modal Subspaces},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {23158--23167},
}
How to Train Neural Field Representations: A Comprehensive Study and Benchmark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Papa_2024_CVPR,
  author    = {Papa, Samuele and Valperga, Riccardo and Knigge, David and Kofinas, Miltiadis and Lippe, Phillip and Sonke, Jan-Jakob and Gavves, Efstratios},
  title     = {How to Train Neural Field Representations: A Comprehensive Study and Benchmark},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {22616--22625},
}
Delving into the Trajectory Long-tail Distribution for Muti-object Tracking-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Sijia and Yu, En and Li, Jinyang and Tao, Wenbing}, title = {Delving into the Trajectory Long-tail Distribution for Muti-object Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19341-19351} }
Tri-Modal Motion Retrieval by Learning a Joint Embedding Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2024_CVPR, author = {Yin, Kangning and Zou, Shihao and Ge, Yuxuan and Tian, Zheng}, title = {Tri-Modal Motion Retrieval by Learning a Joint Embedding Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1596-1605} }
Seg2Reg: Differentiable 2D Segmentation to 1D Regression Rendering for 360 Room Layout Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2024_CVPR, author = {Sun, Cheng and Tai, Wei-En and Shih, Yu-Lin and Chen, Kuan-Wei and Syu, Yong-Jing and The, Kent Selwyn and Wang, Yu-Chiang Frank and Chen, Hwann-Tzong}, title = {Seg2Reg: Differentiable 2D Segmentation to 1D Regression Rendering for 360 Room Layout Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10435-10445} }
Strong Transferable Adversarial Attacks via Ensembled Asymptotically Normal Distribution Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2024_CVPR, author = {Fang, Zhengwei and Wang, Rui and Huang, Tao and Jing, Liping}, title = {Strong Transferable Adversarial Attacks via Ensembled Asymptotically Normal Distribution Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24841-24850} }
Spanning Training Progress: Temporal Dual-Depth Scoring (TDDS) for Enhanced Dataset Pruning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Xin and Du, Jiawei and Li, Yunsong and Xie, Weiying and Zhou, Joey Tianyi}, title = {Spanning Training Progress: Temporal Dual-Depth Scoring (TDDS) for Enhanced Dataset Pruning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26223-26232} }
UniMix: Towards Domain Adaptive and Generalizable LiDAR Semantic Segmentation in Adverse Weather-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Haimei and Zhang, Jing and Chen, Zhuo and Zhao, Shanshan and Tao, Dacheng}, title = {UniMix: Towards Domain Adaptive and Generalizable LiDAR Semantic Segmentation in Adverse Weather}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14781-14791} }
Visual Delta Generator with Large Multi-modal Models for Semi-supervised Composed Image Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jang_2024_CVPR, author = {Jang, Young Kyun and Kim, Donghyun and Meng, Zihang and Huynh, Dat and Lim, Ser-Nam}, title = {Visual Delta Generator with Large Multi-modal Models for Semi-supervised Composed Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16805-16814} }
Selective Interpretable and Motion Consistent Privacy Attribute Obfuscation for Action Recognition-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ilic_2024_CVPR, author = {Ilic, Filip and Zhao, He and Pock, Thomas and Wildes, Richard P.}, title = {Selective Interpretable and Motion Consistent Privacy Attribute Obfuscation for Action Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18730-18739} }
HiPose: Hierarchical Binary Surface Encoding and Correspondence Pruning for RGB-D 6DoF Object Pose Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2024_CVPR, author = {Lin, Yongliang and Su, Yongzhi and Nathan, Praveen and Inuganti, Sandeep and Di, Yan and Sundermeyer, Martin and Manhardt, Fabian and Stricker, Didier and Rambach, Jason and Zhang, Yu}, title = {HiPose: Hierarchical Binary Surface Encoding and Correspondence Pruning for RGB-D 6DoF Object Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10148-10158} }
DiffForensics: Leveraging Diffusion Prior to Image Forgery Detection and Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2024_CVPR, author = {Yu, Zeqin and Ni, Jiangqun and Lin, Yuzhen and Deng, Haoyi and Li, Bin}, title = {DiffForensics: Leveraging Diffusion Prior to Image Forgery Detection and Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12765-12774} }
CoSeR: Bridging Image and Language for Cognitive Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2024_CVPR, author = {Sun, Haoze and Li, Wenbo and Liu, Jianzhuang and Chen, Haoyu and Pei, Renjing and Zou, Xueyi and Yan, Youliang and Yang, Yujiu}, title = {CoSeR: Bridging Image and Language for Cognitive Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25868-25878} }
Geometry-aware Reconstruction and Fusion-refined Rendering for Generalizable Neural Radiance Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Tianqi and Ye, Xinyi and Shi, Min and Huang, Zihao and Pan, Zhiyu and Peng, Zhan and Cao, Zhiguo}, title = {Geometry-aware Reconstruction and Fusion-refined Rendering for Generalizable Neural Radiance Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7654-7663} }
Boosting Self-Supervision for Single-View Scene Completion via Knowledge Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2024_CVPR, author = {Han, Keonhee and Muhle, Dominik and Wimbauer, Felix and Cremers, Daniel}, title = {Boosting Self-Supervision for Single-View Scene Completion via Knowledge Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9837-9847} }
PromptKD: Unsupervised Prompt Distillation for Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Zheng and Li, Xiang and Fu, Xinyi and Zhang, Xin and Wang, Weiqiang and Chen, Shuo and Yang, Jian}, title = {PromptKD: Unsupervised Prompt Distillation for Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26617-26626} }
VideoBooth: Diffusion-based Video Generation with Image Prompts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2024_CVPR, author = {Jiang, Yuming and Wu, Tianxing and Yang, Shuai and Si, Chenyang and Lin, Dahua and Qiao, Yu and Loy, Chen Change and Liu, Ziwei}, title = {VideoBooth: Diffusion-based Video Generation with Image Prompts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6689-6700} }
Robust Overfitting Does Matter: Test-Time Adversarial Purification With FGSM-
[pdf]
[arXiv]
[bibtex]@InProceedings{Tang_2024_CVPR, author = {Tang, Linyu and Zhang, Lei}, title = {Robust Overfitting Does Matter: Test-Time Adversarial Purification With FGSM}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24347-24356} }
Sparse Global Matching for Video Frame Interpolation with Large Motion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Chunxu and Zhang, Guozhen and Zhao, Rui and Wang, Limin}, title = {Sparse Global Matching for Video Frame Interpolation with Large Motion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19125-19134} }
ExtDM: Distribution Extrapolation Diffusion Model for Video Prediction-
[pdf]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Zhicheng and Hu, Junyao and Cheng, Wentao and Paudel, Danda and Yang, Jufeng}, title = {ExtDM: Distribution Extrapolation Diffusion Model for Video Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19310-19320} }
Modality-Collaborative Test-Time Adaptation for Action Recognition-
[pdf]
[bibtex]@InProceedings{Xiong_2024_CVPR, author = {Xiong, Baochen and Yang, Xiaoshan and Song, Yaguang and Wang, Yaowei and Xu, Changsheng}, title = {Modality-Collaborative Test-Time Adaptation for Action Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26732-26741} }
SCULPT: Shape-Conditioned Unpaired Learning of Pose-dependent Clothed and Textured Human Meshes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sanyal_2024_CVPR, author = {Sanyal, Soubhik and Ghosh, Partha and Yang, Jinlong and Black, Michael J. and Thies, Justus and Bolkart, Timo}, title = {SCULPT: Shape-Conditioned Unpaired Learning of Pose-dependent Clothed and Textured Human Meshes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2362-2371} }
Point Segment and Count: A Generalized Framework for Object Counting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Zhizhong and Dai, Mingliang and Zhang, Yi and Zhang, Junping and Shan, Hongming}, title = {Point Segment and Count: A Generalized Framework for Object Counting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17067-17076} }
Small Steps and Level Sets: Fitting Neural Surface Models with Point Guidance-
[pdf]
[supp]
[bibtex]@InProceedings{Koneputugodage_2024_CVPR, author = {Koneputugodage, Chamin Hewa and Ben-Shabat, Yizhak and Campbell, Dylan and Gould, Stephen}, title = {Small Steps and Level Sets: Fitting Neural Surface Models with Point Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21456-21465} }
Domain-Agnostic Mutual Prompting for Unsupervised Domain Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2024_CVPR, author = {Du, Zhekai and Li, Xinyao and Li, Fengling and Lu, Ke and Zhu, Lei and Li, Jingjing}, title = {Domain-Agnostic Mutual Prompting for Unsupervised Domain Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23375-23384} }
PTT: Point-Trajectory Transformer for Efficient Temporal 3D Object Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Kuan-Chih and Lyu, Weijie and Yang, Ming-Hsuan and Tsai, Yi-Hsuan}, title = {PTT: Point-Trajectory Transformer for Efficient Temporal 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14938-14947} }
Generative Proxemics: A Prior for 3D Social Interaction from Images-
[pdf]
[supp]
[bibtex]@InProceedings{Muller_2024_CVPR, author = {M{\"u}ller, Lea and Ye, Vickie and Pavlakos, Georgios and Black, Michael and Kanazawa, Angjoo}, title = {Generative Proxemics: A Prior for 3D Social Interaction from Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9687-9697} }
A Simple and Effective Point-based Network for Event Camera 6-DOFs Pose Relocalization-
[pdf]
[bibtex]@InProceedings{Ren_2024_CVPR, author = {Ren, Hongwei and Zhu, Jiadong and Zhou, Yue and Fu, Haotian and Huang, Yulong and Cheng, Bojun}, title = {A Simple and Effective Point-based Network for Event Camera 6-DOFs Pose Relocalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18112-18121} }
Semantic-Aware Multi-Label Adversarial Attacks-
[pdf]
[supp]
[bibtex]@InProceedings{Mahmood_2024_CVPR, author = {Mahmood, Hassan and Elhamifar, Ehsan}, title = {Semantic-Aware Multi-Label Adversarial Attacks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24251-24262} }
EasyDrag: Efficient Point-based Manipulation on Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Hou_2024_CVPR, author = {Hou, Xingzhong and Liu, Boxiao and Zhang, Yi and Liu, Jihao and Liu, Yu and You, Haihang}, title = {EasyDrag: Efficient Point-based Manipulation on Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8404-8413} }
Region-Based Representations Revisited-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shlapentokh-Rothman_2024_CVPR, author = {Shlapentokh-Rothman, Michal and Blume, Ansel and Xiao, Yao and Wu, Yuqun and TV, Sethuraman and Tao, Heyi and Lee, Jae Yong and Torres, Wilfredo and Wang, Yu-Xiong and Hoiem, Derek}, title = {Region-Based Representations Revisited}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17107-17116} }
GenH2R: Learning Generalizable Human-to-Robot Handover via Scalable Simulation Demonstration and Imitation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Zifan and Chen, Junyu and Chen, Ziqing and Xie, Pengwei and Chen, Rui and Yi, Li}, title = {GenH2R: Learning Generalizable Human-to-Robot Handover via Scalable Simulation Demonstration and Imitation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16362-16372} }
Modality-Agnostic Structural Image Representation Learning for Deformable Multi-Modality Medical Image Registration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mok_2024_CVPR, author = {Mok, Tony C. W. and Li, Zi and Bai, Yunhao and Zhang, Jianpeng and Liu, Wei and Zhou, Yan-Jie and Yan, Ke and Jin, Dakai and Shi, Yu and Yin, Xiaoli and Lu, Le and Zhang, Ling}, title = {Modality-Agnostic Structural Image Representation Learning for Deformable Multi-Modality Medical Image Registration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11215-11225} }
Any-Shift Prompting for Generalization over Distributions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2024_CVPR, author = {Xiao, Zehao and Shen, Jiayi and Derakhshani, Mohammad Mahdi and Liao, Shengcai and Snoek, Cees G. M.}, title = {Any-Shift Prompting for Generalization over Distributions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13849-13860} }
InterHandGen: Two-Hand Interaction Generation via Cascaded Reverse Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2024_CVPR, author = {Lee, Jihyun and Saito, Shunsuke and Nam, Giljoo and Sung, Minhyuk and Kim, Tae-Kyun}, title = {InterHandGen: Two-Hand Interaction Generation via Cascaded Reverse Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {527-537} }
CPR-Coach: Recognizing Composite Error Actions based on Single-class Training-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Shunli and Wang, Shuaibing and Yang, Dingkang and Li, Mingcheng and Kuang, Haopeng and Zhao, Xiao and Su, Liuzhen and Zhai, Peng and Zhang, Lihua}, title = {CPR-Coach: Recognizing Composite Error Actions based on Single-class Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18782-18792} }
Video2Game: Real-time Interactive Realistic and Browser-Compatible Environment from a Single Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2024_CVPR, author = {Xia, Hongchi and Lin, Zhi-Hao and Ma, Wei-Chiu and Wang, Shenlong}, title = {Video2Game: Real-time Interactive Realistic and Browser-Compatible Environment from a Single Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4578-4588} }
Tackling the Singularities at the Endpoints of Time Intervals in Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Pengze and Yin, Hubery and Li, Chen and Xie, Xiaohua}, title = {Tackling the Singularities at the Endpoints of Time Intervals in Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6945-6954} }
MatSynth: A Modern PBR Materials Dataset-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vecchio_2024_CVPR, author = {Vecchio, Giuseppe and Deschaintre, Valentin}, title = {MatSynth: A Modern PBR Materials Dataset}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22109-22118} }
CHAIN: Enhancing Generalization in Data-Efficient GANs via lipsCHitz continuity constrAIned Normalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ni_2024_CVPR, author = {Ni, Yao and Koniusz, Piotr}, title = {CHAIN: Enhancing Generalization in Data-Efficient GANs via lipsCHitz continuity constrAIned Normalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6763-6774} }
RTracker: Recoverable Tracking via PN Tree Structured Memory-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Yuqing and Li, Xin and Zhou, Zikun and Wang, Yaowei and He, Zhenyu and Yang, Ming-Hsuan}, title = {RTracker: Recoverable Tracking via PN Tree Structured Memory}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19038-19047} }
High-Quality Facial Geometry and Appearance Capture at Home-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2024_CVPR, author = {Han, Yuxuan and Lyu, Junfeng and Xu, Feng}, title = {High-Quality Facial Geometry and Appearance Capture at Home}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {697-707} }
DualAD: Disentangling the Dynamic and Static World for End-to-End Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Doll_2024_CVPR, author = {Doll, Simon and Hanselmann, Niklas and Schneider, Lukas and Schulz, Richard and Cordts, Marius and Enzweiler, Markus and Lensch, Hendrik P. A.}, title = {DualAD: Disentangling the Dynamic and Static World for End-to-End Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14728-14737} }
OTE: Exploring Accurate Scene Text Recognition Using One Token-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Jianjun and Wang, Yuxin and Xie, Hongtao and Zhang, Yongdong}, title = {OTE: Exploring Accurate Scene Text Recognition Using One Token}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28327-28336} }
MULDE: Multiscale Log-Density Estimation via Denoising Score Matching for Video Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Micorek_2024_CVPR, author = {Micorek, Jakub and Possegger, Horst and Narnhofer, Dominik and Bischof, Horst and Kozinski, Mateusz}, title = {MULDE: Multiscale Log-Density Estimation via Denoising Score Matching for Video Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18868-18877} }
Your Image is My Video: Reshaping the Receptive Field via Image-To-Video Differentiable AutoAugmentation and Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Casarin_2024_CVPR, author = {Casarin, Sofia and Ugwu, Cynthia I. and Escalera, Sergio and Lanz, Oswald}, title = {Your Image is My Video: Reshaping the Receptive Field via Image-To-Video Differentiable AutoAugmentation and Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5829-5839} }
PTQ4SAM: Post-Training Quantization for Segment Anything-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lv_2024_CVPR, author = {Lv, Chengtao and Chen, Hong and Guo, Jinyang and Ding, Yifu and Liu, Xianglong}, title = {PTQ4SAM: Post-Training Quantization for Segment Anything}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15941-15951} }
Improving Bird's Eye View Semantic Segmentation by Task Decomposition-
[pdf]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Tianhao and Chen, Yongcan and Wu, Yu and Liu, Tianyang and Du, Bo and Xiao, Peilun and Qiu, Shi and Yang, Hongda and Li, Guozhen and Yang, Yi and Lin, Yutian}, title = {Improving Bird's Eye View Semantic Segmentation by Task Decomposition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15512-15521} }
SpikingResformer: Bridging ResNet and Vision Transformer in Spiking Neural Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2024_CVPR, author = {Shi, Xinyu and Hao, Zecheng and Yu, Zhaofei}, title = {SpikingResformer: Bridging ResNet and Vision Transformer in Spiking Neural Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5610-5619} }
Scene Adaptive Sparse Transformer for Event-based Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2024_CVPR, author = {Peng, Yansong and Li, Hebei and Zhang, Yueyi and Sun, Xiaoyan and Wu, Feng}, title = {Scene Adaptive Sparse Transformer for Event-based Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16794-16804} }
Gaussian Shadow Casting for Neural Characters-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bolanos_2024_CVPR, author = {Bolanos, Luis and Su, Shih-Yang and Rhodin, Helge}, title = {Gaussian Shadow Casting for Neural Characters}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20997-21006} }
CURSOR: Scalable Mixed-Order Hypergraph Matching with CUR Decomposition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2024_CVPR, author = {Zheng, Qixuan and Zhang, Ming and Yan, Hong}, title = {CURSOR: Scalable Mixed-Order Hypergraph Matching with CUR Decomposition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16036-16045} }
Federated Online Adaptation for Deep Stereo-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Poggi_2024_CVPR, author = {Poggi, Matteo and Tosi, Fabio}, title = {Federated Online Adaptation for Deep Stereo}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20165-20175} }
Sequential Modeling Enables Scalable Learning for Large Vision Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bai_2024_CVPR, author = {Bai, Yutong and Geng, Xinyang and Mangalam, Karttikeya and Bar, Amir and Yuille, Alan L. and Darrell, Trevor and Malik, Jitendra and Efros, Alexei A.}, title = {Sequential Modeling Enables Scalable Learning for Large Vision Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22861-22872} }
Self-Supervised Dual Contouring-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sundararaman_2024_CVPR, author = {Sundararaman, Ramana and Klokov, Roman and Ovsjanikov, Maks}, title = {Self-Supervised Dual Contouring}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4681-4691} }
Regularized Parameter Uncertainty for Improving Generalization in Reinforcement Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Moure_2024_CVPR, author = {Moure, Pehuen and Cheng, Longbiao and Ott, Joachim and Wang, Zuowen and Liu, Shih-Chii}, title = {Regularized Parameter Uncertainty for Improving Generalization in Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23805-23814} }
GigaTraj: Predicting Long-term Trajectories of Hundreds of Pedestrians in Gigapixel Complex Scenes-
[pdf]
[bibtex]@InProceedings{Lin_2024_CVPR, author = {Lin, Haozhe and Wei, Chunyu and He, Li and Guo, Yuchen and Zhao, Yunqi and Li, Shanglong and Fang, Lu}, title = {GigaTraj: Predicting Long-term Trajectories of Hundreds of Pedestrians in Gigapixel Complex Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19331-19340} }
GSVA: Generalized Segmentation via Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2024_CVPR, author = {Xia, Zhuofan and Han, Dongchen and Han, Yizeng and Pan, Xuran and Song, Shiji and Huang, Gao}, title = {GSVA: Generalized Segmentation via Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3858-3869} }
AdaBM: On-the-Fly Adaptive Bit Mapping for Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hong_2024_CVPR, author = {Hong, Cheeun and Lee, Kyoung Mu}, title = {AdaBM: On-the-Fly Adaptive Bit Mapping for Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2641-2650} }
CoralSCOP: Segment any COral Image on this Planet-
[pdf]
[supp]
[bibtex]@InProceedings{Zheng_2024_CVPR, author = {Zheng, Ziqiang and Liang, Haixin and Hua, Binh-Son and Wong, Yue Him and Ang, Jr, Put and Chui, Apple Pui Yi and Yeung, Sai-Kit}, title = {CoralSCOP: Segment any COral Image on this Planet}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28170-28180} }
SVGDreamer: Text Guided SVG Generation with Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xing_2024_CVPR, author = {Xing, Ximing and Zhou, Haitao and Wang, Chuang and Zhang, Jing and Xu, Dong and Yu, Qian}, title = {SVGDreamer: Text Guided SVG Generation with Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4546-4555} }
BlockGCN: Redefine Topology Awareness for Skeleton-Based Action Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2024_CVPR, author = {Zhou, Yuxuan and Yan, Xudong and Cheng, Zhi-Qi and Yan, Yan and Dai, Qi and Hua, Xian-Sheng}, title = {BlockGCN: Redefine Topology Awareness for Skeleton-Based Action Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2049-2058} }
Improved Baselines with Visual Instruction Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Haotian and Li, Chunyuan and Li, Yuheng and Lee, Yong Jae}, title = {Improved Baselines with Visual Instruction Tuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26296-26306} }
Structure-Guided Adversarial Training of Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Ling and Qian, Haotian and Zhang, Zhilong and Liu, Jingwei and Cui, Bin}, title = {Structure-Guided Adversarial Training of Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7256-7266} }
NIFTY: Neural Object Interaction Fields for Guided Human Motion Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kulkarni_2024_CVPR, author = {Kulkarni, Nilesh and Rempe, Davis and Genova, Kyle and Kundu, Abhijit and Johnson, Justin and Fouhey, David and Guibas, Leonidas}, title = {NIFTY: Neural Object Interaction Fields for Guided Human Motion Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {947-957} }
C2KD: Bridging the Modality Gap for Cross-Modal Knowledge Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Huo_2024_CVPR, author = {Huo, Fushuo and Xu, Wenchao and Guo, Jingcai and Wang, Haozhao and Guo, Song}, title = {C2KD: Bridging the Modality Gap for Cross-Modal Knowledge Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16006-16015} }
Traceable Federated Continual Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Qiang and Liu, Bingyan and Li, Yawen}, title = {Traceable Federated Continual Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12872-12881} }
Can Language Beat Numerical Regression? Language-Based Multimodal Trajectory Prediction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Bae_2024_CVPR, author = {Bae, Inhwan and Lee, Junoh and Jeon, Hae-Gon}, title = {Can Language Beat Numerical Regression? Language-Based Multimodal Trajectory Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {753-766} }
Building Optimal Neural Architectures using Interpretable Knowledge-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mills_2024_CVPR, author = {Mills, Keith G. and Han, Fred X. and Salameh, Mohammad and Lu, Shengyao and Zhou, Chunhua and He, Jiao and Sun, Fengyu and Niu, Di}, title = {Building Optimal Neural Architectures using Interpretable Knowledge}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5726-5735} }
V*: Guided Visual Search as a Core Mechanism in Multimodal LLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Penghao and Xie, Saining}, title = {V*: Guided Visual Search as a Core Mechanism in Multimodal LLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13084-13094} }
Unexplored Faces of Robustness and Out-of-Distribution: Covariate Shifts in Environment and Sensor Domains-
[pdf]
[supp]
[bibtex]@InProceedings{Baek_2024_CVPR, author = {Baek, Eunsu and Park, Keondo and Kim, Jiyoon and Kim, Hyung-Sin}, title = {Unexplored Faces of Robustness and Out-of-Distribution: Covariate Shifts in Environment and Sensor Domains}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22294-22303} }
Uncertainty Visualization via Low-Dimensional Posterior Projections-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yair_2024_CVPR, author = {Yair, Omer and Nehme, Elias and Michaeli, Tomer}, title = {Uncertainty Visualization via Low-Dimensional Posterior Projections}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11041-11051} }
VSCode: General Visual Salient and Camouflaged Object Detection with 2D Prompt Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2024_CVPR, author = {Luo, Ziyang and Liu, Nian and Zhao, Wangbo and Yang, Xuguang and Zhang, Dingwen and Fan, Deng-Ping and Khan, Fahad and Han, Junwei}, title = {VSCode: General Visual Salient and Camouflaged Object Detection with 2D Prompt Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17169-17180} }
GaussianEditor: Swift and Controllable 3D Editing with Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Yiwen and Chen, Zilong and Zhang, Chi and Wang, Feng and Yang, Xiaofeng and Wang, Yikai and Cai, Zhongang and Yang, Lei and Liu, Huaping and Lin, Guosheng}, title = {GaussianEditor: Swift and Controllable 3D Editing with Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21476-21485} }
Holo-Relighting: Controllable Volumetric Portrait Relighting from a Single Image-
[pdf]
[supp]
[bibtex]@InProceedings{Mei_2024_CVPR, author = {Mei, Yiqun and Zeng, Yu and Zhang, He and Shu, Zhixin and Zhang, Xuaner and Bi, Sai and Zhang, Jianming and Jung, HyunJoon and Patel, Vishal M.}, title = {Holo-Relighting: Controllable Volumetric Portrait Relighting from a Single Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4263-4273} }
Noisy One-point Homographies are Surprisingly Good-
[pdf]
[supp]
[bibtex]@InProceedings{Ding_2024_CVPR, author = {Ding, Yaqing and Astermark, Jonathan and Oskarsson, Magnus and Larsson, Viktor}, title = {Noisy One-point Homographies are Surprisingly Good}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5125-5134} }
PointInfinity: Resolution-Invariant Point Diffusion Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Zixuan and Johnson, Justin and Debnath, Shoubhik and Rehg, James M. and Wu, Chao-Yuan}, title = {PointInfinity: Resolution-Invariant Point Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10050-10060} }
Panacea: Panoramic and Controllable Video Generation for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wen_2024_CVPR, author = {Wen, Yuqing and Zhao, Yucheng and Liu, Yingfei and Jia, Fan and Wang, Yanhui and Luo, Chong and Zhang, Chi and Wang, Tiancai and Sun, Xiaoyan and Zhang, Xiangyu}, title = {Panacea: Panoramic and Controllable Video Generation for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6902-6912} }
Open-Vocabulary Semantic Segmentation with Image Embedding Balancing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shan_2024_CVPR, author = {Shan, Xiangheng and Wu, Dongyue and Zhu, Guilin and Shao, Yuanjie and Sang, Nong and Gao, Changxin}, title = {Open-Vocabulary Semantic Segmentation with Image Embedding Balancing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28412-28421} }
Structured Model Probing: Empowering Efficient Transfer Learning by Structured Regularization-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Zhi-Fan and Mao, Chaojie and Wang, Wue and Jiang, Jianwen and Lv, Yiliang and Jin, Rong}, title = {Structured Model Probing: Empowering Efficient Transfer Learning by Structured Regularization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16849-16858} }
Multi-Modal Proxy Learning Towards Personalized Visual Multiple Clustering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yao_2024_CVPR, author = {Yao, Jiawei and Qian, Qi and Hu, Juhua}, title = {Multi-Modal Proxy Learning Towards Personalized Visual Multiple Clustering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14066-14075} }
DreamMatcher: Appearance Matching Self-Attention for Semantically-Consistent Text-to-Image Personalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nam_2024_CVPR, author = {Nam, Jisu and Kim, Heesu and Lee, DongJae and Jin, Siyoon and Kim, Seungryong and Chang, Seunggyu}, title = {DreamMatcher: Appearance Matching Self-Attention for Semantically-Consistent Text-to-Image Personalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8100-8110} }
Stronger Fewer & Superior: Harnessing Vision Foundation Models for Domain Generalized Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2024_CVPR, author = {Wei, Zhixiang and Chen, Lin and Jin, Yi and Ma, Xiaoxiao and Liu, Tianle and Ling, Pengyang and Wang, Ben and Chen, Huaian and Zheng, Jinjin}, title = {Stronger Fewer \& Superior: Harnessing Vision Foundation Models for Domain Generalized Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28619-28630} }
PolarMatte: Fully Computational Ground-Truth-Quality Alpha Matte Extraction for Images and Video using Polarized Screen Matting-
[pdf]
[supp]
[bibtex]@InProceedings{Enomoto_2024_CVPR, author = {Enomoto, Kenji and Rhodes, TJ and Price, Brian and Miller, Gavin}, title = {PolarMatte: Fully Computational Ground-Truth-Quality Alpha Matte Extraction for Images and Video using Polarized Screen Matting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3901-3909} }
ChAda-ViT : Channel Adaptive Attention for Joint Representation Learning of Heterogeneous Microscopy Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bourriez_2024_CVPR, author = {Bourriez, Nicolas and Bendidi, Ihab and Cohen, Ethan and Watkinson, Gabriel and Sanchez, Maxime and Bollot, Guillaume and Genovesio, Auguste}, title = {ChAda-ViT : Channel Adaptive Attention for Joint Representation Learning of Heterogeneous Microscopy Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11556-11565} }
CARZero: Cross-Attention Alignment for Radiology Zero-Shot Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lai_2024_CVPR, author = {Lai, Haoran and Yao, Qingsong and Jiang, Zihang and Wang, Rongsheng and He, Zhiyang and Tao, Xiaodong and Zhou, S. Kevin}, title = {CARZero: Cross-Attention Alignment for Radiology Zero-Shot Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11137-11146} }
HOIDiffusion: Generating Realistic 3D Hand-Object Interaction Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Mengqi and Fu, Yang and Ding, Zheng and Liu, Sifei and Tu, Zhuowen and Wang, Xiaolong}, title = {HOIDiffusion: Generating Realistic 3D Hand-Object Interaction Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8521-8531} }
VecFusion: Vector Font Generation with Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Thamizharasan_2024_CVPR, author = {Thamizharasan, Vikas and Liu, Difan and Agarwal, Shantanu and Fisher, Matthew and Gharbi, Michael and Wang, Oliver and Jacobson, Alec and Kalogerakis, Evangelos}, title = {VecFusion: Vector Font Generation with Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7943-7952} }
Multi-Modal Hallucination Control by Visual Information Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Favero_2024_CVPR, author = {Favero, Alessandro and Zancato, Luca and Trager, Matthew and Choudhary, Siddharth and Perera, Pramuditha and Achille, Alessandro and Swaminathan, Ashwin and Soatto, Stefano}, title = {Multi-Modal Hallucination Control by Visual Information Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14303-14312} }
Towards Text-guided 3D Scene Composition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Qihang and Wang, Chaoyang and Siarohin, Aliaksandr and Zhuang, Peiye and Xu, Yinghao and Yang, Ceyuan and Lin, Dahua and Zhou, Bolei and Tulyakov, Sergey and Lee, Hsin-Ying}, title = {Towards Text-guided 3D Scene Composition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6829-6838} }
EMAGE: Towards Unified Holistic Co-Speech Gesture Generation via Expressive Masked Audio Gesture Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Haiyang and Zhu, Zihao and Becherini, Giorgio and Peng, Yichen and Su, Mingyang and Zhou, You and Zhe, Xuefei and Iwamoto, Naoya and Zheng, Bo and Black, Michael J.}, title = {EMAGE: Towards Unified Holistic Co-Speech Gesture Generation via Expressive Masked Audio Gesture Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1144-1154} }
Adversarial Text to Continuous Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Haydarov_2024_CVPR, author = {Haydarov, Kilichbek and Muhamed, Aashiq and Shen, Xiaoqian and Lazarevic, Jovana and Skorokhodov, Ivan and Galappaththige, Chamuditha Jayanga and Elhoseiny, Mohamed}, title = {Adversarial Text to Continuous Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6316-6326} }
The Neglected Tails in Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Parashar_2024_CVPR, author = {Parashar, Shubham and Lin, Zhiqiu and Liu, Tian and Dong, Xiangjue and Li, Yanan and Ramanan, Deva and Caverlee, James and Kong, Shu}, title = {The Neglected Tails in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12988-12997} }
Learning Background Prompts to Discover Implicit Knowledge for Open Vocabulary Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Jiaming and Zhang, Jiacheng and Li, Jichang and Li, Ge and Liu, Si and Lin, Liang and Li, Guanbin}, title = {Learning Background Prompts to Discover Implicit Knowledge for Open Vocabulary Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16678-16687} }
HumanNeRF-SE: A Simple yet Effective Approach to Animate HumanNeRF with Diverse Poses-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2024_CVPR, author = {Ma, Caoyuan and Liu, Yu-Lun and Wang, Zhixiang and Liu, Wu and Liu, Xinchen and Wang, Zheng}, title = {HumanNeRF-SE: A Simple yet Effective Approach to Animate HumanNeRF with Diverse Poses}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1460-1470} }
HOLD: Category-agnostic 3D Reconstruction of Interacting Hands and Objects from Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2024_CVPR, author = {Fan, Zicong and Parelli, Maria and Kadoglou, Maria Eleni and Chen, Xu and Kocabas, Muhammed and Black, Michael J. and Hilliges, Otmar}, title = {HOLD: Category-agnostic 3D Reconstruction of Interacting Hands and Objects from Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {494-504} }
Continual Segmentation with Disentangled Objectness Learning and Class Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gong_2024_CVPR, author = {Gong, Yizheng and Yu, Siyue and Wang, Xiaoyang and Xiao, Jimin}, title = {Continual Segmentation with Disentangled Objectness Learning and Class Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3848-3857} }
Towards Accurate Post-training Quantization for Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Changyuan and Wang, Ziwei and Xu, Xiuwei and Tang, Yansong and Zhou, Jie and Lu, Jiwen}, title = {Towards Accurate Post-training Quantization for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16026-16035} }
ASAM: Boosting Segment Anything Model with Adversarial Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Bo and Xiao, Haoke and Tang, Lv}, title = {ASAM: Boosting Segment Anything Model with Adversarial Tuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3699-3710} }
UniBind: LLM-Augmented Unified and Balanced Representation Space to Bind Them All-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lyu_2024_CVPR, author = {Lyu, Yuanhuiyi and Zheng, Xu and Zhou, Jiazhou and Wang, Lin}, title = {UniBind: LLM-Augmented Unified and Balanced Representation Space to Bind Them All}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26752-26762} }
Dynamic Support Information Mining for Category-Agnostic Pose Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Ren_2024_CVPR, author = {Ren, Pengfei and Gao, Yuanyuan and Sun, Haifeng and Qi, Qi and Wang, Jingyu and Liao, Jianxin}, title = {Dynamic Support Information Mining for Category-Agnostic Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1921-1930} }
Test-Time Adaptation for Depth Completion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2024_CVPR, author = {Park, Hyoungseob and Gupta, Anjali and Wong, Alex}, title = {Test-Time Adaptation for Depth Completion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20519-20529} }
GOAT-Bench: A Benchmark for Multi-Modal Lifelong Navigation-
[pdf]
[supp]
[bibtex]@InProceedings{Khanna_2024_CVPR, author = {Khanna, Mukul and Ramrakhya, Ram and Chhablani, Gunjan and Yenamandra, Sriram and Gervet, Theophile and Chang, Matthew and Kira, Zsolt and Chaplot, Devendra Singh and Batra, Dhruv and Mottaghi, Roozbeh}, title = {GOAT-Bench: A Benchmark for Multi-Modal Lifelong Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16373-16383} }
Taming Mode Collapse in Score Distillation for Text-to-3D Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Peihao and Xu, Dejia and Fan, Zhiwen and Wang, Dilin and Mohan, Sreyas and Iandola, Forrest and Ranjan, Rakesh and Li, Yilei and Liu, Qiang and Wang, Zhangyang and Chandra, Vikas}, title = {Taming Mode Collapse in Score Distillation for Text-to-3D Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9037-9047} }
Binarized Low-light Raw Video Enhancement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Gengchen and Zhang, Yulun and Yuan, Xin and Fu, Ying}, title = {Binarized Low-light Raw Video Enhancement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25753-25762} }
MorpheuS: Neural Dynamic 360° Surface Reconstruction from Monocular RGB-D Video-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Hengyi and Wang, Jingwen and Agapito, Lourdes}, title = {MorpheuS: Neural Dynamic 360{\textdegree} Surface Reconstruction from Monocular RGB-D Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20965-20976} }
Decoupling Static and Hierarchical Motion Perception for Referring Video Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{He_2024_CVPR, author = {He, Shuting and Ding, Henghui}, title = {Decoupling Static and Hierarchical Motion Perception for Referring Video Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13332-13341} }
MagicAnimate: Temporally Consistent Human Image Animation using Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Zhongcong and Zhang, Jianfeng and Liew, Jun Hao and Yan, Hanshu and Liu, Jia-Wei and Zhang, Chenxu and Feng, Jiashi and Shou, Mike Zheng}, title = {MagicAnimate: Temporally Consistent Human Image Animation using Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1481-1490} }
Dense Vision Transformer Compression with Few Samples-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Hanxiao and Zhou, Yifan and Wang, Guo-Hua}, title = {Dense Vision Transformer Compression with Few Samples}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15825-15834} }
Masked AutoDecoder is Effective Multi-Task Vision Generalist-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qiu_2024_CVPR, author = {Qiu, Han and Huang, Jiaxing and Gao, Peng and Lu, Lewei and Zhang, Xiaoqin and Lu, Shijian}, title = {Masked AutoDecoder is Effective Multi-Task Vision Generalist}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14152-14161} }
Weakly Misalignment-free Adaptive Feature Alignment for UAVs-based Multimodal Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Chen and Qi, Jiahao and Liu, Xingyue and Bin, Kangcheng and Fu, Ruigang and Hu, Xikun and Zhong, Ping}, title = {Weakly Misalignment-free Adaptive Feature Alignment for UAVs-based Multimodal Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26836-26845} }
From Correspondences to Pose: Non-minimal Certifiably Optimal Relative Pose without Disambiguation-
[pdf]
[supp]
[bibtex]@InProceedings{Tirado-Garin_2024_CVPR, author = {Tirado-Gar{\'\i}n, Javier and Civera, Javier}, title = {From Correspondences to Pose: Non-minimal Certifiably Optimal Relative Pose without Disambiguation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {403-412} }
Passive Snapshot Coded Aperture Dual-Pixel RGB-D Imaging-
[pdf]
[supp]
[bibtex]@InProceedings{Ghanekar_2024_CVPR, author = {Ghanekar, Bhargav and Khan, Salman Siddique and Sharma, Pranav and Singh, Shreyas and Boominathan, Vivek and Mitra, Kaushik and Veeraraghavan, Ashok}, title = {Passive Snapshot Coded Aperture Dual-Pixel RGB-D Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25348-25357} }
Loose Inertial Poser: Motion Capture with IMU-attached Loose-Wear Jacket-
[pdf]
[supp]
[bibtex]@InProceedings{Zuo_2024_CVPR, author = {Zuo, Chengxu and Wang, Yiming and Zhan, Lishuang and Guo, Shihui and Yi, Xinyu and Xu, Feng and Qin, Yipeng}, title = {Loose Inertial Poser: Motion Capture with IMU-attached Loose-Wear Jacket}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2209-2219} }
Instance Tracking in 3D Scenes from Egocentric Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Yunhan and Ma, Haoyu and Kong, Shu and Fowlkes, Charless}, title = {Instance Tracking in 3D Scenes from Egocentric Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21933-21944} }
Correlation-aware Coarse-to-fine MLPs for Deformable Medical Image Registration-
[pdf]
[arXiv]
[bibtex]@InProceedings{Meng_2024_CVPR, author = {Meng, Mingyuan and Feng, Dagan and Bi, Lei and Kim, Jinman}, title = {Correlation-aware Coarse-to-fine MLPs for Deformable Medical Image Registration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9645-9654} }
Toward Generalist Anomaly Detection via In-context Residual Learning with Few-shot Sample Prompts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2024_CVPR, author = {Zhu, Jiawen and Pang, Guansong}, title = {Toward Generalist Anomaly Detection via In-context Residual Learning with Few-shot Sample Prompts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17826-17836} }
Fourier-basis Functions to Bridge Augmentation Gap: Rethinking Frequency Augmentation in Image Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vaish_2024_CVPR, author = {Vaish, Puru and Wang, Shunxin and Strisciuglio, Nicola}, title = {Fourier-basis Functions to Bridge Augmentation Gap: Rethinking Frequency Augmentation in Image Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17763-17772} }
Learning to Transform Dynamically for Better Adversarial Transferability-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2024_CVPR, author = {Zhu, Rongyi and Zhang, Zeliang and Liang, Susan and Liu, Zhuo and Xu, Chenliang}, title = {Learning to Transform Dynamically for Better Adversarial Transferability}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24273-24283} }
PlatoNeRF: 3D Reconstruction in Plato's Cave via Single-View Two-Bounce Lidar-
[pdf]
[supp]
[bibtex]@InProceedings{Klinghoffer_2024_CVPR, author = {Klinghoffer, Tzofi and Xiang, Xiaoyu and Somasundaram, Siddharth and Fan, Yuchen and Richardt, Christian and Raskar, Ramesh and Ranjan, Rakesh}, title = {PlatoNeRF: 3D Reconstruction in Plato's Cave via Single-View Two-Bounce Lidar}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14565-14574} }
PanoContext-Former: Panoramic Total Scene Understanding with a Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{Dong_2024_CVPR, author = {Dong, Yuan and Fang, Chuan and Bo, Liefeng and Dong, Zilong and Tan, Ping}, title = {PanoContext-Former: Panoramic Total Scene Understanding with a Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28087-28097} }
Training-Free Pretrained Model Merging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Zhengqi and Yuan, Ke and Wang, Huiqiong and Wang, Yong and Song, Mingli and Song, Jie}, title = {Training-Free Pretrained Model Merging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5915-5925} }
NC-SDF: Enhancing Indoor Scene Reconstruction Using Neural SDFs with View-Dependent Normal Compensation-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Ziyi and Wu, Xiaolong and Zhang, Yu}, title = {NC-SDF: Enhancing Indoor Scene Reconstruction Using Neural SDFs with View-Dependent Normal Compensation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5155-5165} }
An Interactive Navigation Method with Effect-oriented Affordance-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Xiaohan and Liu, Yuehu and Song, Xinhang and Liu, Yuyi and Zhang, Sixian and Jiang, Shuqiang}, title = {An Interactive Navigation Method with Effect-oriented Affordance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16446-16456} }
Person in Place: Generating Associative Skeleton-Guidance Maps for Human-Object Interaction Image Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, ChangHee and Kang, ChanHee and Kong, Kyeongbo and Oh, Hanni and Kang, Suk-Ju}, title = {Person in Place: Generating Associative Skeleton-Guidance Maps for Human-Object Interaction Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8164-8175} }
PREGO: Online Mistake Detection in PRocedural EGOcentric Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Flaborea_2024_CVPR, author = {Flaborea, Alessandro and {D'Amely di Melendugno}, Guido Maria and Plini, Leonardo and Scofano, Luca and De Matteis, Edoardo and Furnari, Antonino and Farinella, Giovanni Maria and Galasso, Fabio}, title = {PREGO: Online Mistake Detection in PRocedural EGOcentric Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18483-18492} }
ChatPose: Chatting about 3D Human Pose-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2024_CVPR, author = {Feng, Yao and Lin, Jing and Dwivedi, Sai Kumar and Sun, Yu and Patel, Priyanka and Black, Michael J.}, title = {ChatPose: Chatting about 3D Human Pose}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2093-2103} }
Prompt3D: Random Prompt Assisted Weakly-Supervised 3D Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Xiaohong and Ye, Huisheng and Li, Jingwen and Tang, Qinyu and Li, Yuanqi and Guo, Yanwen and Guo, Jie}, title = {Prompt3D: Random Prompt Assisted Weakly-Supervised 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28046-28055} }
Logit Standardization in Knowledge Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2024_CVPR, author = {Sun, Shangquan and Ren, Wenqi and Li, Jingzhi and Wang, Rui and Cao, Xiaochun}, title = {Logit Standardization in Knowledge Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15731-15740} }
Fine-grained Prototypical Voting with Heterogeneous Mixup for Semi-supervised 2D-3D Cross-modal Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Fan and Hua, Xian-Sheng and Chen, Chong and Luo, Xiao}, title = {Fine-grained Prototypical Voting with Heterogeneous Mixup for Semi-supervised 2D-3D Cross-modal Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17016-17026} }
Leak and Learn: An Attacker's Cookbook to Train Using Leaked Data from Federated Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Joshua C. and Dabholkar, Ahaan and Sharma, Atul and Bagchi, Saurabh}, title = {Leak and Learn: An Attacker's Cookbook to Train Using Leaked Data from Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12247-12256} }
OCAI: Improving Optical Flow Estimation by Occlusion and Consistency Aware Interpolation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2024_CVPR, author = {Jeong, Jisoo and Cai, Hong and Garrepalli, Risheek and Lin, Jamie Menjay and Hayat, Munawar and Porikli, Fatih}, title = {OCAI: Improving Optical Flow Estimation by Occlusion and Consistency Aware Interpolation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19352-19362} }
Distilling ODE Solvers of Diffusion Models into Smaller Steps-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Sanghwan and Tang, Hao and Yu, Fisher}, title = {Distilling ODE Solvers of Diffusion Models into Smaller Steps}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9410-9419} }
Navigating Beyond Dropout: An Intriguing Solution towards Generalizable Image Super Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Hongjun and Chen, Jiyuan and Zheng, Yinqiang and Zeng, Tieyong}, title = {Navigating Beyond Dropout: An Intriguing Solution towards Generalizable Image Super Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25532-25543} }
Doodle Your 3D: From Abstract Freehand Sketches to Precise 3D Shapes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bandyopadhyay_2024_CVPR, author = {Bandyopadhyay, Hmrishav and Koley, Subhadeep and Das, Ayan and Bhunia, Ayan Kumar and Sain, Aneeshan and Chowdhury, Pinaki Nath and Xiang, Tao and Song, Yi-Zhe}, title = {Doodle Your 3D: From Abstract Freehand Sketches to Precise 3D Shapes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9795-9805} }
LightIt: Illumination Modeling and Control for Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kocsis_2024_CVPR, author = {Kocsis, Peter and Philip, Julien and Sunkavalli, Kalyan and Nie{\ss}ner, Matthias and Hold-Geoffroy, Yannick}, title = {LightIt: Illumination Modeling and Control for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9359-9369} }
Single View Refractive Index Tomography with Neural Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Brandon and Levis, Aviad and Connor, Liam and Srinivasan, Pratul P. and Bouman, Katherine L.}, title = {Single View Refractive Index Tomography with Neural Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25358-25367} }
Neural Lineage-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2024_CVPR, author = {Yu, Runpeng and Wang, Xinchao}, title = {Neural Lineage}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4797-4807} }
Visual Layout Composer: Image-Vector Dual Diffusion Model for Design Layout Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Shabani_2024_CVPR, author = {Shabani, Mohammad Amin and Wang, Zhaowen and Liu, Difan and Zhao, Nanxuan and Yang, Jimei and Furukawa, Yasutaka}, title = {Visual Layout Composer: Image-Vector Dual Diffusion Model for Design Layout Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9222-9231} }
FC-GNN: Recovering Reliable and Accurate Correspondences from Interferences-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Haobo and Zhou, Jun and Yang, Hua and Pan, Renjie and Li, Cunyan}, title = {FC-GNN: Recovering Reliable and Accurate Correspondences from Interferences}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25213-25222} }
Turb-Seg-Res: A Segment-then-Restore Pipeline for Dynamic Videos with Atmospheric Turbulence-
[pdf]
[supp]
[bibtex]@InProceedings{Saha_2024_CVPR, author = {Saha, Ripon Kumar and Qin, Dehao and Li, Nianyi and Ye, Jinwei and Jayasuriya, Suren}, title = {Turb-Seg-Res: A Segment-then-Restore Pipeline for Dynamic Videos with Atmospheric Turbulence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25286-25296} }
Real-time Acquisition and Reconstruction of Dynamic Volumes with Neural Structured Illumination-
[pdf]
[supp]
[bibtex]@InProceedings{Zeng_2024_CVPR, author = {Zeng, Yixin and Bi, Zoubin and Yin, Mingrui and Feng, Xiang and Zhou, Kun and Wu, Hongzhi}, title = {Real-time Acquisition and Reconstruction of Dynamic Volumes with Neural Structured Illumination}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20186-20195} }
3D Multi-frame Fusion for Video Stabilization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2024_CVPR, author = {Peng, Zhan and Ye, Xinyi and Zhao, Weiyue and Liu, Tianqi and Sun, Huiqiang and Li, Baopu and Cao, Zhiguo}, title = {3D Multi-frame Fusion for Video Stabilization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7507-7516} }
Local-consistent Transformation Learning for Rotation-invariant Point Cloud Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Yiyang and Duan, Lunhao and Zhao, Shanshan and Ding, Changxing and Tao, Dacheng}, title = {Local-consistent Transformation Learning for Rotation-invariant Point Cloud Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5418-5427} }
Tailored Visions: Enhancing Text-to-Image Generation with Personalized Prompt Rewriting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Zijie and Zhang, Lichao and Weng, Fangsheng and Pan, Lili and Lan, Zhenzhong}, title = {Tailored Visions: Enhancing Text-to-Image Generation with Personalized Prompt Rewriting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7727-7736} }
Efficient Deformable ConvNets: Rethinking Dynamic and Sparse Operator for Vision Applications-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiong_2024_CVPR, author = {Xiong, Yuwen and Li, Zhiqi and Chen, Yuntao and Wang, Feng and Zhu, Xizhou and Luo, Jiapeng and Wang, Wenhai and Lu, Tong and Li, Hongsheng and Qiao, Yu and Lu, Lewei and Zhou, Jie and Dai, Jifeng}, title = {Efficient Deformable ConvNets: Rethinking Dynamic and Sparse Operator for Vision Applications}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5652-5661} }
CoDe: An Explicit Content Decoupling Framework for Image Restoration-
[pdf]
[supp]
[bibtex]@InProceedings{Gu_2024_CVPR, author = {Gu, Enxuan and Ge, Hongwei and Guo, Yong}, title = {CoDe: An Explicit Content Decoupling Framework for Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2920-2930} }
XFibrosis: Explicit Vessel-Fiber Modeling for Fibrosis Staging from Liver Pathology Images-
[pdf]
[supp]
[bibtex]@InProceedings{Yin_2024_CVPR, author = {Yin, Chong and Liu, Siqi and Lyu, Fei and Lu, Jiahao and Darkner, Sune and Wong, Vincent Wai-Sun and Yuen, Pong C.}, title = {XFibrosis: Explicit Vessel-Fiber Modeling for Fibrosis Staging from Liver Pathology Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11282-11291} }
UnO: Unsupervised Occupancy Fields for Perception and Forecasting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Agro_2024_CVPR, author = {Agro, Ben and Sykora, Quinlan and Casas, Sergio and Gilles, Thomas and Urtasun, Raquel}, title = {UnO: Unsupervised Occupancy Fields for Perception and Forecasting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14487-14496} }
SpatialVLM: Endowing Vision-Language Models with Spatial Reasoning Capabilities-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Boyuan and Xu, Zhuo and Kirmani, Sean and Ichter, Brian and Sadigh, Dorsa and Guibas, Leonidas and Xia, Fei}, title = {SpatialVLM: Endowing Vision-Language Models with Spatial Reasoning Capabilities}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14455-14465} }
InstructDiffusion: A Generalist Modeling Interface for Vision Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Geng_2024_CVPR, author = {Geng, Zigang and Yang, Binxin and Hang, Tiankai and Li, Chen and Gu, Shuyang and Zhang, Ting and Bao, Jianmin and Zhang, Zheng and Li, Houqiang and Hu, Han and Chen, Dong and Guo, Baining}, title = {InstructDiffusion: A Generalist Modeling Interface for Vision Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12709-12720} }
DreamVideo: Composing Your Dream Videos with Customized Subject and Motion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2024_CVPR, author = {Wei, Yujie and Zhang, Shiwei and Qing, Zhiwu and Yuan, Hangjie and Liu, Zhiheng and Liu, Yu and Zhang, Yingya and Zhou, Jingren and Shan, Hongming}, title = {DreamVideo: Composing Your Dream Videos with Customized Subject and Motion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6537-6549} }
Gated Fields: Learning Scene Reconstruction from Gated Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ramazzina_2024_CVPR, author = {Ramazzina, Andrea and Walz, Stefanie and Dahal, Pragyan and Bijelic, Mario and Heide, Felix}, title = {Gated Fields: Learning Scene Reconstruction from Gated Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10530-10541} }
RadarDistill: Boosting Radar-based Object Detection Performance via Knowledge Distillation from LiDAR Features-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bang_2024_CVPR, author = {Bang, Geonho and Choi, Kwangjin and Kim, Jisong and Kum, Dongsuk and Choi, Jun Won}, title = {RadarDistill: Boosting Radar-based Object Detection Performance via Knowledge Distillation from LiDAR Features}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15491-15500} }
Probabilistic Sampling of Balanced K-Means using Adiabatic Quantum Computing-
[pdf]
[supp]
[bibtex]@InProceedings{Zaech_2024_CVPR, author = {Zaech, Jan-Nico and Danelljan, Martin and Birdal, Tolga and Van Gool, Luc}, title = {Probabilistic Sampling of Balanced K-Means using Adiabatic Quantum Computing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26191-26201} }
UniPT: Universal Parallel Tuning for Transfer Learning with Efficient Parameter and Memory-
[pdf]
[arXiv]
[bibtex]@InProceedings{Diao_2024_CVPR, author = {Diao, Haiwen and Wan, Bo and Zhang, Ying and Jia, Xu and Lu, Huchuan and Chen, Long}, title = {UniPT: Universal Parallel Tuning for Transfer Learning with Efficient Parameter and Memory}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28729-28740} }
Composed Video Retrieval via Enriched Context and Discriminative Embeddings-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Thawakar_2024_CVPR, author = {Thawakar, Omkar and Naseer, Muzammal and Anwer, Rao Muhammad and Khan, Salman and Felsberg, Michael and Shah, Mubarak and Khan, Fahad Shahbaz}, title = {Composed Video Retrieval via Enriched Context and Discriminative Embeddings}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26896-26906} }
Using Human Feedback to Fine-tune Diffusion Models without Any Reward Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Kai and Tao, Jian and Lyu, Jiafei and Ge, Chunjiang and Chen, Jiaxin and Shen, Weihan and Zhu, Xiaolong and Li, Xiu}, title = {Using Human Feedback to Fine-tune Diffusion Models without Any Reward Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8941-8951} }
Perceptual Assessment and Optimization of HDR Image Rendering-
[pdf]
[arXiv]
[bibtex]@InProceedings{Cao_2024_CVPR, author = {Cao, Peibei and Mantiuk, Rafal K. and Ma, Kede}, title = {Perceptual Assessment and Optimization of HDR Image Rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22433-22443} }
Multiview Aerial Visual RECognition (MAVREC): Can Multi-view Improve Aerial Visual Perception?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dutta_2024_CVPR, author = {Dutta, Aritra and Das, Srijan and Nielsen, Jacob and Chakraborty, Rajatsubhra and Shah, Mubarak}, title = {Multiview Aerial Visual RECognition (MAVREC): Can Multi-view Improve Aerial Visual Perception?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22678-22690} }
Diffusion-driven GAN Inversion for Multi-Modal Face Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Jihyun and Oh, Changjae and Do, Hoseok and Kim, Soohyun and Sohn, Kwanghoon}, title = {Diffusion-driven GAN Inversion for Multi-Modal Face Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10403-10412} }
Low-Rank Knowledge Decomposition for Medical Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR, author = {Zhou, Yuhang and Li, Haolin and Du, Siyuan and Yao, Jiangchao and Zhang, Ya and Wang, Yanfeng}, title = {Low-Rank Knowledge Decomposition for Medical Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11611-11620} }
SaCo Loss: Sample-wise Affinity Consistency for Vision-Language Pre-training-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Sitong and Tan, Haoru and Tian, Zhuotao and Chen, Yukang and Qi, Xiaojuan and Jia, Jiaya}, title = {SaCo Loss: Sample-wise Affinity Consistency for Vision-Language Pre-training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27358-27369} }
Steganographic Passport: An Owner and User Verifiable Credential for Deep Model IP Protection Without Retraining-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cui_2024_CVPR, author = {Cui, Qi and Meng, Ruohan and Xu, Chaohui and Chang, Chip-Hong}, title = {Steganographic Passport: An Owner and User Verifiable Credential for Deep Model IP Protection Without Retraining}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12302-12311} }
Stable Neighbor Denoising for Source-free Domain Adaptive Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Dong and Wang, Shuang and Zang, Qi and Jiao, Licheng and Sebe, Nicu and Zhong, Zhun}, title = {Stable Neighbor Denoising for Source-free Domain Adaptive Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23416-23427} }
SynSP: Synergy of Smoothness and Precision in Pose Sequences Refinement-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Tao and Jin, Lei and Wang, Zheng and Li, Jianshu and Li, Liang and Zhao, Fang and Cheng, Yu and Yuan, Li and Zhou, Li and Xing, Junliang and Zhao, Jian}, title = {SynSP: Synergy of Smoothness and Precision in Pose Sequences Refinement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1824-1833} }
En3D: An Enhanced Generative Model for Sculpting 3D Humans from 2D Synthetic Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Men_2024_CVPR, author = {Men, Yifang and Lei, Biwen and Yao, Yuan and Cui, Miaomiao and Lian, Zhouhui and Xie, Xuansong}, title = {En3D: An Enhanced Generative Model for Sculpting 3D Humans from 2D Synthetic Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9981-9991} }
Neural Visibility Field for Uncertainty-Driven Active Mapping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xue_2024_CVPR, author = {Xue, Shangjie and Dill, Jesse and Mathur, Pranay and Dellaert, Frank and Tsiotras, Panagiotis and Xu, Danfei}, title = {Neural Visibility Field for Uncertainty-Driven Active Mapping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18122-18132} }
Tri-Perspective View Decomposition for Geometry-Aware Depth Completion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2024_CVPR, author = {Yan, Zhiqiang and Lin, Yuankai and Wang, Kun and Zheng, Yupeng and Wang, Yufei and Zhang, Zhenyu and Li, Jun and Yang, Jian}, title = {Tri-Perspective View Decomposition for Geometry-Aware Depth Completion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4874-4884} }
Boosting Adversarial Training via Fisher-Rao Norm-based Regularization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2024_CVPR, author = {Yin, Xiangyu and Ruan, Wenjie}, title = {Boosting Adversarial Training via Fisher-Rao Norm-based Regularization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24544-24553} }
Learned Representation-Guided Diffusion Models for Large-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Graikos_2024_CVPR, author = {Graikos, Alexandros and Yellapragada, Srikar and Le, Minh-Quan and Kapse, Saarthak and Prasanna, Prateek and Saltz, Joel and Samaras, Dimitris}, title = {Learned Representation-Guided Diffusion Models for Large-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8532-8542} }
DAVE - A Detect-and-Verify Paradigm for Low-Shot Counting-
[pdf]
[bibtex]@InProceedings{Pelhan_2024_CVPR, author = {Pelhan, Jer and Luke\v{z}i\v{c}, Alan and Zavrtanik, Vitjan and Kristan, Matej}, title = {DAVE - A Detect-and-Verify Paradigm for Low-Shot Counting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23293-23302} }
Ranni: Taming Text-to-Image Diffusion for Accurate Instruction Following-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2024_CVPR, author = {Feng, Yutong and Gong, Biao and Chen, Di and Shen, Yujun and Liu, Yu and Zhou, Jingren}, title = {Ranni: Taming Text-to-Image Diffusion for Accurate Instruction Following}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4744-4753} }
Relaxed Contrastive Learning for Federated Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Seo_2024_CVPR, author = {Seo, Seonguk and Kim, Jinkyu and Kim, Geeho and Han, Bohyung}, title = {Relaxed Contrastive Learning for Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12279-12288} }
Direct2.5: Diverse Text-to-3D Generation via Multi-view 2.5D Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2024_CVPR, author = {Lu, Yuanxun and Zhang, Jingyang and Li, Shiwei and Fang, Tian and McKinnon, David and Tsin, Yanghai and Quan, Long and Cao, Xun and Yao, Yao}, title = {Direct2.5: Diverse Text-to-3D Generation via Multi-view 2.5D Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8744-8753} }
Efficient LoFTR: Semi-Dense Local Feature Matching with Sparse-Like Speed-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Yifan and He, Xingyi and Peng, Sida and Tan, Dongli and Zhou, Xiaowei}, title = {Efficient LoFTR: Semi-Dense Local Feature Matching with Sparse-Like Speed}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21666-21675} }
Contextual Augmented Global Contrast for Multimodal Intent Recognition-
[pdf]
[bibtex]@InProceedings{Sun_2024_CVPR, author = {Sun, Kaili and Xie, Zhiwen and Ye, Mang and Zhang, Huyin}, title = {Contextual Augmented Global Contrast for Multimodal Intent Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26963-26973} }
Pre-trained Model Guided Fine-Tuning for Zero-Shot Adversarial Robustness-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Sibo and Zhang, Jie and Yuan, Zheng and Shan, Shiguang}, title = {Pre-trained Model Guided Fine-Tuning for Zero-Shot Adversarial Robustness}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24502-24511} }
MatFuse: Controllable Material Generation with Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vecchio_2024_CVPR, author = {Vecchio, Giuseppe and Sortino, Renato and Palazzo, Simone and Spampinato, Concetto}, title = {MatFuse: Controllable Material Generation with Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4429-4438} }
CoGS: Controllable Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2024_CVPR, author = {Yu, Heng and Julin, Joel and Milacski, Zolt\'an A. and Niinuma, Koichiro and Jeni, L\'aszl\'o A.}, title = {CoGS: Controllable Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21624-21633} }
Partial-to-Partial Shape Matching with Geometric Consistency-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ehm_2024_CVPR, author = {Ehm, Viktoria and Gao, Maolin and Roetzer, Paul and Eisenberger, Marvin and Cremers, Daniel and Bernard, Florian}, title = {Partial-to-Partial Shape Matching with Geometric Consistency}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27488-27497} }
Descriptor and Word Soups: Overcoming the Parameter Efficiency Accuracy Tradeoff for Out-of-Distribution Few-shot Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liao_2024_CVPR, author = {Liao, Christopher and Tsiligkaridis, Theodoros and Kulis, Brian}, title = {Descriptor and Word Soups: Overcoming the Parameter Efficiency Accuracy Tradeoff for Out-of-Distribution Few-shot Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27015-27025} }
Harnessing the Power of MLLMs for Transferable Text-to-Image Person ReID-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tan_2024_CVPR, author = {Tan, Wentan and Ding, Changxing and Jiang, Jiayu and Wang, Fei and Zhan, Yibing and Tao, Dapeng}, title = {Harnessing the Power of MLLMs for Transferable Text-to-Image Person ReID}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17127-17137} }
360+x: A Panoptic Multi-modal Scene Understanding Dataset-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Hao and Hou, Yuqi and Qu, Chenyuan and Testini, Irene and Hong, Xiaohan and Jiao, Jianbo}, title = {360+x: A Panoptic Multi-modal Scene Understanding Dataset}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19373-19382} }
Weakly Supervised Video Individual Counting-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Xinyan and Li, Guorong and Qi, Yuankai and Yan, Ziheng and Han, Zhenjun and van den Hengel, Anton and Yang, Ming-Hsuan and Huang, Qingming}, title = {Weakly Supervised Video Individual Counting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19228-19237} }
Gaussian Shading: Provable Performance-Lossless Image Watermarking for Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Zijin and Zeng, Kai and Chen, Kejiang and Fang, Han and Zhang, Weiming and Yu, Nenghai}, title = {Gaussian Shading: Provable Performance-Lossless Image Watermarking for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12162-12171} }
Generalized Event Cameras-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sundar_2024_CVPR, author = {Sundar, Varun and Dutson, Matthew and Ardelean, Andrei and Bruschini, Claudio and Charbon, Edoardo and Gupta, Mohit}, title = {Generalized Event Cameras}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25007-25017} }
3D Neural Edge Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Lei and Peng, Songyou and Yu, Zehao and Liu, Shaohui and Pautrat, R\'emi and Yin, Xiaochuan and Pollefeys, Marc}, title = {3D Neural Edge Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21219-21229} }
DocRes: A Generalist Model Toward Unifying Document Image Restoration Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Jiaxin and Peng, Dezhi and Liu, Chongyu and Zhang, Peirong and Jin, Lianwen}, title = {DocRes: A Generalist Model Toward Unifying Document Image Restoration Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15654-15664} }
Honeybee: Locality-enhanced Projector for Multimodal LLM-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cha_2024_CVPR, author = {Cha, Junbum and Kang, Wooyoung and Mun, Jonghwan and Roh, Byungseok}, title = {Honeybee: Locality-enhanced Projector for Multimodal LLM}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13817-13827} }
Learned Trajectory Embedding for Subspace Clustering-
[pdf]
[supp]
[bibtex]@InProceedings{Lochman_2024_CVPR, author = {Lochman, Yaroslava and Olsson, Carl and Zach, Christopher}, title = {Learned Trajectory Embedding for Subspace Clustering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19092-19102} }
Training Vision Transformers for Semi-Supervised Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2024_CVPR, author = {Hu, Xinting and Jiang, Li and Schiele, Bernt}, title = {Training Vision Transformers for Semi-Supervised Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4007-4017} }
HarmonyView: Harmonizing Consistency and Diversity in One-Image-to-3D-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Woo_2024_CVPR, author = {Woo, Sangmin and Park, Byeongjun and Go, Hyojun and Kim, Jin-Young and Kim, Changick}, title = {HarmonyView: Harmonizing Consistency and Diversity in One-Image-to-3D}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10574-10584} }
DGC-GNN: Leveraging Geometry and Color Cues for Visual Descriptor-Free 2D-3D Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Shuzhe and Kannala, Juho and Barath, Daniel}, title = {DGC-GNN: Leveraging Geometry and Color Cues for Visual Descriptor-Free 2D-3D Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20881-20891} }
CuVLER: Enhanced Unsupervised Object Discoveries through Exhaustive Self-Supervised Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Arica_2024_CVPR, author = {Arica, Shahaf and Rubin, Or and Gershov, Sapir and Laufer, Shlomi}, title = {CuVLER: Enhanced Unsupervised Object Discoveries through Exhaustive Self-Supervised Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23105-23114} }
Quantifying Task Priority for Multi-Task Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2024_CVPR, author = {Jeong, Wooseong and Yoon, Kuk-Jin}, title = {Quantifying Task Priority for Multi-Task Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {363-372} }
UnSAMFlow: Unsupervised Optical Flow Guided by Segment Anything Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2024_CVPR, author = {Yuan, Shuai and Luo, Lei and Hui, Zhuo and Pu, Can and Xiang, Xiaoyu and Ranjan, Rakesh and Demandolx, Denis}, title = {UnSAMFlow: Unsupervised Optical Flow Guided by Segment Anything Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19027-19037} }
Exploiting Inter-sample and Inter-feature Relations in Dataset Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2024_CVPR, author = {Deng, Wenxiao and Li, Wenbin and Ding, Tianyu and Wang, Lei and Zhang, Hongguang and Huang, Kuihua and Huo, Jing and Gao, Yang}, title = {Exploiting Inter-sample and Inter-feature Relations in Dataset Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17057-17066} }
On the Scalability of Diffusion-based Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Hao and Zou, Yang and Wang, Ying and Majumder, Orchid and Xie, Yusheng and Manmatha, R. and Swaminathan, Ashwin and Tu, Zhuowen and Ermon, Stefano and Soatto, Stefano}, title = {On the Scalability of Diffusion-based Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9400-9409} }
Entity-NeRF: Detecting and Removing Moving Entities in Urban Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Otonari_2024_CVPR, author = {Otonari, Takashi and Ikehata, Satoshi and Aizawa, Kiyoharu}, title = {Entity-NeRF: Detecting and Removing Moving Entities in Urban Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20892-20901} }
TAMM: TriAdapter Multi-Modal Learning for 3D Shape Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Zhihao and Cao, Shengcao and Wang, Yu-Xiong}, title = {TAMM: TriAdapter Multi-Modal Learning for 3D Shape Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21413-21423} }
GauHuman: Articulated Gaussian Splatting from Monocular Human Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2024_CVPR, author = {Hu, Shoukang and Hu, Tao and Liu, Ziwei}, title = {GauHuman: Articulated Gaussian Splatting from Monocular Human Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20418-20431} }
AnySkill: Learning Open-Vocabulary Physical Skill for Interactive Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cui_2024_CVPR, author = {Cui, Jieming and Liu, Tengyu and Liu, Nian and Yang, Yaodong and Zhu, Yixin and Huang, Siyuan}, title = {AnySkill: Learning Open-Vocabulary Physical Skill for Interactive Agents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {852-862} }
EGTR: Extracting Graph from Transformer for Scene Graph Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Im_2024_CVPR, author = {Im, Jinbae and Nam, JeongYeon and Park, Nokyung and Lee, Hyungmin and Park, Seunghyun}, title = {EGTR: Extracting Graph from Transformer for Scene Graph Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24229-24238} }
Generative Unlearning for Any Identity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Seo_2024_CVPR, author = {Seo, Juwon and Lee, Sung-Hoon and Lee, Tae-Young and Moon, Seungjun and Park, Gyeong-Moon}, title = {Generative Unlearning for Any Identity}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9151-9161} }
Context-based and Diversity-driven Specificity in Compositional Zero-Shot Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Yun and Liu, Zhe and Chen, Hang and Yao, Lina}, title = {Context-based and Diversity-driven Specificity in Compositional Zero-Shot Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17037-17046} }
FlowVid: Taming Imperfect Optical Flows for Consistent Video-to-Video Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2024_CVPR, author = {Liang, Feng and Wu, Bichen and Wang, Jialiang and Yu, Licheng and Li, Kunpeng and Zhao, Yinan and Misra, Ishan and Huang, Jia-Bin and Zhang, Peizhao and Vajda, Peter and Marculescu, Diana}, title = {FlowVid: Taming Imperfect Optical Flows for Consistent Video-to-Video Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8207-8216} }
StyleCineGAN: Landscape Cinemagraph Generation using a Pre-trained StyleGAN-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2024_CVPR, author = {Choi, Jongwoo and Seo, Kwanggyoon and Ashtari, Amirsaman and Noh, Junyong}, title = {StyleCineGAN: Landscape Cinemagraph Generation using a Pre-trained StyleGAN}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7872-7881} }
Rethinking Multi-domain Generalization with A General Learning Objective-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tan_2024_CVPR, author = {Tan, Zhaorui and Yang, Xi and Huang, Kaizhu}, title = {Rethinking Multi-domain Generalization with A General Learning Objective}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23512-23522} }
Laplacian-guided Entropy Model in Neural Codec with Blur-dissipated Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Khoshkhahtinat_2024_CVPR, author = {Khoshkhahtinat, Atefeh and Zafari, Ali and Mehta, Piyush M. and Nasrabadi, Nasser M.}, title = {Laplacian-guided Entropy Model in Neural Codec with Blur-dissipated Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3045-3054} }
Universal Novelty Detection Through Adaptive Contrastive Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mirzaei_2024_CVPR, author = {Mirzaei, Hossein and Nafez, Mojtaba and Jafari, Mohammad and Soltani, Mohammad Bagher and Azizmalayeri, Mohammad and Habibi, Jafar and Sabokrou, Mohammad and Rohban, Mohammad Hossein}, title = {Universal Novelty Detection Through Adaptive Contrastive Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22914-22923} }
Rethinking Diffusion Model for Multi-Contrast MRI Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Guangyuan and Rao, Chen and Mo, Juncheng and Zhang, Zhanjie and Xing, Wei and Zhao, Lei}, title = {Rethinking Diffusion Model for Multi-Contrast MRI Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11365-11374} }
Resurrecting Old Classes with New Data for Exemplar-Free Continual Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Goswami_2024_CVPR, author = {Goswami, Dipam and Soutif-Cormerais, Albin and Liu, Yuyang and Kamath, Sandesh and Twardowski, Bart{\l}omiej and van de Weijer, Joost}, title = {Resurrecting Old Classes with New Data for Exemplar-Free Continual Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28525-28534} }
Unknown Prompt the only Lacuna: Unveiling CLIP's Potential for Open Domain Generalization-
[pdf]
[supp]
[bibtex]@InProceedings{Singha_2024_CVPR, author = {Singha, Mainak and Jha, Ankit and Bose, Shirsha and Nair, Ashwin and Abdar, Moloud and Banerjee, Biplab}, title = {Unknown Prompt the only Lacuna: Unveiling CLIP's Potential for Open Domain Generalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13309-13319} }
Poly Kernel Inception Network for Remote Sensing Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Cai_2024_CVPR, author = {Cai, Xinhao and Lai, Qiuxia and Wang, Yuwei and Wang, Wenguan and Sun, Zeren and Yao, Yazhou}, title = {Poly Kernel Inception Network for Remote Sensing Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27706-27716} }
RMT: Retentive Networks Meet Vision Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2024_CVPR, author = {Fan, Qihang and Huang, Huaibo and Chen, Mingrui and Liu, Hongmin and He, Ran}, title = {RMT: Retentive Networks Meet Vision Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5641-5651} }
From Coarse to Fine-Grained Open-Set Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Lang_2024_CVPR, author = {Lang, Nico and Sn{\ae}bjarnarson, V\'esteinn and Cole, Elijah and Mac Aodha, Oisin and Igel, Christian and Belongie, Serge}, title = {From Coarse to Fine-Grained Open-Set Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17804-17814} }
Multimodal Pathway: Improve Transformers with Irrelevant Data from Other Modalities-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Yiyuan and Ding, Xiaohan and Gong, Kaixiong and Ge, Yixiao and Shan, Ying and Yue, Xiangyu}, title = {Multimodal Pathway: Improve Transformers with Irrelevant Data from Other Modalities}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6108-6117} }
FaceChain-ImagineID: Freely Crafting High-Fidelity Diverse Talking Faces from Disentangled Audio-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Chao and Liu, Yang and Xing, Jiazheng and Wang, Weida and Sun, Mingze and Dan, Jun and Huang, Tianxin and Li, Siyuan and Cheng, Zhi-Qi and Tai, Ying and Sun, Baigui}, title = {FaceChain-ImagineID: Freely Crafting High-Fidelity Diverse Talking Faces from Disentangled Audio}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1292-1302} }
OmniViD: A Generative Framework for Universal Video Understanding-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Junke and Chen, Dongdong and Luo, Chong and He, Bo and Yuan, Lu and Wu, Zuxuan and Jiang, Yu-Gang}, title = {OmniViD: A Generative Framework for Universal Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18209-18220} }
Naturally Supervised 3D Visual Grounding with Language-Regularized Concept Learners-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2024_CVPR, author = {Feng, Chun and Hsu, Joy and Liu, Weiyu and Wu, Jiajun}, title = {Naturally Supervised 3D Visual Grounding with Language-Regularized Concept Learners}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13269-13278} }
SSR-Encoder: Encoding Selective Subject Representation for Subject-Driven Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Yuxuan and Song, Yiren and Liu, Jiaming and Wang, Rui and Yu, Jinpeng and Tang, Hao and Li, Huaxia and Tang, Xu and Hu, Yao and Pan, Han and Jing, Zhongliang}, title = {SSR-Encoder: Encoding Selective Subject Representation for Subject-Driven Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8069-8078} }
CA-Jaccard: Camera-aware Jaccard Distance for Person Re-identification-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Yiyu and Fan, Zheyi and Chen, Zhaoru and Zhu, Yixuan}, title = {CA-Jaccard: Camera-aware Jaccard Distance for Person Re-identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17532-17541} }
Dual Prior Unfolding for Snapshot Compressive Imaging-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Jiancheng and Zeng, Haijin and Cao, Jiezhang and Chen, Yongyong and Yu, Dengxiu and Zhao, Yin-Ping}, title = {Dual Prior Unfolding for Snapshot Compressive Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25742-25752} }
COLMAP-Free 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fu_2024_CVPR, author = {Fu, Yang and Liu, Sifei and Kulkarni, Amey and Kautz, Jan and Efros, Alexei A. and Wang, Xiaolong}, title = {COLMAP-Free 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20796-20805} }
MVIP-NeRF: Multi-view 3D Inpainting on NeRF Scenes via Diffusion Prior-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Honghua and Loy, Chen Change and Pan, Xingang}, title = {MVIP-NeRF: Multi-view 3D Inpainting on NeRF Scenes via Diffusion Prior}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5344-5353} }
StegoGAN: Leveraging Steganography for Non-Bijective Image-to-Image Translation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Sidi and Chen, Yizi and Mermet, Samuel and Hurni, Lorenz and Schindler, Konrad and Gonthier, Nicolas and Landrieu, Loic}, title = {StegoGAN: Leveraging Steganography for Non-Bijective Image-to-Image Translation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7922-7931} }
M&M VTO: Multi-Garment Virtual Try-On and Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2024_CVPR, author = {Zhu, Luyang and Li, Yingwei and Liu, Nan and Peng, Hao and Yang, Dawei and Kemelmacher-Shlizerman, Ira}, title = {M\&M VTO: Multi-Garment Virtual Try-On and Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1346-1356} }
AutoAD III: The Prequel - Back to the Pixels-
[pdf]
[bibtex]@InProceedings{Han_2024_CVPR, author = {Han, Tengda and Bain, Max and Nagrani, Arsha and Varol, G\"ul and Xie, Weidi and Zisserman, Andrew}, title = {AutoAD III: The Prequel - Back to the Pixels}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18164-18174} }
Characteristics Matching Based Hash Codes Generation for Efficient Fine-grained Image Retrieval-
[pdf]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Zhen-Duo and Zhao, Li-Jun and Zhang, Zi-Chao and Luo, Xin and Xu, Xin-Shun}, title = {Characteristics Matching Based Hash Codes Generation for Efficient Fine-grained Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17273-17281} }
BadCLIP: Dual-Embedding Guided Backdoor Attack on Multimodal Contrastive Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2024_CVPR, author = {Liang, Siyuan and Zhu, Mingli and Liu, Aishan and Wu, Baoyuan and Cao, Xiaochun and Chang, Ee-Chien}, title = {BadCLIP: Dual-Embedding Guided Backdoor Attack on Multimodal Contrastive Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24645-24654} }
Dynamic Inertial Poser (DynaIP): Part-Based Motion Dynamics Learning for Enhanced Human Pose Estimation with Sparse Inertial Sensors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Yu and Xia, Songpengcheng and Chu, Lei and Yang, Jiarui and Wu, Qi and Pei, Ling}, title = {Dynamic Inertial Poser (DynaIP): Part-Based Motion Dynamics Learning for Enhanced Human Pose Estimation with Sparse Inertial Sensors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1889-1899} }
Matching 2D Images in 3D: Metric Relative Pose from Metric Correspondences-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Barroso-Laguna_2024_CVPR, author = {Barroso-Laguna, Axel and Munukutla, Sowmya and Prisacariu, Victor Adrian and Brachmann, Eric}, title = {Matching 2D Images in 3D: Metric Relative Pose from Metric Correspondences}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4852-4863} }
Efficient Vision-Language Pre-training by Cluster Masking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2024_CVPR, author = {Wei, Zihao and Pan, Zixuan and Owens, Andrew}, title = {Efficient Vision-Language Pre-training by Cluster Masking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26815-26825} }
GraCo: Granularity-Controllable Interactive Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Yian and Li, Kehan and Cheng, Zesen and Qiao, Pengchong and Zheng, Xiawu and Ji, Rongrong and Liu, Chang and Yuan, Li and Chen, Jie}, title = {GraCo: Granularity-Controllable Interactive Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3501-3510} }
M3-UDA: A New Benchmark for Unsupervised Domain Adaptive Fetal Cardiac Structure Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Pu_2024_CVPR, author = {Pu, Bin and Wang, Liwen and Yang, Jiewen and He, Guannan and Dong, Xingbo and Li, Shengli and Tan, Ying and Chen, Ming and Jin, Zhe and Li, Kenli and Li, Xiaomeng}, title = {M3-UDA: A New Benchmark for Unsupervised Domain Adaptive Fetal Cardiac Structure Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11621-11630} }
GPS-Gaussian: Generalizable Pixel-wise 3D Gaussian Splatting for Real-time Human Novel View Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Zheng_2024_CVPR, author = {Zheng, Shunyuan and Zhou, Boyao and Shao, Ruizhi and Liu, Boning and Zhang, Shengping and Nie, Liqiang and Liu, Yebin}, title = {GPS-Gaussian: Generalizable Pixel-wise 3D Gaussian Splatting for Real-time Human Novel View Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19680-19690} }
Chat-UniVi: Unified Visual Representation Empowers Large Language Models with Image and Video Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Jin_2024_CVPR, author = {Jin, Peng and Takanobu, Ryuichi and Zhang, Wancai and Cao, Xiaochun and Yuan, Li}, title = {Chat-UniVi: Unified Visual Representation Empowers Large Language Models with Image and Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13700-13710} }
MAGICK: A Large-scale Captioned Dataset from Matting Generated Images using Chroma Keying-
[pdf]
[supp]
[bibtex]@InProceedings{Burgert_2024_CVPR, author = {Burgert, Ryan D. and Price, Brian L. and Kuen, Jason and Li, Yijun and Ryoo, Michael S.}, title = {MAGICK: A Large-scale Captioned Dataset from Matting Generated Images using Chroma Keying}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22595-22604} }
Video Super-Resolution Transformer with Masked Inter&Intra-Frame Attention-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2024_CVPR, author = {Zhou, Xingyu and Zhang, Leheng and Zhao, Xiaorui and Wang, Keze and Li, Leida and Gu, Shuhang}, title = {Video Super-Resolution Transformer with Masked Inter\&Intra-Frame Attention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25399-25408} }
Token Transformation Matters: Towards Faithful Post-hoc Explanation for Vision Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Junyi and Duan, Bin and Kang, Weitai and Tang, Hao and Yan, Yan}, title = {Token Transformation Matters: Towards Faithful Post-hoc Explanation for Vision Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10926-10935} }
Bayesian Differentiable Physics for Cloth Digitalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gong_2024_CVPR, author = {Gong, Deshan and Mao, Ningtao and Wang, He}, title = {Bayesian Differentiable Physics for Cloth Digitalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11841-11851} }
G-HOP: Generative Hand-Object Prior for Interaction Reconstruction and Grasp Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Ye_2024_CVPR, author = {Ye, Yufei and Gupta, Abhinav and Kitani, Kris and Tulsiani, Shubham}, title = {G-HOP: Generative Hand-Object Prior for Interaction Reconstruction and Grasp Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1911-1920} }
Higher-order Relational Reasoning for Pedestrian Trajectory Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Sungjune and Chi, Hyung-gun and Lim, Hyerin and Ramani, Karthik and Kim, Jinkyu and Kim, Sangpil}, title = {Higher-order Relational Reasoning for Pedestrian Trajectory Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15251-15260} }
SurroundSDF: Implicit 3D Scene Understanding Based on Signed Distance Field-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Lizhe and Wang, Bohua and Xie, Hongwei and Liu, Daqi and Liu, Li and Tian, Zhiqiang and Yang, Kuiyuan and Wang, Bing}, title = {SurroundSDF: Implicit 3D Scene Understanding Based on Signed Distance Field}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21614-21623} }
Contrastive Denoising Score for Text-guided Latent Diffusion Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nam_2024_CVPR, author = {Nam, Hyelin and Kwon, Gihyun and Park, Geon Yeong and Ye, Jong Chul}, title = {Contrastive Denoising Score for Text-guided Latent Diffusion Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9192-9201} }
Neural Point Cloud Diffusion for Disentangled 3D Shape and Appearance Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Schroppel_2024_CVPR, author = {Schr\"oppel, Philipp and Wewer, Christopher and Lenssen, Jan Eric and Ilg, Eddy and Brox, Thomas}, title = {Neural Point Cloud Diffusion for Disentangled 3D Shape and Appearance Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8785-8794} }
RealNet: A Feature Selection Network with Realistic Synthetic Anomaly for Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Ximiao and Xu, Min and Zhou, Xiuzhuang}, title = {RealNet: A Feature Selection Network with Realistic Synthetic Anomaly for Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16699-16708} }
Outdoor Scene Extrapolation with Hierarchical Generative Cellular Automata-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Dongsu and Williams, Francis and Gojcic, Zan and Kreis, Karsten and Fidler, Sanja and Kim, Young Min and Kar, Amlan}, title = {Outdoor Scene Extrapolation with Hierarchical Generative Cellular Automata}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20145-20154} }
Instruct 4D-to-4D: Editing 4D Scenes as Pseudo-3D Scenes Using 2D Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Mou_2024_CVPR, author = {Mou, Linzhan and Chen, Jun-Kun and Wang, Yu-Xiong}, title = {Instruct 4D-to-4D: Editing 4D Scenes as Pseudo-3D Scenes Using 2D Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20176-20185} }
VAREN: Very Accurate and Realistic Equine Network-
[pdf]
[supp]
[bibtex]@InProceedings{Zuffi_2024_CVPR, author = {Zuffi, Silvia and Mellbin, Ylva and Li, Ci and Hoeschle, Markus and Kjellstr\"om, Hedvig and Polikovsky, Senya and Hernlund, Elin and Black, Michael J.}, title = {VAREN: Very Accurate and Realistic Equine Network}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5374-5383} }
Photo-SLAM: Real-time Simultaneous Localization and Photorealistic Mapping for Monocular Stereo and RGB-D Cameras-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Huajian and Li, Longwei and Cheng, Hui and Yeung, Sai-Kit}, title = {Photo-SLAM: Real-time Simultaneous Localization and Photorealistic Mapping for Monocular Stereo and RGB-D Cameras}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21584-21593} }
SD-DiT: Unleashing the Power of Self-supervised Discrimination in Diffusion Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2024_CVPR, author = {Zhu, Rui and Pan, Yingwei and Li, Yehao and Yao, Ting and Sun, Zhenglong and Mei, Tao and Chen, Chang Wen}, title = {SD-DiT: Unleashing the Power of Self-supervised Discrimination in Diffusion Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8435-8445} }
Multi-modal Instruction Tuned LLMs with Fine-grained Visual Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2024_CVPR, author = {He, Junwen and Wang, Yifan and Wang, Lijun and Lu, Huchuan and He, Jun-Yan and Lan, Jin-Peng and Luo, Bin and Xie, Xuansong}, title = {Multi-modal Instruction Tuned LLMs with Fine-grained Visual Perception}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13980-13990} }
ProMotion: Prototypes As Motion Learners-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lu_2024_CVPR, author = {Lu, Yawen and Liu, Dongfang and Wang, Qifan and Han, Cheng and Cui, Yiming and Cao, Zhiwen and Zhang, Xueling and Chen, Yingjie Victor and Fan, Heng}, title = {ProMotion: Prototypes As Motion Learners}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28109-28119} }
SpatialTracker: Tracking Any 2D Pixels in 3D Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2024_CVPR, author = {Xiao, Yuxi and Wang, Qianqian and Zhang, Shangzhan and Xue, Nan and Peng, Sida and Shen, Yujun and Zhou, Xiaowei}, title = {SpatialTracker: Tracking Any 2D Pixels in 3D Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20406-20417} }
LaMPilot: An Open Benchmark Dataset for Autonomous Driving with Language Model Programs-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ma_2024_CVPR, author = {Ma, Yunsheng and Cui, Can and Cao, Xu and Ye, Wenqian and Liu, Peiran and Lu, Juanwu and Abdelraouf, Amr and Gupta, Rohit and Han, Kyungtae and Bera, Aniket and Rehg, James M. and Wang, Ziran}, title = {LaMPilot: An Open Benchmark Dataset for Autonomous Driving with Language Model Programs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15141-15151} }
MedBN: Robust Test-Time Adaptation against Malicious Test Samples-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2024_CVPR, author = {Park, Hyejin and Hwang, Jeongyeon and Mun, Sunung and Park, Sangdon and Ok, Jungseul}, title = {MedBN: Robust Test-Time Adaptation against Malicious Test Samples}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5997-6007} }
Unsupervised Gaze Representation Learning from Multi-view Face Images-
[pdf]
[bibtex]@InProceedings{Bao_2024_CVPR, author = {Bao, Yiwei and Lu, Feng}, title = {Unsupervised Gaze Representation Learning from Multi-view Face Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1419-1428} }
FairDeDup: Detecting and Mitigating Vision-Language Fairness Disparities in Semantic Dataset Deduplication-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Slyman_2024_CVPR, author = {Slyman, Eric and Lee, Stefan and Cohen, Scott and Kafle, Kushal}, title = {FairDeDup: Detecting and Mitigating Vision-Language Fairness Disparities in Semantic Dataset Deduplication}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13905-13916} }
CrossMAE: Cross-Modality Masked Autoencoders for Region-Aware Audio-Visual Pre-Training-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2024_CVPR, author = {Guo, Yuxin and Sun, Siyang and Ma, Shuailei and Zheng, Kecheng and Bao, Xiaoyi and Ma, Shijie and Zou, Wei and Zheng, Yun}, title = {CrossMAE: Cross-Modality Masked Autoencoders for Region-Aware Audio-Visual Pre-Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26721-26731} }
Osprey: Pixel Understanding with Visual Instruction Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2024_CVPR, author = {Yuan, Yuqian and Li, Wentong and Liu, Jian and Tang, Dongqi and Luo, Xinjie and Qin, Chi and Zhang, Lei and Zhu, Jianke}, title = {Osprey: Pixel Understanding with Visual Instruction Tuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28202-28211} }
Modality-agnostic Domain Generalizable Medical Image Segmentation by Multi-Frequency in Multi-Scale Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nam_2024_CVPR, author = {Nam, Ju-Hyeon and Syazwany, Nur Suriza and Kim, Su Jung and Lee, Sang-Chul}, title = {Modality-agnostic Domain Generalizable Medical Image Segmentation by Multi-Frequency in Multi-Scale Attention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11480-11491} }
Few-shot Learner Parameterization by Diffusion Time-steps-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yue_2024_CVPR, author = {Yue, Zhongqi and Zhou, Pan and Hong, Richang and Zhang, Hanwang and Sun, Qianru}, title = {Few-shot Learner Parameterization by Diffusion Time-steps}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23263-23272} }
Auto MC-Reward: Automated Dense Reward Design with Large Language Models for Minecraft-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Hao and Yang, Xue and Wang, Zhaokai and Zhu, Xizhou and Zhou, Jie and Qiao, Yu and Wang, Xiaogang and Li, Hongsheng and Lu, Lewei and Dai, Jifeng}, title = {Auto MC-Reward: Automated Dense Reward Design with Large Language Models for Minecraft}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16426-16435} }
GenFlow: Generalizable Recurrent Flow for 6D Pose Refinement of Novel Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Moon_2024_CVPR, author = {Moon, Sungphill and Son, Hyeontae and Hur, Dongcheol and Kim, Sangwook}, title = {GenFlow: Generalizable Recurrent Flow for 6D Pose Refinement of Novel Objects}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10039-10049} }
OrCo: Towards Better Generalization via Orthogonality and Contrast for Few-Shot Class-Incremental Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ahmed_2024_CVPR, author = {Ahmed, Noor and Kukleva, Anna and Schiele, Bernt}, title = {OrCo: Towards Better Generalization via Orthogonality and Contrast for Few-Shot Class-Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28762-28771} }
MuGE: Multiple Granularity Edge Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2024_CVPR, author = {Zhou, Caixia and Huang, Yaping and Pu, Mengyang and Guan, Qingji and Deng, Ruoxi and Ling, Haibin}, title = {MuGE: Multiple Granularity Edge Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25952-25962} }
Real-World Efficient Blind Motion Deblurring via Blur Pixel Discretization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Insoo and Choi, Jae Seok and Seo, Geonseok and Kwon, Kinam and Shin, Jinwoo and Lee, Hyong-Euk}, title = {Real-World Efficient Blind Motion Deblurring via Blur Pixel Discretization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25879-25888} }
EmoVIT: Revolutionizing Emotion Insights with Visual Instruction Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2024_CVPR, author = {Xie, Hongxia and Peng, Chu-Jun and Tseng, Yu-Wen and Chen, Hung-Jen and Hsu, Chan-Feng and Shuai, Hong-Han and Cheng, Wen-Huang}, title = {EmoVIT: Revolutionizing Emotion Insights with Visual Instruction Tuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26596-26605} }
Learning to Count without Annotations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Knobel_2024_CVPR, author = {Knobel, Lukas and Han, Tengda and Asano, Yuki M.}, title = {Learning to Count without Annotations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22924-22934} }
Logarithmic Lenses: Exploring Log RGB Data for Image Classification-
[pdf]
[bibtex]@InProceedings{Maxwell_2024_CVPR, author = {Maxwell, Bruce A. and Singhania, Sumegha and Patel, Avnish and Kumar, Rahul and Fryling, Heather and Li, Sihan and Sun, Haonan and He, Ping and Li, Zewen}, title = {Logarithmic Lenses: Exploring Log RGB Data for Image Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17470-17479} }
AEROBLADE: Training-Free Detection of Latent Diffusion Images Using Autoencoder Reconstruction Error-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ricker_2024_CVPR, author = {Ricker, Jonas and Lukovnikov, Denis and Fischer, Asja}, title = {AEROBLADE: Training-Free Detection of Latent Diffusion Images Using Autoencoder Reconstruction Error}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9130-9140} }
Scaled Decoupled Distillation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wei_2024_CVPR, author = {Wei, Shicai and Luo, Chunbo and Luo, Yang}, title = {Scaled Decoupled Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15975-15983} }
NARUTO: Neural Active Reconstruction from Uncertain Target Observations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2024_CVPR, author = {Feng, Ziyue and Zhan, Huangying and Chen, Zheng and Yan, Qingan and Xu, Xiangyu and Cai, Changjiang and Li, Bing and Zhu, Qilun and Xu, Yi}, title = {NARUTO: Neural Active Reconstruction from Uncertain Target Observations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21572-21583} }
Point2CAD: Reverse Engineering CAD Models from 3D Point Clouds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Yujia and Obukhov, Anton and Wegner, Jan Dirk and Schindler, Konrad}, title = {Point2CAD: Reverse Engineering CAD Models from 3D Point Clouds}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3763-3772} }
Learnable Earth Parser: Discovering 3D Prototypes in Aerial Scans-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Loiseau_2024_CVPR, author = {Loiseau, Romain and Vincent, Elliot and Aubry, Mathieu and Landrieu, Loic}, title = {Learnable Earth Parser: Discovering 3D Prototypes in Aerial Scans}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27874-27884} }
NeRFiller: Completing Scenes via Generative 3D Inpainting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Weber_2024_CVPR, author = {Weber, Ethan and Holynski, Aleksander and Jampani, Varun and Saxena, Saurabh and Snavely, Noah and Kar, Abhishek and Kanazawa, Angjoo}, title = {NeRFiller: Completing Scenes via Generative 3D Inpainting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20731-20741} }
Cloud-Device Collaborative Learning for Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Guanqun and Liu, Jiaming and Li, Chenxuan and Zhang, Yuan and Ma, Junpeng and Wei, Xinyu and Zhang, Kevin and Chong, Maurice and Zhang, Renrui and Liu, Yijiang and Zhang, Shanghang}, title = {Cloud-Device Collaborative Learning for Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12646-12655} }
KD-DETR: Knowledge Distillation for Detection Transformer with Consistent Distillation Points Sampling-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Yu and Li, Xin and Weng, Shengzhao and Zhang, Gang and Yue, Haixiao and Feng, Haocheng and Han, Junyu and Ding, Errui}, title = {KD-DETR: Knowledge Distillation for Detection Transformer with Consistent Distillation Points Sampling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16016-16025} }
Absolute Pose from One or Two Scaled and Oriented Features-
[pdf]
[supp]
[bibtex]@InProceedings{Ventura_2024_CVPR, author = {Ventura, Jonathan and Kukelova, Zuzana and Sattler, Torsten and Bar{\'a}th, D{\'a}niel}, title = {Absolute Pose from One or Two Scaled and Oriented Features}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20870-20880} }
Source-Free Domain Adaptation with Frozen Multimodal Foundation Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2024_CVPR, author = {Tang, Song and Su, Wenxin and Ye, Mao and Zhu, Xiatian}, title = {Source-Free Domain Adaptation with Frozen Multimodal Foundation Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23711-23720} }
LocLLM: Exploiting Generalizable Human Keypoint Localization via Large Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Dongkai and Xuan, Shiyu and Zhang, Shiliang}, title = {LocLLM: Exploiting Generalizable Human Keypoint Localization via Large Language Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {614-623} }
MMA-Diffusion: MultiModal Attack on Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Yijun and Gao, Ruiyuan and Wang, Xiaosen and Ho, Tsung-Yi and Xu, Nan and Xu, Qiang}, title = {MMA-Diffusion: MultiModal Attack on Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7737-7746} }
Benchmarking Audio Visual Segmentation for Long-Untrimmed Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Chen and Li, Peike Patrick and Yu, Qingtao and Sheng, Hongwei and Wang, Dadong and Li, Lincheng and Yu, Xin}, title = {Benchmarking Audio Visual Segmentation for Long-Untrimmed Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22712-22722} }
EMCAD: Efficient Multi-scale Convolutional Attention Decoding for Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rahman_2024_CVPR, author = {Rahman, Md Mostafijur and Munir, Mustafa and Marculescu, Radu}, title = {EMCAD: Efficient Multi-scale Convolutional Attention Decoding for Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11769-11779} }
VTQA: Visual Text Question Answering via Entity Alignment and Cross-Media Reasoning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Kang and Wu, Xiangqian}, title = {VTQA: Visual Text Question Answering via Entity Alignment and Cross-Media Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27218-27227} }
QN-Mixer: A Quasi-Newton MLP-Mixer Model for Sparse-View CT Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Ayad_2024_CVPR, author = {Ayad, Ishak and Larue, Nicolas and Nguyen, Mai K.}, title = {QN-Mixer: A Quasi-Newton MLP-Mixer Model for Sparse-View CT Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25317-25326} }
Learning CNN on ViT: A Hybrid Model to Explicitly Class-specific Boundaries for Domain Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ngo_2024_CVPR, author = {Ngo, Ba Hung and Do-Tran, Nhat-Tuong and Nguyen, Tuan-Ngoc and Jeon, Hae-Gon and Choi, Tae Jong}, title = {Learning CNN on ViT: A Hybrid Model to Explicitly Class-specific Boundaries for Domain Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28545-28554} }
A Picture is Worth More Than 77 Text Tokens: Evaluating CLIP-Style Models on Dense Captions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Urbanek_2024_CVPR, author = {Urbanek, Jack and Bordes, Florian and Astolfi, Pietro and Williamson, Mary and Sharma, Vasu and Romero-Soriano, Adriana}, title = {A Picture is Worth More Than 77 Text Tokens: Evaluating CLIP-Style Models on Dense Captions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26700-26709} }
HanDiffuser: Text-to-Image Generation With Realistic Hand Appearances-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Narasimhaswamy_2024_CVPR, author = {Narasimhaswamy, Supreeth and Bhattacharya, Uttaran and Chen, Xiang and Dasgupta, Ishita and Mitra, Saayan and Hoai, Minh}, title = {HanDiffuser: Text-to-Image Generation With Realistic Hand Appearances}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2468-2479} }
Infinigen Indoors: Photorealistic Indoor Scenes using Procedural Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Raistrick_2024_CVPR, author = {Raistrick, Alexander and Mei, Lingjie and Kayan, Karhan and Yan, David and Zuo, Yiming and Han, Beining and Wen, Hongyu and Parakh, Meenal and Alexandropoulos, Stamatis and Lipson, Lahav and Ma, Zeyu and Deng, Jia}, title = {Infinigen Indoors: Photorealistic Indoor Scenes using Procedural Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21783-21794} }
MART: Masked Affective RepresenTation Learning via Masked Temporal Distribution Distillation-
[pdf]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Zhicheng and Zhao, Pancheng and Park, Eunil and Yang, Jufeng}, title = {MART: Masked Affective RepresenTation Learning via Masked Temporal Distribution Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12830-12840} }
MTLoRA: Low-Rank Adaptation Approach for Efficient Multi-Task Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Agiza_2024_CVPR, author = {Agiza, Ahmed and Neseem, Marina and Reda, Sherief}, title = {MTLoRA: Low-Rank Adaptation Approach for Efficient Multi-Task Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16196-16205} }
Hierarchical Patch Diffusion Models for High-Resolution Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Skorokhodov_2024_CVPR, author = {Skorokhodov, Ivan and Menapace, Willi and Siarohin, Aliaksandr and Tulyakov, Sergey}, title = {Hierarchical Patch Diffusion Models for High-Resolution Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7569-7579} }
Motion Blur Decomposition with Cross-shutter Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ji_2024_CVPR, author = {Ji, Xiang and Jiang, Haiyang and Zheng, Yinqiang}, title = {Motion Blur Decomposition with Cross-shutter Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12534-12543} }
Scene-adaptive and Region-aware Multi-modal Prompt for Open Vocabulary Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Xiaowei and Liu, Xianglong and Wang, Duorui and Gao, Yajun and Liu, Zhide}, title = {Scene-adaptive and Region-aware Multi-modal Prompt for Open Vocabulary Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16741-16750} }
MimicDiffusion: Purifying Adversarial Perturbation via Mimicking Clean Diffusion Model-
[pdf]
[supp]
[bibtex]@InProceedings{Song_2024_CVPR, author = {Song, Kaiyu and Lai, Hanjiang and Pan, Yan and Yin, Jian}, title = {MimicDiffusion: Purifying Adversarial Perturbation via Mimicking Clean Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24665-24674} }
Neural Implicit Morphing of Face Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Schardong_2024_CVPR, author = {Schardong, Guilherme and Novello, Tiago and Paz, Hallison and Medvedev, Iurii and da Silva, Vin{\'\i}cius and Velho, Luiz and Gon{\c{c}}alves, Nuno}, title = {Neural Implicit Morphing of Face Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7321-7330} }
UniGS: Unified Representation for Image Generation and Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qi_2024_CVPR, author = {Qi, Lu and Yang, Lehan and Guo, Weidong and Xu, Yu and Du, Bo and Jampani, Varun and Yang, Ming-Hsuan}, title = {UniGS: Unified Representation for Image Generation and Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6305-6315} }
Robust Synthetic-to-Real Transfer for Stereo Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Jiawei and Li, Jiahe and Huang, Lei and Yu, Xiaohan and Gu, Lin and Zheng, Jin and Bai, Xiao}, title = {Robust Synthetic-to-Real Transfer for Stereo Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20247-20257} }
Instance-Aware Group Quantization for Vision Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Moon_2024_CVPR, author = {Moon, Jaehyeon and Kim, Dohyung and Cheon, Junyong and Ham, Bumsub}, title = {Instance-Aware Group Quantization for Vision Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16132-16141} }
A General and Efficient Training for Transformer via Token Expansion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Wenxuan and Shen, Yunhang and Xie, Jiao and Zhang, Baochang and He, Gaoqi and Li, Ke and Sun, Xing and Lin, Shaohui}, title = {A General and Efficient Training for Transformer via Token Expansion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15783-15792} }
GenZI: Zero-Shot 3D Human-Scene Interaction Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Lei and Dai, Angela}, title = {GenZI: Zero-Shot 3D Human-Scene Interaction Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20465-20474} }
Tyche: Stochastic In-Context Learning for Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rakic_2024_CVPR, author = {Rakic, Marianne and Wong, Hallee E. and Ortiz, Jose Javier Gonzalez and Cimini, Beth A. and Guttag, John V. and Dalca, Adrian V.}, title = {Tyche: Stochastic In-Context Learning for Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11159-11173} }
DiffAssemble: A Unified Graph-Diffusion Model for 2D and 3D Reassembly-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Scarpellini_2024_CVPR, author = {Scarpellini, Gianluca and Fiorini, Stefano and Giuliari, Francesco and Moreiro, Pietro and Del Bue, Alessio}, title = {DiffAssemble: A Unified Graph-Diffusion Model for 2D and 3D Reassembly}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28098-28108} }
NeISF: Neural Incident Stokes Field for Geometry and Material Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Chenhao and Ono, Taishi and Uemori, Takeshi and Mihara, Hajime and Gatto, Alexander and Nagahara, Hajime and Moriuchi, Yusuke}, title = {NeISF: Neural Incident Stokes Field for Geometry and Material Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21434-21445} }
Training-Free Open-Vocabulary Segmentation with Offline Diffusion-Augmented Prototype Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Barsellotti_2024_CVPR, author = {Barsellotti, Luca and Amoroso, Roberto and Cornia, Marcella and Baraldi, Lorenzo and Cucchiara, Rita}, title = {Training-Free Open-Vocabulary Segmentation with Offline Diffusion-Augmented Prototype Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3689-3698} }
YOLO-World: Real-Time Open-Vocabulary Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Cheng_2024_CVPR, author = {Cheng, Tianheng and Song, Lin and Ge, Yixiao and Liu, Wenyu and Wang, Xinggang and Shan, Ying}, title = {YOLO-World: Real-Time Open-Vocabulary Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16901-16911} }
ViT-Lens: Towards Omni-modal Representations-
[pdf]
[supp]
[bibtex]@InProceedings{Lei_2024_CVPR, author = {Lei, Weixian and Ge, Yixiao and Yi, Kun and Zhang, Jianfeng and Gao, Difei and Sun, Dylan and Ge, Yuying and Shan, Ying and Shou, Mike Zheng}, title = {ViT-Lens: Towards Omni-modal Representations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26647-26657} }
Cross-Dimension Affinity Distillation for 3D EM Neuron Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Xiaoyu and Cai, Miaomiao and Chen, Yinda and Zhang, Yueyi and Shi, Te and Zhang, Ruobing and Chen, Xuejin and Xiong, Zhiwei}, title = {Cross-Dimension Affinity Distillation for 3D EM Neuron Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11104-11113} }
HUGS: Human Gaussian Splats-
[pdf]
[arXiv]
[bibtex]@InProceedings{Kocabas_2024_CVPR, author = {Kocabas, Muhammed and Chang, Jen-Hao Rick and Gabriel, James and Tuzel, Oncel and Ranjan, Anurag}, title = {HUGS: Human Gaussian Splats}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {505-515} }
GeoChat: Grounded Large Vision-Language Model for Remote Sensing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kuckreja_2024_CVPR, author = {Kuckreja, Kartik and Danish, Muhammad Sohail and Naseer, Muzammal and Das, Abhijit and Khan, Salman and Khan, Fahad Shahbaz}, title = {GeoChat: Grounded Large Vision-Language Model for Remote Sensing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27831-27840} }
PhysPT: Physics-aware Pretrained Transformer for Estimating Human Dynamics from Monocular Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Yufei and Kephart, Jeffrey O. and Cui, Zijun and Ji, Qiang}, title = {PhysPT: Physics-aware Pretrained Transformer for Estimating Human Dynamics from Monocular Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2305-2317} }
Producing and Leveraging Online Map Uncertainty in Trajectory Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gu_2024_CVPR, author = {Gu, Xunjiang and Song, Guanyu and Gilitschenski, Igor and Pavone, Marco and Ivanovic, Boris}, title = {Producing and Leveraging Online Map Uncertainty in Trajectory Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14521-14530} }
PerceptionGPT: Effectively Fusing Visual Perception into LLM-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pi_2024_CVPR, author = {Pi, Renjie and Yao, Lewei and Gao, Jiahui and Zhang, Jipeng and Zhang, Tong}, title = {PerceptionGPT: Effectively Fusing Visual Perception into LLM}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27124-27133} }
Probabilistic Speech-Driven 3D Facial Motion Synthesis: New Benchmarks, Methods, and Applications-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Karren D. and Ranjan, Anurag and Chang, Jen-Hao Rick and Vemulapalli, Raviteja and Tuzel, Oncel}, title = {Probabilistic Speech-Driven 3D Facial Motion Synthesis: New Benchmarks, Methods, and Applications}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27294-27303} }
LASO: Language-guided Affordance Segmentation on 3D Object-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Yicong and Zhao, Na and Xiao, Junbin and Feng, Chun and Wang, Xiang and Chua, Tat-seng}, title = {LASO: Language-guided Affordance Segmentation on 3D Object}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14251-14260} }
Riemannian Multinomial Logistics Regression for SPD Neural Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Ziheng and Song, Yue and Liu, Gaowen and Kompella, Ramana Rao and Wu, Xiao-Jun and Sebe, Nicu}, title = {Riemannian Multinomial Logistics Regression for SPD Neural Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17086-17096} }
FreGS: 3D Gaussian Splatting with Progressive Frequency Regularization-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Jiahui and Zhan, Fangneng and Xu, Muyu and Lu, Shijian and Xing, Eric}, title = {FreGS: 3D Gaussian Splatting with Progressive Frequency Regularization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21424-21433} }
Discriminative Sample-Guided and Parameter-Efficient Feature Space Adaptation for Cross-Domain Few-Shot Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Perera_2024_CVPR, author = {Perera, Rashindrie and Halgamuge, Saman}, title = {Discriminative Sample-Guided and Parameter-Efficient Feature Space Adaptation for Cross-Domain Few-Shot Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23794-23804} }
What Sketch Explainability Really Means for Downstream Tasks?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bandyopadhyay_2024_CVPR, author = {Bandyopadhyay, Hmrishav and Chowdhury, Pinaki Nath and Bhunia, Ayan Kumar and Sain, Aneeshan and Xiang, Tao and Song, Yi-Zhe}, title = {What Sketch Explainability Really Means for Downstream Tasks?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10997-11008} }
Neural Exposure Fusion for High-Dynamic Range Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Onzon_2024_CVPR, author = {Onzon, Emmanuel and B{\"o}mer, Maximilian and Mannan, Fahim and Heide, Felix}, title = {Neural Exposure Fusion for High-Dynamic Range Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17564-17573} }
EfficientDreamer: High-Fidelity and Robust 3D Creation via Orthogonal-view Diffusion Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2024_CVPR, author = {Hu, Zhipeng and Zhao, Minda and Zhao, Chaoyi and Liang, Xinyue and Li, Lincheng and Zhao, Zeng and Fan, Changjie and Zhou, Xiaowei and Yu, Xin}, title = {EfficientDreamer: High-Fidelity and Robust 3D Creation via Orthogonal-view Diffusion Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4949-4958} }
HOIAnimator: Generating Text-prompt Human-object Animations using Novel Perceptive Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Song_2024_CVPR, author = {Song, Wenfeng and Zhang, Xinyu and Li, Shuai and Gao, Yang and Hao, Aimin and Hou, Xia and Chen, Chenglizhao and Li, Ning and Qin, Hong}, title = {HOIAnimator: Generating Text-prompt Human-object Animations using Novel Perceptive Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {811-820} }
SyncTalk: The Devil is in the Synchronization for Talking Head Synthesis-
[pdf]
[arXiv]
[bibtex]@InProceedings{Peng_2024_CVPR, author = {Peng, Ziqiao and Hu, Wentao and Shi, Yue and Zhu, Xiangyu and Zhang, Xiaomei and Zhao, Hao and He, Jun and Liu, Hongyan and Fan, Zhaoxin}, title = {SyncTalk: The Devil is in the Synchronization for Talking Head Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {666-676} }
SFOD: Spiking Fusion Object Detector-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2024_CVPR, author = {Fan, Yimeng and Zhang, Wei and Liu, Changsong and Li, Mingyang and Lu, Wenrui}, title = {SFOD: Spiking Fusion Object Detector}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17191-17200} }
Detector-Free Structure from Motion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2024_CVPR, author = {He, Xingyi and Sun, Jiaming and Wang, Yifan and Peng, Sida and Huang, Qixing and Bao, Hujun and Zhou, Xiaowei}, title = {Detector-Free Structure from Motion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21594-21603} }
CG-HOI: Contact-Guided 3D Human-Object Interaction Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Diller_2024_CVPR, author = {Diller, Christian and Dai, Angela}, title = {CG-HOI: Contact-Guided 3D Human-Object Interaction Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19888-19901} }
Towards Surveillance Video-and-Language Understanding: New Dataset, Baselines, and Challenges-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2024_CVPR, author = {Yuan, Tongtong and Zhang, Xuange and Liu, Kun and Liu, Bo and Chen, Chen and Jin, Jian and Jiao, Zhenzhen}, title = {Towards Surveillance Video-and-Language Understanding: New Dataset, Baselines, and Challenges}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22052-22061} }
AdaRevD: Adaptive Patch Exiting Reversible Decoder Pushes the Limit of Image Deblurring-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mao_2024_CVPR, author = {Mao, Xintian and Li, Qingli and Wang, Yan}, title = {AdaRevD: Adaptive Patch Exiting Reversible Decoder Pushes the Limit of Image Deblurring}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25681-25690} }
Learning to Remove Wrinkled Transparent Film with Polarized Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2024_CVPR, author = {Tang, Jiaqi and Wu, Ruizheng and Xu, Xiaogang and Hu, Sixing and Chen, Ying-Cong}, title = {Learning to Remove Wrinkled Transparent Film with Polarized Prior}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24987-24996} }
OpenEQA: Embodied Question Answering in the Era of Foundation Models-
[pdf]
[supp]
[bibtex]@InProceedings{Majumdar_2024_CVPR, author = {Majumdar, Arjun and Ajay, Anurag and Zhang, Xiaohan and Putta, Pranav and Yenamandra, Sriram and Henaff, Mikael and Silwal, Sneha and Mcvay, Paul and Maksymets, Oleksandr and Arnaud, Sergio and Yadav, Karmesh and Li, Qiyang and Newman, Ben and Sharma, Mohit and Berges, Vincent and Zhang, Shiqi and Agrawal, Pulkit and Bisk, Yonatan and Batra, Dhruv and Kalakrishnan, Mrinal and Meier, Franziska and Paxton, Chris and Sax, Alexander and Rajeswaran, Aravind}, title = {OpenEQA: Embodied Question Answering in the Era of Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16488-16498} }
DreamSalon: A Staged Diffusion Framework for Preserving Identity-Context in Editable Face Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2024_CVPR, author = {Lin, Haonan}, title = {DreamSalon: A Staged Diffusion Framework for Preserving Identity-Context in Editable Face Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8589-8598} }
Dispel Darkness for Better Fusion: A Controllable Visual Enhancer based on Cross-modal Conditional Adversarial Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Hao and Tang, Linfeng and Xiang, Xinyu and Zuo, Xuhui and Ma, Jiayi}, title = {Dispel Darkness for Better Fusion: A Controllable Visual Enhancer based on Cross-modal Conditional Adversarial Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26487-26496} }
Querying as Prompt: Parameter-Efficient Learning for Multimodal Language Model-
[pdf]
[supp]
[bibtex]@InProceedings{Liang_2024_CVPR, author = {Liang, Tian and Huang, Jing and Kong, Ming and Chen, Luyuan and Zhu, Qiang}, title = {Querying as Prompt: Parameter-Efficient Learning for Multimodal Language Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26855-26865} }
DePT: Decoupled Prompt Tuning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Ji and Wu, Shihan and Gao, Lianli and Shen, Heng Tao and Song, Jingkuan},
  title     = {{DePT}: Decoupled Prompt Tuning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12924--12933},
}
Neural Super-Resolution for Real-time Rendering with Radiance Demodulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Jia and Chen, Ziling and Wu, Xiaolong and Wang, Lu and Wang, Beibei and Zhang, Lei},
  title     = {Neural Super-Resolution for Real-time Rendering with Radiance Demodulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4357--4367},
}
Deformable 3D Gaussians for High-Fidelity Monocular Dynamic Scene Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Ziyi and Gao, Xinyu and Zhou, Wen and Jiao, Shaohui and Zhang, Yuqing and Jin, Xiaogang},
  title     = {Deformable {3D} {Gaussians} for High-Fidelity Monocular Dynamic Scene Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {20331--20341},
}
Enhancing 3D Object Detection with 2D Detection-Guided Query Anchors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ji_2024_CVPR,
  author    = {Ji, Haoxuanye and Liang, Pengpeng and Cheng, Erkang},
  title     = {Enhancing {3D} Object Detection with {2D} Detection-Guided Query Anchors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {21178--21187},
}
Continual Forgetting for Pre-trained Vision Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Hongbo and Ni, Bolin and Fan, Junsong and Wang, Yuxi and Chen, Yuntao and Meng, Gaofeng and Zhang, Zhaoxiang},
  title     = {Continual Forgetting for Pre-trained Vision Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {28631--28642},
}
Real Acoustic Fields: An Audio-Visual Room Acoustics Dataset and Benchmark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Ziyang and Gebru, Israel D. and Richardt, Christian and Kumar, Anurag and Laney, William and Owens, Andrew and Richard, Alexander},
  title     = {Real Acoustic Fields: An Audio-Visual Room Acoustics Dataset and Benchmark},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {21886--21896},
}
A Generative Approach for Wikipedia-Scale Visual Entity Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Caron_2024_CVPR,
  author    = {Caron, Mathilde and Iscen, Ahmet and Fathi, Alireza and Schmid, Cordelia},
  title     = {A Generative Approach for {Wikipedia}-Scale Visual Entity Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17313--17322},
}
A Physics-informed Low-rank Deep Neural Network for Blind and Universal Lens Aberration Correction-
[pdf]
[bibtex]@InProceedings{Gong_2024_CVPR,
  author    = {Gong, Jin and Yang, Runzhao and Zhang, Weihang and Suo, Jinli and Dai, Qionghai},
  title     = {A Physics-informed Low-rank Deep Neural Network for Blind and Universal Lens Aberration Correction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {24861--24870},
}
Open-Vocabulary Object 6D Pose Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Corsetti_2024_CVPR,
  author    = {Corsetti, Jaime and Boscaini, Davide and Oh, Changjae and Cavallaro, Andrea and Poiesi, Fabio},
  title     = {Open-Vocabulary Object {6D} Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18071--18080},
}
Plug and Play Active Learning for Object Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Chenhongyi and Huang, Lichao and Crowley, Elliot J.},
  title     = {Plug and Play Active Learning for Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17784--17793},
}
Calibrating Multi-modal Representations: A Pursuit of Group Robustness without Annotations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{You_2024_CVPR,
  author    = {You, Chenyu and Min, Yifei and Dai, Weicheng and Sekhon, Jasjeet S. and Staib, Lawrence and Duncan, James S.},
  title     = {Calibrating Multi-modal Representations: A Pursuit of Group Robustness without Annotations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {26140--26150},
}
LiSA: LiDAR Localization with Semantic Awareness-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Bochun and Li, Zijun and Li, Wen and Cai, Zhipeng and Wen, Chenglu and Zang, Yu and Muller, Matthias and Wang, Cheng},
  title     = {{LiSA}: {LiDAR} Localization with Semantic Awareness},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15271--15280},
}
MMM: Generative Masked Motion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pinyoanuntapong_2024_CVPR,
  author    = {Pinyoanuntapong, Ekkasit and Wang, Pu and Lee, Minwoo and Chen, Chen},
  title     = {{MMM}: Generative Masked Motion Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1546--1555},
}
PEGASUS: Personalized Generative 3D Avatars with Composable Attributes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cha_2024_CVPR,
  author    = {Cha, Hyunsoo and Kim, Byungjun and Joo, Hanbyul},
  title     = {{PEGASUS}: Personalized Generative {3D} Avatars with Composable Attributes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1072--1081},
}
LMDrive: Closed-Loop End-to-End Driving with Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shao_2024_CVPR,
  author    = {Shao, Hao and Hu, Yuxuan and Wang, Letian and Song, Guanglu and Waslander, Steven L. and Liu, Yu and Li, Hongsheng},
  title     = {{LMDrive}: Closed-Loop End-to-End Driving with Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15120--15130},
}
MCD: Diverse Large-Scale Multi-Campus Dataset for Robot Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2024_CVPR,
  author    = {Nguyen, Thien-Minh and Yuan, Shenghai and Nguyen, Thien Hoang and Yin, Pengyu and Cao, Haozhi and Xie, Lihua and Wozniak, Maciej and Jensfelt, Patric and Thiel, Marko and Ziegenbein, Justin and Blunder, Noel},
  title     = {{MCD}: Diverse Large-Scale Multi-Campus Dataset for Robot Perception},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {22304--22313},
}
Diff-Plugin: Revitalizing Details for Diffusion-based Low-level Tasks-
[pdf]
[bibtex]@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Yuhao and Ke, Zhanghan and Liu, Fang and Zhao, Nanxuan and Lau, Rynson W. H.},
  title     = {{Diff-Plugin}: Revitalizing Details for Diffusion-based Low-level Tasks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4197--4208},
}
AHIVE: Anatomy-aware Hierarchical Vision Encoding for Interactive Radiology Report Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Yan_2024_CVPR,
  author    = {Yan, Sixing and Cheung, William K. and Tsang, Ivor W. and Chiu, Keith and Tong, Terence M. and Cheung, Ka Chun and See, Simon},
  title     = {{AHIVE}: Anatomy-aware Hierarchical Vision Encoding for Interactive Radiology Report Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14324--14333},
}
CyberDemo: Augmenting Simulated Human Demonstration for Real-World Dexterous Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Jun and Qin, Yuzhe and Kuang, Kaiming and Korkmaz, Yigit and Gurumoorthy, Akhilan and Su, Hao and Wang, Xiaolong},
  title     = {{CyberDemo}: Augmenting Simulated Human Demonstration for Real-World Dexterous Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17952--17963},
}
MaskCLR: Attention-Guided Contrastive Learning for Robust Action Representation Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Abdelfattah_2024_CVPR,
  author    = {Abdelfattah, Mohamed and Hassan, Mariam and Alahi, Alexandre},
  title     = {{MaskCLR}: Attention-Guided Contrastive Learning for Robust Action Representation Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18678--18687},
}
Narrative Action Evaluation with Prompt-Guided Multimodal Interaction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Shiyi and Bai, Sule and Chen, Guangyi and Chen, Lei and Lu, Jiwen and Wang, Junle and Tang, Yansong},
  title     = {Narrative Action Evaluation with Prompt-Guided Multimodal Interaction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18430--18439},
}
R-Cyclic Diffuser: Reductive and Cyclic Latent Diffusion for 3D Clothed Human Digitalization-
[pdf]
[supp]
[bibtex]@InProceedings{Chan_2024_CVPR,
  author    = {Chan, Kennard Yanting and Liu, Fayao and Lin, Guosheng and Foo, Chuan Sheng and Lin, Weisi},
  title     = {{R-Cyclic Diffuser}: Reductive and Cyclic Latent Diffusion for {3D} Clothed Human Digitalization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10304--10313},
}
Intelligent Grimm - Open-ended Visual Storytelling via Latent Diffusion Models-
[pdf]
[bibtex]@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Chang and Wu, Haoning and Zhong, Yujie and Zhang, Xiaoyun and Wang, Yanfeng and Xie, Weidi},
  title     = {Intelligent {Grimm} - Open-ended Visual Storytelling via Latent Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6190--6200},
}
Validating Privacy-Preserving Face Recognition under a Minimum Assumption-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Hui and Dong, Xingbo and Lai, YenLung and Zhou, Ying and Zhang, Xiaoyan and Lv, Xingguo and Jin, Zhe and Li, Xuejun},
  title     = {Validating Privacy-Preserving Face Recognition under a Minimum Assumption},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12205--12214},
}
Long-Tailed Anomaly Detection with Learnable Class Names-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ho_2024_CVPR,
  author    = {Ho, Chih-Hui and Peng, Kuan-Chuan and Vasconcelos, Nuno},
  title     = {Long-Tailed Anomaly Detection with Learnable Class Names},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12435--12446},
}
ArGue: Attribute-Guided Prompt Tuning for Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tian_2024_CVPR,
  author    = {Tian, Xinyu and Zou, Shu and Yang, Zhaoyuan and Zhang, Jing},
  title     = {{ArGue}: Attribute-Guided Prompt Tuning for Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {28578--28587},
}
Rapid 3D Model Generation with Intuitive 3D Input-
[pdf]
[bibtex]@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Tianrun and Ding, Chaotao and Zhang, Shangzhan and Yu, Chunan and Zang, Ying and Li, Zejian and Peng, Sida and Sun, Lingyun},
  title     = {Rapid {3D} Model Generation with Intuitive {3D} Input},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12554--12564},
}
GenTron: Diffusion Transformers for Image and Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Shoufa and Xu, Mengmeng and Ren, Jiawei and Cong, Yuren and He, Sen and Xie, Yanping and Sinha, Animesh and Luo, Ping and Xiang, Tao and Perez-Rua, Juan-Manuel},
  title     = {{GenTron}: Diffusion Transformers for Image and Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6441--6451},
}
Close Imitation of Expert Retouching for Black-and-White Photography-
[pdf]
[bibtex]@InProceedings{Shin_2024_CVPR,
  author    = {Shin, Seunghyun and Shin, Jisu and Bae, Jihwan and Shim, Inwook and Jeon, Hae-Gon},
  title     = {Close Imitation of Expert Retouching for Black-and-White Photography},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {25037--25046},
}
TRIP: Temporal Residual Learning with Image Noise Prior for Image-to-Video Diffusion Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Zhongwei and Long, Fuchen and Pan, Yingwei and Qiu, Zhaofan and Yao, Ting and Cao, Yang and Mei, Tao},
  title     = {{TRIP}: Temporal Residual Learning with Image Noise Prior for Image-to-Video Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8671--8681},
}
TexVocab: Texture Vocabulary-conditioned Human Avatars-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Yuxiao and Li, Zhe and Liu, Yebin and Wang, Haoqian},
  title     = {{TexVocab}: Texture Vocabulary-conditioned Human Avatars},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1715--1725},
}
KITRO: Refining Human Mesh by 2D Clues and Kinematic-tree Rotation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Fengyuan and Gu, Kerui and Yao, Angela},
  title     = {{KITRO}: Refining Human Mesh by {2D} Clues and Kinematic-tree Rotation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1052--1061},
}
BoQ: A Place is Worth a Bag of Learnable Queries-
[pdf]
[supp]
[bibtex]@InProceedings{Ali-bey_2024_CVPR,
  author    = {Ali-bey, Amar and Chaib-draa, Brahim and Gigu{\`e}re, Philippe},
  title     = {{BoQ}: A Place is Worth a Bag of Learnable Queries},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17794--17803},
}
SuGaR: Surface-Aligned Gaussian Splatting for Efficient 3D Mesh Reconstruction and High-Quality Mesh Rendering-
[pdf]
[supp]
[bibtex]@InProceedings{Guedon_2024_CVPR,
  author    = {Gu{\'e}don, Antoine and Lepetit, Vincent},
  title     = {{SuGaR}: Surface-Aligned {Gaussian} Splatting for Efficient {3D} Mesh Reconstruction and High-Quality Mesh Rendering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5354--5363},
}
Understanding and Improving Source-free Domain Adaptation from a Theoretical Perspective-
[pdf]
[supp]
[bibtex]@InProceedings{Mitsuzumi_2024_CVPR,
  author    = {Mitsuzumi, Yu and Kimura, Akisato and Kashima, Hisashi},
  title     = {Understanding and Improving Source-free Domain Adaptation from a Theoretical Perspective},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {28515--28524},
}
Learning SO(3)-Invariant Semantic Correspondence via Local Shape Transform-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2024_CVPR,
  author    = {Park, Chunghyun and Kim, Seungwook and Park, Jaesik and Cho, Minsu},
  title     = {Learning {SO(3)}-Invariant Semantic Correspondence via Local Shape Transform},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {22978--22987},
}
GigaPose: Fast and Robust Novel Object Pose Estimation via One Correspondence-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2024_CVPR,
  author    = {Nguyen, Van Nguyen and Groueix, Thibault and Salzmann, Mathieu and Lepetit, Vincent},
  title     = {{GigaPose}: Fast and Robust Novel Object Pose Estimation via One Correspondence},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9903--9913},
}
Imagine Before Go: Self-Supervised Generative Map for Object Goal Navigation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Sixian and Yu, Xinyao and Song, Xinhang and Wang, Xiaohan and Jiang, Shuqiang},
  title     = {Imagine Before Go: Self-Supervised Generative Map for Object Goal Navigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16414--16425},
}
Towards Effective Usage of Human-Centric Priors in Diffusion Models for Text-based Human Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Junyan and Sun, Zhenhong and Tan, Zhiyu and Chen, Xuanbai and Chen, Weihua and Li, Hao and Zhang, Cheng and Song, Yang},
  title     = {Towards Effective Usage of Human-Centric Priors in Diffusion Models for Text-based Human Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8446--8455},
}
A Video is Worth 256 Bases: Spatial-Temporal Expectation-Maximization Inversion for Zero-Shot Video Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Maomao and Li, Yu and Yang, Tianyu and Liu, Yunfei and Yue, Dongxu and Lin, Zhihui and Xu, Dong},
  title     = {A Video is Worth 256 Bases: Spatial-Temporal Expectation-Maximization Inversion for Zero-Shot Video Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7528--7537},
}
HIPTrack: Visual Tracking with Historical Prompts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2024_CVPR,
  author    = {Cai, Wenrui and Liu, Qingjie and Wang, Yunhong},
  title     = {{HIPTrack}: Visual Tracking with Historical Prompts},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {19258--19267},
}
URHand: Universal Relightable Hands-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Zhaoxi and Moon, Gyeongsik and Guo, Kaiwen and Cao, Chen and Pidhorskyi, Stanislav and Simon, Tomas and Joshi, Rohan and Dong, Yuan and Xu, Yichen and Pires, Bernardo and Wen, He and Evans, Lucas and Peng, Bo and Buffalini, Julia and Trimble, Autumn and McPhail, Kevyn and Schoeller, Melissa and Yu, Shoou-I and Romero, Javier and Zollhofer, Michael and Sheikh, Yaser and Liu, Ziwei and Saito, Shunsuke},
  title     = {{URHand}: Universal Relightable Hands},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {119--129},
}
An N-Point Linear Solver for Line and Motion Estimation with Event Cameras-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2024_CVPR,
  author    = {Gao, Ling and Gehrig, Daniel and Su, Hang and Scaramuzza, Davide and Kneip, Laurent},
  title     = {An {N}-Point Linear Solver for Line and Motion Estimation with Event Cameras},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14596--14605},
}
GenNBV: Generalizable Next-Best-View Policy for Active 3D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Xiao and Li, Quanyi and Wang, Tai and Xue, Tianfan and Pang, Jiangmiao},
  title     = {{GenNBV}: Generalizable Next-Best-View Policy for Active {3D} Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16436--16445},
}
Deep-TROJ: An Inference Stage Trojan Insertion Algorithm through Efficient Weight Replacement Attack-
[pdf]
[bibtex]@InProceedings{Ahmed_2024_CVPR,
  author    = {Ahmed, Sabbir and Zhou, Ranyang and Angizi, Shaahin and Rakin, Adnan Siraj},
  title     = {{Deep-TROJ}: An Inference Stage {Trojan} Insertion Algorithm through Efficient Weight Replacement Attack},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {24810--24819},
}
Investigating and Mitigating the Side Effects of Noisy Views for Self-Supervised Clustering Algorithms in Practical Multi-View Scenarios-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Jie and Ren, Yazhou and Wang, Xiaolong and Feng, Lei and Zhang, Zheng and Niu, Gang and Zhu, Xiaofeng},
  title     = {Investigating and Mitigating the Side Effects of Noisy Views for Self-Supervised Clustering Algorithms in Practical Multi-View Scenarios},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {22957--22966},
}
EvalCrafter: Benchmarking and Evaluating Large Video Generation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Yaofang and Cun, Xiaodong and Liu, Xuebo and Wang, Xintao and Zhang, Yong and Chen, Haoxin and Liu, Yang and Zeng, Tieyong and Chan, Raymond and Shan, Ying},
  title     = {{EvalCrafter}: Benchmarking and Evaluating Large Video Generation Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {22139--22149},
}
SelfOcc: Self-Supervised Vision-Based 3D Occupancy Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Yuanhui and Zheng, Wenzhao and Zhang, Borui and Zhou, Jie and Lu, Jiwen},
  title     = {{SelfOcc}: Self-Supervised Vision-Based {3D} Occupancy Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {19946--19956},
}
SubT-MRS Dataset: Pushing SLAM Towards All-weather Environments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Shibo and Gao, Yuanjun and Wu, Tianhao and Singh, Damanpreet and Jiang, Rushan and Sun, Haoxiang and Sarawata, Mansi and Qiu, Yuheng and Whittaker, Warren and Higgins, Ian and Du, Yi and Su, Shaoshu and Xu, Can and Keller, John and Karhade, Jay and Nogueira, Lucas and Saha, Sourojit and Zhang, Ji and Wang, Wenshan and Wang, Chen and Scherer, Sebastian},
  title     = {{SubT-MRS} Dataset: Pushing {SLAM} Towards All-weather Environments},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {22647--22657},
}
Named Entity Driven Zero-Shot Image Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Feng_2024_CVPR,
  author    = {Feng, Zhida and Chen, Li and Tian, Jing and Liu, JiaXiang and Feng, Shikun},
  title     = {Named Entity Driven Zero-Shot Image Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9110--9119},
}
Relational Matching for Weakly Semi-Supervised Oriented Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Wenhao and Wong, Hau-San and Wu, Si and Zhang, Tianyou},
  title     = {Relational Matching for Weakly Semi-Supervised Oriented Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {27800--27810},
}
Rethinking the Representation in Federated Unsupervised Learning with Non-IID Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liao_2024_CVPR,
  author    = {Liao, Xinting and Liu, Weiming and Chen, Chaochao and Zhou, Pengyang and Yu, Fengyuan and Zhu, Huabin and Yao, Binhui and Wang, Tao and Zheng, Xiaolin and Tan, Yanchao},
  title     = {Rethinking the Representation in Federated Unsupervised Learning with Non-{IID} Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {22841--22850},
}
Distraction is All You Need: Memory-Efficient Image Immunization against Diffusion-Based Image Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Lo_2024_CVPR,
  author    = {Lo, Ling and Yeo, Cheng Yu and Shuai, Hong-Han and Cheng, Wen-Huang},
  title     = {Distraction is All You Need: Memory-Efficient Image Immunization against Diffusion-Based Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {24462--24471},
}
Knowledge-Enhanced Dual-stream Zero-shot Composed Image Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Suo_2024_CVPR,
  author    = {Suo, Yucheng and Ma, Fan and Zhu, Linchao and Yang, Yi},
  title     = {Knowledge-Enhanced Dual-stream Zero-shot Composed Image Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {26951--26962},
}
Taming Self-Training for Open-Vocabulary Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Shiyu and Schulter, Samuel and Zhao, Long and Zhang, Zhixing and G, Vijay Kumar B and Suh, Yumin and Chandraker, Manmohan and Metaxas, Dimitris N.},
  title     = {Taming Self-Training for Open-Vocabulary Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13938--13947},
}
Grounding and Enhancing Grid-based Models for Neural Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Zelin and Fan, Fenglei and Liao, Wenlong and Yan, Junchi},
  title     = {Grounding and Enhancing Grid-based Models for Neural Fields},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {19425--19435},
}
Bilateral Propagation Network for Depth Completion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2024_CVPR,
  author    = {Tang, Jie and Tian, Fei-Peng and An, Boshi and Li, Jian and Tan, Ping},
  title     = {Bilateral Propagation Network for Depth Completion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9763--9772},
}
ESR-NeRF: Emissive Source Reconstruction Using LDR Multi-view Images-
[pdf]
[supp]
[bibtex]@InProceedings{Jeong_2024_CVPR,
  author    = {Jeong, Jinseo and Koo, Junseo and Zhang, Qimeng and Kim, Gunhee},
  title     = {{ESR-NeRF}: Emissive Source Reconstruction Using {LDR} Multi-view Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4598--4609},
}
Infer from What You Have Seen Before: Temporally-dependent Classifier for Semi-supervised Video Segmentation-
[pdf]
[bibtex]@InProceedings{Zhuang_2024_CVPR,
  author    = {Zhuang, Jiafan and Wang, Zilei and Zhang, Yixin and Fan, Zhun},
  title     = {Infer from What You Have Seen Before: Temporally-dependent Classifier for Semi-supervised Video Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3575--3584},
}
Unleashing Channel Potential: Space-Frequency Selection Convolution for SAR Object Detection-
[pdf]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Ke and Wang, Di and Hu, Zhangyuan and Zhu, Wenxuan and Li, Shaofeng and Wang, Quan},
  title     = {Unleashing Channel Potential: Space-Frequency Selection Convolution for {SAR} Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17323--17332},
}
READ: Retrieval-Enhanced Asymmetric Diffusion for Motion Planning-
[pdf]
[supp]
[bibtex]@InProceedings{Oba_2024_CVPR,
  author    = {Oba, Takeru and Walter, Matthew and Ukita, Norimichi},
  title     = {{READ}: Retrieval-Enhanced Asymmetric Diffusion for Motion Planning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17974--17984},
}
Video Frame Interpolation via Direct Synthesis with the Event-based Reference-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Yuhan and Deng, Yongjian and Chen, Hao and Yang, Zhen},
  title     = {Video Frame Interpolation via Direct Synthesis with the Event-based Reference},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8477--8487},
}
DSL-FIQA: Assessing Facial Image Quality via Dual-Set Degradation Learning and Landmark-Guided Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Wei-Ting and Krishnan, Gurunandan and Gao, Qiang and Kuo, Sy-Yen and Ma, Sizhou and Wang, Jian},
  title     = {{DSL-FIQA}: Assessing Facial Image Quality via Dual-Set Degradation Learning and Landmark-Guided Transformer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2931--2941},
}
FMA-Net: Flow-Guided Dynamic Filtering and Iterative Feature Refinement with Multi-Attention for Joint Video Super-Resolution and Deblurring-
[pdf]
[supp]
[bibtex]@InProceedings{Youk_2024_CVPR,
  author    = {Youk, Geunhyuk and Oh, Jihyong and Kim, Munchurl},
  title     = {{FMA-Net}: Flow-Guided Dynamic Filtering and Iterative Feature Refinement with Multi-Attention for Joint Video Super-Resolution and Deblurring},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {44--55},
}
OVMR: Open-Vocabulary Recognition with Multi-Modal References-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2024_CVPR,
  author    = {Ma, Zehong and Zhang, Shiliang and Wei, Longhui and Tian, Qi},
  title     = {{OVMR}: Open-Vocabulary Recognition with Multi-Modal References},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16571--16581},
}
Hourglass Tokenizer for Efficient Transformer-Based 3D Human Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Wenhao and Liu, Mengyuan and Liu, Hong and Wang, Pichao and Cai, Jialun and Sebe, Nicu},
  title     = {Hourglass Tokenizer for Efficient Transformer-Based {3D} Human Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {604--613},
}
Boosting Diffusion Models with Moving Average Sampling in Frequency Domain-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qian_2024_CVPR, author = {Qian, Yurui and Cai, Qi and Pan, Yingwei and Li, Yehao and Yao, Ting and Sun, Qibin and Mei, Tao}, title = {Boosting Diffusion Models with Moving Average Sampling in Frequency Domain}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8911-8920} }
GART: Gaussian Articulated Template Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lei_2024_CVPR, author = {Lei, Jiahui and Wang, Yufu and Pavlakos, Georgios and Liu, Lingjie and Daniilidis, Kostas}, title = {GART: Gaussian Articulated Template Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19876-19887} }
Global and Local Prompts Cooperation via Optimal Transport for Federated Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Hongxia and Huang, Wei and Wang, Jingya and Shi, Ye}, title = {Global and Local Prompts Cooperation via Optimal Transport for Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12151-12161} }
Bi-Causal: Group Activity Recognition via Bidirectional Causality-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Youliang and Liu, Wenxuan and Xu, Danni and Zhou, Zhuo and Wang, Zheng}, title = {Bi-Causal: Group Activity Recognition via Bidirectional Causality}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1450-1459} }
Space-Time Diffusion Features for Zero-Shot Text-Driven Motion Transfer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yatim_2024_CVPR, author = {Yatim, Danah and Fridman, Rafail and Bar-Tal, Omer and Kasten, Yoni and Dekel, Tali}, title = {Space-Time Diffusion Features for Zero-Shot Text-Driven Motion Transfer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8466-8476} }
KP-RED: Exploiting Semantic Keypoints for Joint 3D Shape Retrieval and Deformation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Ruida and Zhang, Chenyangguang and Di, Yan and Manhardt, Fabian and Liu, Xingyu and Tombari, Federico and Ji, Xiangyang}, title = {KP-RED: Exploiting Semantic Keypoints for Joint 3D Shape Retrieval and Deformation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20540-20550} }
Learning from One Continuous Video Stream-
[pdf]
[arXiv]
[bibtex]@InProceedings{Carreira_2024_CVPR, author = {Carreira, Jo{\~a}o and King, Michael and Patraucean, Viorica and Gokay, Dilara and Ionescu, Catalin and Yang, Yi and Zoran, Daniel and Heyward, Joseph and Doersch, Carl and Aytar, Yusuf and Damen, Dima and Zisserman, Andrew}, title = {Learning from One Continuous Video Stream}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28751-28761} }
VGGSfM: Visual Geometry Grounded Deep Structure From Motion-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Jianyuan and Karaev, Nikita and Rupprecht, Christian and Novotny, David}, title = {VGGSfM: Visual Geometry Grounded Deep Structure From Motion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21686-21697} }
MIGC: Multi-Instance Generation Controller for Text-to-Image Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR, author = {Zhou, Dewei and Li, You and Ma, Fan and Zhang, Xiaoting and Yang, Yi}, title = {MIGC: Multi-Instance Generation Controller for Text-to-Image Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6818-6828} }
Distilling CLIP with Dual Guidance for Learning Discriminative Human Body Shape Representation-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Feng and Kim, Minchul and Ren, Zhiyuan and Liu, Xiaoming}, title = {Distilling CLIP with Dual Guidance for Learning Discriminative Human Body Shape Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {256-266} }
Retrieval-Augmented Open-Vocabulary Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Jooyeon and Cho, Eulrang and Kim, Sehyung and Kim, Hyunwoo J.}, title = {Retrieval-Augmented Open-Vocabulary Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17427-17436} }
MULTIFLOW: Shifting Towards Task-Agnostic Vision-Language Pruning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Farina_2024_CVPR, author = {Farina, Matteo and Mancini, Massimiliano and Cunegatti, Elia and Liu, Gaowen and Iacca, Giovanni and Ricci, Elisa}, title = {MULTIFLOW: Shifting Towards Task-Agnostic Vision-Language Pruning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16185-16195} }
Spin-UP: Spin Light for Natural Light Uncalibrated Photometric Stereo-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Zongrui and Lu, Zhan and Yan, Haojie and Shi, Boxin and Pan, Gang and Zheng, Qian and Jiang, Xudong}, title = {Spin-UP: Spin Light for Natural Light Uncalibrated Photometric Stereo}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11905-11914} }
LLaFS: When Large Language Models Meet Few-Shot Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2024_CVPR, author = {Zhu, Lanyun and Chen, Tianrun and Ji, Deyi and Ye, Jieping and Liu, Jun}, title = {LLaFS: When Large Language Models Meet Few-Shot Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3065-3075} }
Kernel Adaptive Convolution for Scene Text Detection via Distance Map Prediction-
[pdf]
[bibtex]@InProceedings{Zheng_2024_CVPR, author = {Zheng, Jinzhi and Fan, Heng and Zhang, Libo}, title = {Kernel Adaptive Convolution for Scene Text Detection via Distance Map Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5957-5966} }
PixelLM: Pixel Reasoning with Large Multimodal Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2024_CVPR, author = {Ren, Zhongwei and Huang, Zhicheng and Wei, Yunchao and Zhao, Yao and Fu, Dongmei and Feng, Jiashi and Jin, Xiaojie}, title = {PixelLM: Pixel Reasoning with Large Multimodal Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26374-26383} }
MRFS: Mutually Reinforcing Image Fusion and Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Hao and Zuo, Xuhui and Jiang, Jie and Guo, Chunchao and Ma, Jiayi}, title = {MRFS: Mutually Reinforcing Image Fusion and Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26974-26983} }
MemoNav: Working Memory Model for Visual Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Hongxin and Wang, Zeyu and Yang, Xu and Yang, Yuran and Mei, Shuqi and Zhang, Zhaoxiang}, title = {MemoNav: Working Memory Model for Visual Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17913-17922} }
Robust Depth Enhancement via Polarization Prompt Fusion Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ikemura_2024_CVPR, author = {Ikemura, Kei and Huang, Yiming and Heide, Felix and Zhang, Zhaoxiang and Chen, Qifeng and Lei, Chenyang}, title = {Robust Depth Enhancement via Polarization Prompt Fusion Tuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20710-20720} }
AssistGUI: Task-Oriented PC Graphical User Interface Automation-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2024_CVPR, author = {Gao, Difei and Ji, Lei and Bai, Zechen and Ouyang, Mingyu and Li, Peiran and Mao, Dongxing and Wu, Qinchen and Zhang, Weichen and Wang, Peiyi and Guo, Xiangwu and Wang, Hengxu and Zhou, Luowei and Shou, Mike Zheng}, title = {AssistGUI: Task-Oriented PC Graphical User Interface Automation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13289-13298} }
Adaptive Multi-Modal Cross-Entropy Loss for Stereo Matching-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Peng and Xiang, Zhiyu and Qiao, Chengyu and Fu, Jingyun and Pu, Tianyu}, title = {Adaptive Multi-Modal Cross-Entropy Loss for Stereo Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5135-5144} }
Unlocking the Potential of Prompt-Tuning in Bridging Generalized and Personalized Federated Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2024_CVPR, author = {Deng, Wenlong and Thrampoulidis, Christos and Li, Xiaoxiao}, title = {Unlocking the Potential of Prompt-Tuning in Bridging Generalized and Personalized Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6087-6097} }
Compact 3D Gaussian Representation for Radiance Field-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2024_CVPR, author = {Lee, Joo Chan and Rho, Daniel and Sun, Xiangyu and Ko, Jong Hwan and Park, Eunbyung}, title = {Compact 3D Gaussian Representation for Radiance Field}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21719-21728} }
PaSCo: Urban 3D Panoptic Scene Completion with Uncertainty Awareness-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2024_CVPR, author = {Cao, Anh-Quan and Dai, Angela and de Charette, Raoul}, title = {PaSCo: Urban 3D Panoptic Scene Completion with Uncertainty Awareness}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14554-14564} }
GALA: Generating Animatable Layered Assets from a Single Scan-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Taeksoo and Kim, Byungjun and Saito, Shunsuke and Joo, Hanbyul}, title = {GALA: Generating Animatable Layered Assets from a Single Scan}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1535-1545} }
LeGO: Leveraging a Surface Deformation Network for Animatable Stylized Face Generation with One Example-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yoon_2024_CVPR, author = {Yoon, Soyeon and Yun, Kwan and Seo, Kwanggyoon and Cha, Sihun and Yoo, Jung Eun and Noh, Junyong}, title = {LeGO: Leveraging a Surface Deformation Network for Animatable Stylized Face Generation with One Example}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4505-4514} }
Frequency-Adaptive Dilated Convolution for Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Linwei and Gu, Lin and Zheng, Dezhi and Fu, Ying}, title = {Frequency-Adaptive Dilated Convolution for Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3414-3425} }
3D Building Reconstruction from Monocular Remote Sensing Images with Multi-level Supervisions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Weijia and Yang, Haote and Hu, Zhenghao and Zheng, Juepeng and Xia, Gui-Song and He, Conghui}, title = {3D Building Reconstruction from Monocular Remote Sensing Images with Multi-level Supervisions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27728-27737} }
PhyScene: Physically Interactable 3D Scene Synthesis for Embodied AI-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Yandan and Jia, Baoxiong and Zhi, Peiyuan and Huang, Siyuan}, title = {PhyScene: Physically Interactable 3D Scene Synthesis for Embodied AI}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16262-16272} }
Generative Latent Coding for Ultra-Low Bitrate Image Compression-
[pdf]
[supp]
[bibtex]@InProceedings{Jia_2024_CVPR, author = {Jia, Zhaoyang and Li, Jiahao and Li, Bin and Li, Houqiang and Lu, Yan}, title = {Generative Latent Coding for Ultra-Low Bitrate Image Compression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26088-26098} }
Multiple View Geometry Transformers for 3D Human Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liao_2024_CVPR, author = {Liao, Ziwei and Zhu, Jialiang and Wang, Chunyu and Hu, Han and Waslander, Steven L.}, title = {Multiple View Geometry Transformers for 3D Human Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {708-717} }
SiTH: Single-view Textured Human Reconstruction with Image-Conditioned Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{I_Ho_2024_CVPR, author = {Ho, Hsuan-I and Song, Jie and Hilliges, Otmar}, title = {SiTH: Single-view Textured Human Reconstruction with Image-Conditioned Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {538-549} }
Distributionally Generative Augmentation for Fair Facial Attribute Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Fengda and He, Qianpei and Kuang, Kun and Liu, Jiashuo and Chen, Long and Wu, Chao and Xiao, Jun and Zhang, Hanwang}, title = {Distributionally Generative Augmentation for Fair Facial Attribute Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22797-22808} }
DynVideo-E: Harnessing Dynamic NeRF for Large-Scale Motion- and View-Change Human-Centric Video Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Jia-Wei and Cao, Yan-Pei and Wu, Jay Zhangjie and Mao, Weijia and Gu, Yuchao and Zhao, Rui and Keppo, Jussi and Shan, Ying and Shou, Mike Zheng}, title = {DynVideo-E: Harnessing Dynamic NeRF for Large-Scale Motion- and View-Change Human-Centric Video Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7664-7674} }
Real-Time Neural BRDF with Spherically Distributed Primitives-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dou_2024_CVPR, author = {Dou, Yishun and Zheng, Zhong and Jin, Qiaoqiao and Ni, Bingbing and Chen, Yugang and Ke, Junxiang}, title = {Real-Time Neural BRDF with Spherically Distributed Primitives}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4337-4346} }
Harnessing Meta-Learning for Improving Full-Frame Video Stabilization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ali_2024_CVPR, author = {Ali, Muhammad Kashif and Im, Eun Woo and Kim, Dongjin and Kim, Tae Hyun}, title = {Harnessing Meta-Learning for Improving Full-Frame Video Stabilization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12605-12614} }
VideoCrafter2: Overcoming Data Limitations for High-Quality Video Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Haoxin and Zhang, Yong and Cun, Xiaodong and Xia, Menghan and Wang, Xintao and Weng, Chao and Shan, Ying}, title = {VideoCrafter2: Overcoming Data Limitations for High-Quality Video Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7310-7320} }
From SAM to CAMs: Exploring Segment Anything Model for Weakly Supervised Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Kweon_2024_CVPR, author = {Kweon, Hyeokjun and Yoon, Kuk-Jin}, title = {From SAM to CAMs: Exploring Segment Anything Model for Weakly Supervised Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19499-19509} }
Boosting Flow-based Generative Super-Resolution Models via Learned Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tsao_2024_CVPR, author = {Tsao, Li-Yuan and Lo, Yi-Chen and Chang, Chia-Che and Chen, Hao-Wei and Tseng, Roy and Feng, Chien and Lee, Chun-Yi}, title = {Boosting Flow-based Generative Super-Resolution Models via Learned Prior}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26005-26015} }
How to Handle Sketch-Abstraction in Sketch-Based Image Retrieval?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Koley_2024_CVPR, author = {Koley, Subhadeep and Bhunia, Ayan Kumar and Sain, Aneeshan and Chowdhury, Pinaki Nath and Xiang, Tao and Song, Yi-Zhe}, title = {How to Handle Sketch-Abstraction in Sketch-Based Image Retrieval?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16859-16869} }
What You See is What You GAN: Rendering Every Pixel for High-Fidelity Geometry in 3D GANs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Trevithick_2024_CVPR, author = {Trevithick, Alex and Chan, Matthew and Takikawa, Towaki and Iqbal, Umar and De Mello, Shalini and Chandraker, Manmohan and Ramamoorthi, Ravi and Nagano, Koki}, title = {What You See is What You GAN: Rendering Every Pixel for High-Fidelity Geometry in 3D GANs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22765-22775} }
Style Injection in Diffusion: A Training-free Approach for Adapting Large-scale Diffusion Models for Style Transfer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chung_2024_CVPR, author = {Chung, Jiwoo and Hyun, Sangeek and Heo, Jae-Pil}, title = {Style Injection in Diffusion: A Training-free Approach for Adapting Large-scale Diffusion Models for Style Transfer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8795-8805} }
Towards Robust Learning to Optimize with Theoretical Guarantees-
[pdf]
[supp]
[bibtex]@InProceedings{Song_2024_CVPR, author = {Song, Qingyu and Lin, Wei and Wang, Juncheng and Xu, Hong}, title = {Towards Robust Learning to Optimize with Theoretical Guarantees}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27498-27506} }
Differentiable Neural Surface Refinement for Modeling Transparent Objects-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2024_CVPR, author = {Deng, Weijian and Campbell, Dylan and Sun, Chunyi and Kanitkar, Shubham and Shaffer, Matthew E. and Gould, Stephen}, title = {Differentiable Neural Surface Refinement for Modeling Transparent Objects}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20268-20277} }
OrthCaps: An Orthogonal CapsNet with Sparse Attention Routing and Pruning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Geng_2024_CVPR, author = {Geng, Xinyu and Wang, Jiaming and Gong, Jiawei and Xue, Yuerong and Xu, Jun and Chen, Fanglin and Huang, Xiaolin}, title = {OrthCaps: An Orthogonal CapsNet with Sparse Attention Routing and Pruning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6037-6046} }
ProS: Prompting-to-simulate Generalized knowledge for Universal Cross-Domain Retrieval-
[pdf]
[arXiv]
[bibtex]@InProceedings{Fang_2024_CVPR, author = {Fang, Kaipeng and Song, Jingkuan and Gao, Lianli and Zeng, Pengpeng and Cheng, Zhi-Qi and Li, Xiyao and Shen, Heng Tao}, title = {ProS: Prompting-to-simulate Generalized knowledge for Universal Cross-Domain Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17292-17301} }
Florence-2: Advancing a Unified Representation for a Variety of Vision Tasks-
[pdf]
[supp]
[bibtex]@InProceedings{Xiao_2024_CVPR, author = {Xiao, Bin and Wu, Haiping and Xu, Weijian and Dai, Xiyang and Hu, Houdong and Lu, Yumao and Zeng, Michael and Liu, Ce and Yuan, Lu}, title = {Florence-2: Advancing a Unified Representation for a Variety of Vision Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4818-4829} }
NeRF On-the-go: Exploiting Uncertainty for Distractor-free NeRFs in the Wild-
[pdf]
[supp]
[bibtex]@InProceedings{Ren_2024_CVPR, author = {Ren, Weining and Zhu, Zihan and Sun, Boyang and Chen, Jiaqi and Pollefeys, Marc and Peng, Songyou}, title = {NeRF On-the-go: Exploiting Uncertainty for Distractor-free NeRFs in the Wild}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8931-8940} }
3D Human Pose Perception from Egocentric Stereo Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Akada_2024_CVPR, author = {Akada, Hiroyasu and Wang, Jian and Golyanik, Vladislav and Theobalt, Christian}, title = {3D Human Pose Perception from Egocentric Stereo Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {767-776} }
Grid Diffusion Models for Text-to-Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2024_CVPR, author = {Lee, Taegyeong and Kwon, Soyeong and Kim, Taehwan}, title = {Grid Diffusion Models for Text-to-Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8734-8743} }
Boosting Object Detection with Zero-Shot Day-Night Domain Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2024_CVPR, author = {Du, Zhipeng and Shi, Miaojing and Deng, Jiankang}, title = {Boosting Object Detection with Zero-Shot Day-Night Domain Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12666-12676} }
LucidDreamer: Towards High-Fidelity Text-to-3D Generation via Interval Score Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2024_CVPR, author = {Liang, Yixun and Yang, Xin and Lin, Jiantao and Li, Haodong and Xu, Xiaogang and Chen, Yingcong}, title = {LucidDreamer: Towards High-Fidelity Text-to-3D Generation via Interval Score Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6517-6526} }
PTM-VQA: Efficient Video Quality Assessment Leveraging Diverse PreTrained Models from the Wild-
[pdf]
[bibtex]@InProceedings{Yuan_2024_CVPR, author = {Yuan, Kun and Liu, Hongbo and Li, Mading and Sun, Muyi and Sun, Ming and Gong, Jiachao and Hao, Jinhua and Zhou, Chao and Tang, Yansong}, title = {PTM-VQA: Efficient Video Quality Assessment Leveraging Diverse PreTrained Models from the Wild}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2835-2845} }
Versatile Medical Image Segmentation Learned from Multi-Source Datasets via Model Self-Disambiguation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Xiaoyang and Zheng, Hao and Li, Yuemeng and Ma, Yuncong and Ma, Liang and Li, Hongming and Fan, Yong}, title = {Versatile Medical Image Segmentation Learned from Multi-Source Datasets via Model Self-Disambiguation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11747-11756} }
Improving Generalization via Meta-Learning on Hard Samples-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jain_2024_CVPR, author = {Jain, Nishant and Suggala, Arun S. and Shenoy, Pradeep}, title = {Improving Generalization via Meta-Learning on Hard Samples}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27600-27609} }
Align and Aggregate: Compositional Reasoning with Video Alignment and Answer Aggregation for Video Question-Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liao_2024_CVPR, author = {Liao, Zhaohe and Li, Jiangtong and Niu, Li and Zhang, Liqing}, title = {Align and Aggregate: Compositional Reasoning with Video Alignment and Answer Aggregation for Video Question-Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13395-13404} }
REACTO: Reconstructing Articulated Objects from a Single Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2024_CVPR, author = {Song, Chaoyue and Wei, Jiacheng and Foo, Chuan Sheng and Lin, Guosheng and Liu, Fayao}, title = {REACTO: Reconstructing Articulated Objects from a Single Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5384-5395} }
Egocentric Whole-Body Motion Capture with FisheyeViT and Diffusion-Based Motion Refinement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Jian and Cao, Zhe and Luvizon, Diogo and Liu, Lingjie and Sarkar, Kripasindhu and Tang, Danhang and Beeler, Thabo and Theobalt, Christian}, title = {Egocentric Whole-Body Motion Capture with FisheyeViT and Diffusion-Based Motion Refinement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {777-787} }
Language Embedded 3D Gaussians for Open-Vocabulary Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2024_CVPR, author = {Shi, Jin-Chuan and Wang, Miao and Duan, Hao-Bin and Guan, Shao-Hua}, title = {Language Embedded 3D Gaussians for Open-Vocabulary Scene Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5333-5343} }
Towards Automated Movie Trailer Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Argaw_2024_CVPR, author = {Argaw, Dawit Mureja and Soldan, Mattia and Pardo, Alejandro and Zhao, Chen and Heilbron, Fabian Caba and Chung, Joon Son and Ghanem, Bernard}, title = {Towards Automated Movie Trailer Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7445-7454} }
Differentiable Information Bottleneck for Deterministic Multi-view Clustering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2024_CVPR, author = {Yan, Xiaoqiang and Jin, Zhixiang and Han, Fengshou and Ye, Yangdong}, title = {Differentiable Information Bottleneck for Deterministic Multi-view Clustering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27435-27444} }
Sheared Backpropagation for Fine-tuning Foundation Models-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2024_CVPR, author = {Yu, Zhiyuan and Shen, Li and Ding, Liang and Tian, Xinmei and Chen, Yixin and Tao, Dacheng}, title = {Sheared Backpropagation for Fine-tuning Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5883-5892} }
Action-slot: Visual Action-centric Representations for Multi-label Atomic Activity Recognition in Traffic Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Kung_2024_CVPR, author = {Kung, Chi-Hsi and Lu, Shu-Wei and Tsai, Yi-Hsuan and Chen, Yi-Ting}, title = {Action-slot: Visual Action-centric Representations for Multi-label Atomic Activity Recognition in Traffic Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18451-18461} }
Animatable Gaussians: Learning Pose-dependent Gaussian Maps for High-fidelity Human Avatar Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Zhe and Zheng, Zerong and Wang, Lizhen and Liu, Yebin}, title = {Animatable Gaussians: Learning Pose-dependent Gaussian Maps for High-fidelity Human Avatar Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19711-19722} }
Latency Correction for Event-guided Deblurring and Frame Interpolation-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Yixin and Liang, Jinxiu and Yu, Bohan and Chen, Yan and Ren, Jimmy S. and Shi, Boxin}, title = {Latency Correction for Event-guided Deblurring and Frame Interpolation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24977-24986} }
Retraining-Free Model Quantization via One-Shot Weight-Coupling Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2024_CVPR, author = {Tang, Chen and Meng, Yuan and Jiang, Jiacheng and Xie, Shuzhao and Lu, Rongwei and Ma, Xinzhu and Wang, Zhi and Zhu, Wenwu}, title = {Retraining-Free Model Quantization via One-Shot Weight-Coupling Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15855-15865} }
EVCap: Retrieval-Augmented Image Captioning with External Visual-Name Memory for Open-World Comprehension-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Jiaxuan and Vo, Duc Minh and Sugimoto, Akihiro and Nakayama, Hideki}, title = {EVCap: Retrieval-Augmented Image Captioning with External Visual-Name Memory for Open-World Comprehension}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13733-13742} }
SIFU: Side-view Conditioned Implicit Function for Real-world Usable Clothed Human Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Zechuan and Yang, Zongxin and Yang, Yi}, title = {SIFU: Side-view Conditioned Implicit Function for Real-world Usable Clothed Human Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9936-9947} }
WinSyn: A High Resolution Testbed for Synthetic Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kelly_2024_CVPR, author = {Kelly, Tom and Femiani, John and Wonka, Peter}, title = {WinSyn: A High Resolution Testbed for Synthetic Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22456-22465} }
Autoregressive Queries for Adaptive Tracking with Spatio-Temporal Transformers-
[pdf]
[bibtex]@InProceedings{Xie_2024_CVPR, author = {Xie, Jinxia and Zhong, Bineng and Mo, Zhiyi and Zhang, Shengping and Shi, Liangtao and Song, Shuxiang and Ji, Rongrong}, title = {Autoregressive Queries for Adaptive Tracking with Spatio-Temporal Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19300-19309} }
Misalignment-Robust Frequency Distribution Loss for Image Transformation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ni_2024_CVPR, author = {Ni, Zhangkai and Wu, Juncheng and Wang, Zian and Yang, Wenhan and Wang, Hanli and Ma, Lin}, title = {Misalignment-Robust Frequency Distribution Loss for Image Transformation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2910-2919} }
Language-aware Visual Semantic Distillation for Video Question Answering-
[pdf]
[supp]
[bibtex]@InProceedings{Zou_2024_CVPR, author = {Zou, Bo and Yang, Chao and Qiao, Yu and Quan, Chengbin and Zhao, Youjian}, title = {Language-aware Visual Semantic Distillation for Video Question Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27113-27123} }
Lane2Seq: Towards Unified Lane Detection via Sequence Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR, author = {Zhou, Kunyang}, title = {Lane2Seq: Towards Unified Lane Detection via Sequence Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16944-16953} }
Disentangled Prompt Representation for Domain Generalization-
[pdf]
[bibtex]@InProceedings{Cheng_2024_CVPR, author = {Cheng, De and Xu, Zhipeng and Jiang, Xinyang and Wang, Nannan and Li, Dongsheng and Gao, Xinbo}, title = {Disentangled Prompt Representation for Domain Generalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23595-23604} }
Abductive Ego-View Accident Video Understanding for Safe Driving Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2024_CVPR, author = {Fang, Jianwu and Li, Lei-lei and Zhou, Junfei and Xiao, Junbin and Yu, Hongkai and Lv, Chen and Xue, Jianru and Chua, Tat-Seng}, title = {Abductive Ego-View Accident Video Understanding for Safe Driving Perception}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22030-22040} }
Cross-spectral Gated-RGB Stereo Depth Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Brucker_2024_CVPR, author = {Brucker, Samuel and Walz, Stefanie and Bijelic, Mario and Heide, Felix}, title = {Cross-spectral Gated-RGB Stereo Depth Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21654-21665} }
KVQ: Kwai Video Quality Assessment for Short-form Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2024_CVPR, author = {Lu, Yiting and Li, Xin and Pei, Yajing and Yuan, Kun and Xie, Qizhi and Qu, Yunpeng and Sun, Ming and Zhou, Chao and Chen, Zhibo}, title = {KVQ: Kwai Video Quality Assessment for Short-form Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25963-25973} }
Degrees of Freedom Matter: Inferring Dynamics from Point Trajectories-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Yan and Prokudin, Sergey and Mihajlovic, Marko and Ma, Qianli and Tang, Siyu}, title = {Degrees of Freedom Matter: Inferring Dynamics from Point Trajectories}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2018-2028} }
LEMON: Learning 3D Human-Object Interaction Relation from 2D Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Yuhang and Zhai, Wei and Luo, Hongchen and Cao, Yang and Zha, Zheng-Jun}, title = {LEMON: Learning 3D Human-Object Interaction Relation from 2D Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16284-16295} }
Low-Latency Neural Stereo Streaming-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hou_2024_CVPR, author = {Hou, Qiqi and Farhadzadeh, Farzad and Said, Amir and Sautiere, Guillaume and Le, Hoang}, title = {Low-Latency Neural Stereo Streaming}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7974-7984} }
Understanding Video Transformers via Universal Concept Discovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kowal_2024_CVPR, author = {Kowal, Matthew and Dave, Achal and Ambrus, Rares and Gaidon, Adrien and Derpanis, Konstantinos G. and Tokmakov, Pavel}, title = {Understanding Video Transformers via Universal Concept Discovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10946-10956} }
Exploring the Transferability of Visual Prompting for Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Yichi and Dong, Yinpeng and Zhang, Siyuan and Min, Tianzan and Su, Hang and Zhu, Jun}, title = {Exploring the Transferability of Visual Prompting for Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26562-26572} }
PointOBB: Learning Oriented Object Detection via Single Point Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2024_CVPR, author = {Luo, Junwei and Yang, Xue and Yu, Yi and Li, Qingyun and Yan, Junchi and Li, Yansheng}, title = {PointOBB: Learning Oriented Object Detection via Single Point Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16730-16740} }
Intrinsic Image Diffusion for Indoor Single-view Material Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Kocsis_2024_CVPR, author = {Kocsis, Peter and Sitzmann, Vincent and Nie{\ss}ner, Matthias}, title = {Intrinsic Image Diffusion for Indoor Single-view Material Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5198-5208} }
SHAP-EDITOR: Instruction-Guided Latent 3D Editing in Seconds-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Minghao and Xie, Junyu and Laina, Iro and Vedaldi, Andrea}, title = {SHAP-EDITOR: Instruction-Guided Latent 3D Editing in Seconds}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26456-26466} }
HyperSDFusion: Bridging Hierarchical Structures in Language and Geometry for Enhanced 3D Text2Shape Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Leng_2024_CVPR, author = {Leng, Zhiying and Birdal, Tolga and Liang, Xiaohui and Tombari, Federico}, title = {HyperSDFusion: Bridging Hierarchical Structures in Language and Geometry for Enhanced 3D Text2Shape Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19691-19700} }
OmniParser: A Unified Framework for Text Spotting, Key Information Extraction and Table Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wan_2024_CVPR, author = {Wan, Jianqiang and Song, Sibo and Yu, Wenwen and Liu, Yuliang and Cheng, Wenqing and Huang, Fei and Bai, Xiang and Yao, Cong and Yang, Zhibo}, title = {OmniParser: A Unified Framework for Text Spotting, Key Information Extraction and Table Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15641-15653} }
Are Conventional SNNs Really Efficient? A Perspective from Network Quantization-
[pdf]
[supp]
[bibtex]@InProceedings{Shen_2024_CVPR, author = {Shen, Guobin and Zhao, Dongcheng and Li, Tenglong and Li, Jindong and Zeng, Yi}, title = {Are Conventional SNNs Really Efficient? A Perspective from Network Quantization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27538-27547} }
Training Like a Medical Resident: Context-Prior Learning Toward Universal Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2024_CVPR, author = {Gao, Yunhe}, title = {Training Like a Medical Resident: Context-Prior Learning Toward Universal Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11194-11204} }
Material Palette: Extraction of Materials from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lopes_2024_CVPR, author = {Lopes, Ivan and Pizzati, Fabio and de Charette, Raoul}, title = {Material Palette: Extraction of Materials from a Single Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4379-4388} }
Initialization Matters for Adversarial Transfer Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hua_2024_CVPR, author = {Hua, Andong and Gu, Jindong and Xue, Zhiyu and Carlini, Nicholas and Wong, Eric and Qin, Yao}, title = {Initialization Matters for Adversarial Transfer Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24831-24840} }
RealCustom: Narrowing Real Text Word for Real-Time Open-Domain Text-to-Image Customization-
[pdf]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Mengqi and Mao, Zhendong and Liu, Mingcong and He, Qian and Zhang, Yongdong}, title = {RealCustom: Narrowing Real Text Word for Real-Time Open-Domain Text-to-Image Customization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7476-7485} }
MicroDiffusion: Implicit Representation-Guided Diffusion for 3D Reconstruction from Limited 2D Microscopy Projections-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hui_2024_CVPR, author = {Hui, Mude and Wei, Zihao and Zhu, Hongru and Xia, Fei and Zhou, Yuyin}, title = {MicroDiffusion: Implicit Representation-Guided Diffusion for 3D Reconstruction from Limited 2D Microscopy Projections}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11460-11469} }
Task-Conditioned Adaptation of Visual Features in Multi-Task Policy Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Marza_2024_CVPR, author = {Marza, Pierre and Matignon, Laetitia and Simonin, Olivier and Wolf, Christian}, title = {Task-Conditioned Adaptation of Visual Features in Multi-Task Policy Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17847-17856} }
L0-Sampler: An L0 Model Guided Volume Sampling for NeRF-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Liangchen and Zhang, Juyong}, title = {L0-Sampler: An L0 Model Guided Volume Sampling for NeRF}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21390-21400} }
Hybrid Proposal Refiner: Revisiting DETR Series from the Faster R-CNN Perspective-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Jinjing and Wei, Fangyun and Xu, Chang}, title = {Hybrid Proposal Refiner: Revisiting DETR Series from the Faster R-CNN Perspective}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17416-17426} }
Practical Measurements of Translucent Materials with Inter-Pixel Translucency Prior-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Zhenyu and Guo, Jie and Lai, Shuichang and Fu, Ruoyu and Kong, Mengxun and Wang, Chen and Sun, Hongyu and Zhang, Zhebin and Li, Chen and Guo, Yanwen}, title = {Practical Measurements of Translucent Materials with Inter-Pixel Translucency Prior}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20932-20942} }
TurboSL: Dense, Accurate and Fast 3D by Neural Inverse Structured Light-
[pdf]
[supp]
[bibtex]@InProceedings{Mirdehghan_2024_CVPR, author = {Mirdehghan, Parsa and Wu, Maxx and Chen, Wenzheng and Lindell, David B. and Kutulakos, Kiriakos N.}, title = {TurboSL: Dense, Accurate and Fast 3D by Neural Inverse Structured Light}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25067-25076} }
Text2QR: Harmonizing Aesthetic Customization and Scanning Robustness for Text-Guided QR Code Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Guangyang and Liu, Xiaohong and Jia, Jun and Cui, Xuehao and Zhai, Guangtao}, title = {Text2QR: Harmonizing Aesthetic Customization and Scanning Robustness for Text-Guided QR Code Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8456-8465} }
GS-IR: 3D Gaussian Splatting for Inverse Rendering-
[pdf]
[supp]
[bibtex]@InProceedings{Liang_2024_CVPR, author = {Liang, Zhihao and Zhang, Qi and Feng, Ying and Shan, Ying and Jia, Kui}, title = {GS-IR: 3D Gaussian Splatting for Inverse Rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21644-21653} }
SynFog: A Photo-realistic Synthetic Fog Dataset based on End-to-end Imaging Simulation for Advancing Real-World Defogging in Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2024_CVPR, author = {Xie, Yiming and Wei, Henglu and Liu, Zhenyi and Wang, Xiaoyu and Ji, Xiangyang}, title = {SynFog: A Photo-realistic Synthetic Fog Dataset based on End-to-end Imaging Simulation for Advancing Real-World Defogging in Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21763-21772} }
Video Harmonization with Triplet Spatio-Temporal Variation Patterns-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2024_CVPR, author = {Guo, Zonghui and Han, Xinyu and Zhang, Jie and Shan, Shiguang and Zheng, Haiyong}, title = {Video Harmonization with Triplet Spatio-Temporal Variation Patterns}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19177-19186} }
TRINS: Towards Multimodal Language Models that Can Read-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Ruiyi and Zhang, Yanzhe and Chen, Jian and Zhou, Yufan and Gu, Jiuxiang and Chen, Changyou and Sun, Tong}, title = {TRINS: Towards Multimodal Language Models that Can Read}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22584-22594} }
Self-Supervised Representation Learning from Arbitrary Scenarios-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Zhaowen and Zhu, Yousong and Chen, Zhiyang and Gao, Zongxin and Zhao, Rui and Zhao, Chaoyang and Tang, Ming and Wang, Jinqiao}, title = {Self-Supervised Representation Learning from Arbitrary Scenarios}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22967-22977} }
Improved Zero-Shot Classification by Adapting VLMs with Text Descriptions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Saha_2024_CVPR, author = {Saha, Oindrila and Van Horn, Grant and Maji, Subhransu}, title = {Improved Zero-Shot Classification by Adapting VLMs with Text Descriptions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17542-17552} }
Living Scenes: Multi-object Relocalization and Reconstruction in Changing 3D Environments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2024_CVPR, author = {Zhu, Liyuan and Huang, Shengyu and Schindler, Konrad and Armeni, Iro}, title = {Living Scenes: Multi-object Relocalization and Reconstruction in Changing 3D Environments}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28014-28024} }
CricaVPR: Cross-image Correlation-aware Representation Learning for Visual Place Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2024_CVPR, author = {Lu, Feng and Lan, Xiangyuan and Zhang, Lijun and Jiang, Dongmei and Wang, Yaowei and Yuan, Chun}, title = {CricaVPR: Cross-image Correlation-aware Representation Learning for Visual Place Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16772-16782} }
ECLIPSE: A Resource-Efficient Text-to-Image Prior for Image Generations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Patel_2024_CVPR, author = {Patel, Maitreya and Kim, Changhoon and Cheng, Sheng and Baral, Chitta and Yang, Yezhou}, title = {ECLIPSE: A Resource-Efficient Text-to-Image Prior for Image Generations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9069-9078} }
Adaptive Bidirectional Displacement for Semi-Supervised Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chi_2024_CVPR, author = {Chi, Hanyang and Pang, Jian and Zhang, Bingfeng and Liu, Weifeng}, title = {Adaptive Bidirectional Displacement for Semi-Supervised Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4070-4080} }
Accurate Training Data for Occupancy Map Prediction in Automated Driving Using Evidence Theory-
[pdf]
[supp]
[bibtex]@InProceedings{Kalble_2024_CVPR, author = {K\"alble, Jonas and Wirges, Sascha and Tatarchenko, Maxim and Ilg, Eddy}, title = {Accurate Training Data for Occupancy Map Prediction in Automated Driving Using Evidence Theory}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5281-5290} }
DiffusionLight: Light Probes for Free by Painting a Chrome Ball-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Phongthawee_2024_CVPR, author = {Phongthawee, Pakkapon and Chinchuthakun, Worameth and Sinsunthithet, Nontaphat and Jampani, Varun and Raj, Amit and Khungurn, Pramook and Suwajanakorn, Supasorn}, title = {DiffusionLight: Light Probes for Free by Painting a Chrome Ball}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {98-108} }
Instance-level Expert Knowledge and Aggregate Discriminative Attention for Radiology Report Generation-
[pdf]
[bibtex]@InProceedings{Bu_2024_CVPR, author = {Bu, Shenshen and Li, Taiji and Yang, Yuedong and Dai, Zhiming}, title = {Instance-level Expert Knowledge and Aggregate Discriminative Attention for Radiology Report Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14194-14204} }
Task-Adaptive Saliency Guidance for Exemplar-free Class Incremental Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Xialei and Zhai, Jiang-Tian and Bagdanov, Andrew D. and Li, Ke and Cheng, Ming-Ming}, title = {Task-Adaptive Saliency Guidance for Exemplar-free Class Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23954-23963} }
Rethinking the Spatial Inconsistency in Classifier-Free Diffusion Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shen_2024_CVPR, author = {Shen, Dazhong and Song, Guanglu and Xue, Zeyue and Wang, Fu-Yun and Liu, Yu}, title = {Rethinking the Spatial Inconsistency in Classifier-Free Diffusion Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9370-9379} }
Language-driven All-in-one Adverse Weather Removal-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Hao and Pan, Liyuan and Yang, Yan and Liang, Wei}, title = {Language-driven All-in-one Adverse Weather Removal}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24902-24912} }
Each Test Image Deserves A Specific Prompt: Continual Test-Time Adaptation for 2D Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Ziyang and Pan, Yongsheng and Ye, Yiwen and Lu, Mengkang and Xia, Yong}, title = {Each Test Image Deserves A Specific Prompt: Continual Test-Time Adaptation for 2D Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11184-11193} }
KTPFormer: Kinematics and Trajectory Prior Knowledge-Enhanced Transformer for 3D Human Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2024_CVPR, author = {Peng, Jihua and Zhou, Yanghong and Mok, P. Y.}, title = {KTPFormer: Kinematics and Trajectory Prior Knowledge-Enhanced Transformer for 3D Human Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1123-1132} }
MAPLM: A Real-World Large-Scale Vision-Language Benchmark for Map and Traffic Scene Understanding-
[pdf]
[bibtex]@InProceedings{Cao_2024_CVPR, author = {Cao, Xu and Zhou, Tong and Ma, Yunsheng and Ye, Wenqian and Cui, Can and Tang, Kun and Cao, Zhipeng and Liang, Kaizhao and Wang, Ziran and Rehg, James M. and Zheng, Chao}, title = {MAPLM: A Real-World Large-Scale Vision-Language Benchmark for Map and Traffic Scene Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21819-21830} }
EgoExoLearn: A Dataset for Bridging Asynchronous Ego- and Exo-centric View of Procedural Activities in Real World-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Yifei and Chen, Guo and Xu, Jilan and Zhang, Mingfang and Yang, Lijin and Pei, Baoqi and Zhang, Hongjie and Dong, Lu and Wang, Yali and Wang, Limin and Qiao, Yu}, title = {EgoExoLearn: A Dataset for Bridging Asynchronous Ego- and Exo-centric View of Procedural Activities in Real World}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22072-22086} }
Differentiable Micro-Mesh Construction-
[pdf]
[supp]
[bibtex]@InProceedings{Dou_2024_CVPR, author = {Dou, Yishun and Zheng, Zhong and Jin, Qiaoqiao and Shi, Rui and Li, Yuhan and Ni, Bingbing}, title = {Differentiable Micro-Mesh Construction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4294-4303} }
Improved Implicit Neural Representation with Fourier Reparameterized Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2024_CVPR, author = {Shi, Kexuan and Zhou, Xingyu and Gu, Shuhang}, title = {Improved Implicit Neural Representation with Fourier Reparameterized Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25985-25994} }
SNED: Superposition Network Architecture Search for Efficient Video Diffusion Model-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Zhengang and Kang, Yan and Liu, Yuchen and Liu, Difan and Hinz, Tobias and Liu, Feng and Wang, Yanzhi}, title = {SNED: Superposition Network Architecture Search for Efficient Video Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8661-8670} }
Groupwise Query Specialization and Quality-Aware Multi-Assignment for Transformer-based Visual Relationship Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Jongha and Park, Jihwan and Park, Jinyoung and Kim, Jinyoung and Kim, Sehyung and Kim, Hyunwoo J.}, title = {Groupwise Query Specialization and Quality-Aware Multi-Assignment for Transformer-based Visual Relationship Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28160-28169} }
LeftRefill: Filling Right Canvas based on Left Reference through Generalized Text-to-Image Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2024_CVPR, author = {Cao, Chenjie and Cai, Yunuo and Dong, Qiaole and Wang, Yikai and Fu, Yanwei}, title = {LeftRefill: Filling Right Canvas based on Left Reference through Generalized Text-to-Image Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7705-7715} }
Personalized Residuals for Concept-Driven Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ham_2024_CVPR, author = {Ham, Cusuh and Fisher, Matthew and Hays, James and Kolkin, Nicholas and Liu, Yuchen and Zhang, Richard and Hinz, Tobias}, title = {Personalized Residuals for Concept-Driven Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8186-8195} }
Condition-Aware Neural Network for Controlled Image Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Cai_2024_CVPR, author = {Cai, Han and Li, Muyang and Zhang, Qinsheng and Liu, Ming-Yu and Han, Song}, title = {Condition-Aware Neural Network for Controlled Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7194-7203} }
Versatile Navigation Under Partial Observability via Value-guided Diffusion Policy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Gengyu and Tang, Hao and Yan, Yan}, title = {Versatile Navigation Under Partial Observability via Value-guided Diffusion Policy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17943-17951} }
All in One Framework for Multimodal Re-identification in the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, He and Ye, Mang and Zhang, Ming and Du, Bo}, title = {All in One Framework for Multimodal Re-identification in the Wild}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17459-17469} }
Looking 3D: Anomaly Detection with 2D-3D Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bhunia_2024_CVPR, author = {Bhunia, Ankan and Li, Changjian and Bilen, Hakan}, title = {Looking 3D: Anomaly Detection with 2D-3D Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17263-17272} }
Purified and Unified Steganographic Network-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Guobiao and Li, Sheng and Luo, Zicong and Qian, Zhenxing and Zhang, Xinpeng}, title = {Purified and Unified Steganographic Network}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27569-27578} }
VS: Reconstructing Clothed 3D Human from Single Image via Vertex Shift-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Leyuan and Li, Yuhan and Gao, Yunqi and Gao, Changxin and Liu, Yuanyuan and Chen, Jingying}, title = {VS: Reconstructing Clothed 3D Human from Single Image via Vertex Shift}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10498-10507} }
PARA-Drive: Parallelized Architecture for Real-time Autonomous Driving-
[pdf]
[bibtex]@InProceedings{Weng_2024_CVPR, author = {Weng, Xinshuo and Ivanovic, Boris and Wang, Yan and Wang, Yue and Pavone, Marco}, title = {PARA-Drive: Parallelized Architecture for Real-time Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15449-15458} }
TEA: Test-time Energy Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2024_CVPR, author = {Yuan, Yige and Xu, Bingbing and Hou, Liang and Sun, Fei and Shen, Huawei and Cheng, Xueqi}, title = {TEA: Test-time Energy Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23901-23911} }
NEAT: Distilling 3D Wireframes from Neural Attraction Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xue_2024_CVPR, author = {Xue, Nan and Tan, Bin and Xiao, Yuxi and Dong, Liang and Xia, Gui-Song and Wu, Tianfu and Shen, Yujun}, title = {NEAT: Distilling 3D Wireframes from Neural Attraction Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19968-19977} }
Prompt Augmentation for Self-supervised Text-guided Image Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Bodur_2024_CVPR, author = {Bodur, Rumeysa and Bhattarai, Binod and Kim, Tae-Kyun}, title = {Prompt Augmentation for Self-supervised Text-guided Image Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8829-8838} }
Pink: Unveiling the Power of Referential Comprehension for Multi-modal LLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xuan_2024_CVPR, author = {Xuan, Shiyu and Guo, Qingpei and Yang, Ming and Zhang, Shiliang}, title = {Pink: Unveiling the Power of Referential Comprehension for Multi-modal {LLMs}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {13838--13848} }
LDP: Language-driven Dual-Pixel Image Defocus Deblurring Network-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Hao and Pan, Liyuan and Yang, Yan and Hartley, Richard and Liu, Miaomiao}, title = {{LDP}: Language-driven Dual-Pixel Image Defocus Deblurring Network}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {24078--24087} }
MMSum: A Dataset for Multimodal Summarization and Thumbnail Generation of Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qiu_2024_CVPR, author = {Qiu, Jielin and Zhu, Jiacheng and Han, William and Kumar, Aditesh and Mittal, Karthik and Jin, Claire and Yang, Zhengyuan and Li, Linjie and Wang, Jianfeng and Zhao, Ding and Li, Bo and Wang, Lijuan}, title = {{MMSum}: A Dataset for Multimodal Summarization and Thumbnail Generation of Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {21909--21921} }
HalluciDoctor: Mitigating Hallucinatory Toxicity in Visual Instruction Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2024_CVPR, author = {Yu, Qifan and Li, Juncheng and Wei, Longhui and Pang, Liang and Ye, Wentao and Qin, Bosheng and Tang, Siliang and Tian, Qi and Zhuang, Yueting}, title = {{HalluciDoctor}: Mitigating Hallucinatory Toxicity in Visual Instruction Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {12944--12953} }
Pre-trained Vision and Language Transformers Are Few-Shot Incremental Learners-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2024_CVPR, author = {Park, Keon-Hee and Song, Kyungwoo and Park, Gyeong-Moon}, title = {Pre-trained Vision and Language Transformers Are Few-Shot Incremental Learners}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {23881--23890} }
Guess The Unseen: Dynamic 3D Scene Reconstruction from Partial 2D Glimpses-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2024_CVPR, author = {Lee, Inhee and Kim, Byungjun and Joo, Hanbyul}, title = {Guess The Unseen: Dynamic {3D} Scene Reconstruction from Partial {2D} Glimpses}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {1062--1071} }
C^2RV: Cross-Regional and Cross-View Learning for Sparse-View CBCT Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2024_CVPR, author = {Lin, Yiqun and Yang, Jiewen and Wang, Hualiang and Ding, Xinpeng and Zhao, Wei and Li, Xiaomeng}, title = {{C$^2$RV}: Cross-Regional and Cross-View Learning for Sparse-View {CBCT} Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {11205--11214} }
HyperDreamBooth: HyperNetworks for Fast Personalization of Text-to-Image Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ruiz_2024_CVPR, author = {Ruiz, Nataniel and Li, Yuanzhen and Jampani, Varun and Wei, Wei and Hou, Tingbo and Pritch, Yael and Wadhwa, Neal and Rubinstein, Michael and Aberman, Kfir}, title = {{HyperDreamBooth}: {HyperNetworks} for Fast Personalization of Text-to-Image Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {6527--6536} }
Language-guided Image Reflection Separation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhong_2024_CVPR, author = {Zhong, Haofeng and Hong, Yuchen and Weng, Shuchen and Liang, Jinxiu and Shi, Boxin}, title = {Language-guided Image Reflection Separation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {24913--24922} }
HardMo: A Large-Scale Hardcase Dataset for Motion Capture-
[pdf]
[supp]
[bibtex]@InProceedings{Liao_2024_CVPR, author = {Liao, Jiaqi and Luo, Chuanchen and Du, Yinuo and Wang, Yuxi and Yin, Xucheng and Zhang, Man and Zhang, Zhaoxiang and Peng, Junran}, title = {{HardMo}: A Large-Scale Hardcase Dataset for Motion Capture}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {1629--1638} }
View-Category Interactive Sharing Transformer for Incomplete Multi-View Multi-Label Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Ou_2024_CVPR, author = {Ou, Shilong and Xue, Zhe and Li, Yawen and Liang, Meiyu and Cai, Yuanqiang and Wu, Junjiang}, title = {View-Category Interactive Sharing Transformer for Incomplete Multi-View Multi-Label Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {27467--27476} }
The More You See in 2D the More You Perceive in 3D-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2024_CVPR, author = {Han, Xinyang and Gao, Zelin and Kanazawa, Angjoo and Goel, Shubham and Gandelsman, Yossi}, title = {The More You See in {2D} the More You Perceive in {3D}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {20912--20922} }
GLiDR: Topologically Regularized Graph Generative Network for Sparse LiDAR Point Clouds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kumar_2024_CVPR, author = {Kumar, Prashant and Bhat, Kshitij Madhav and Nadkarni, Vedang Bhupesh Shenvi and Kalra, Prem}, title = {{GLiDR}: Topologically Regularized Graph Generative Network for Sparse {LiDAR} Point Clouds}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {15152--15161} }
Separate and Conquer: Decoupling Co-occurrence via Decomposition and Representation for Weakly Supervised Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Zhiwei and Fu, Kexue and Duan, Minghong and Qu, Linhao and Wang, Shuo and Song, Zhijian}, title = {Separate and Conquer: Decoupling Co-occurrence via Decomposition and Representation for Weakly Supervised Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {3606--3615} }
BiPer: Binary Neural Networks using a Periodic Function-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vargas_2024_CVPR, author = {Vargas, Edwin and Correa, Claudia V. and Hinojosa, Carlos and Arguello, Henry}, title = {{BiPer}: Binary Neural Networks using a Periodic Function}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {5684--5693} }
Unifying Automatic and Interactive Matting with Pretrained ViTs-
[pdf]
[supp]
[bibtex]@InProceedings{Ye_2024_CVPR, author = {Ye, Zixuan and Liu, Wenze and Guo, He and Liang, Yujia and Hong, Chaoyi and Lu, Hao and Cao, Zhiguo}, title = {Unifying Automatic and Interactive Matting with Pretrained {ViTs}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {25585--25594} }
Segment Any Event Streams via Weighted Adaptation of Pivotal Tokens-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Zhiwen and Zhu, Zhiyu and Zhang, Yifan and Hou, Junhui and Shi, Guangming and Wu, Jinjian}, title = {Segment Any Event Streams via Weighted Adaptation of Pivotal Tokens}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {3890--3900} }
AnyDoor: Zero-shot Object-level Image Customization-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Xi and Huang, Lianghua and Liu, Yu and Shen, Yujun and Zhao, Deli and Zhao, Hengshuang}, title = {{AnyDoor}: Zero-shot Object-level Image Customization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {6593--6602} }
Commonsense Prototype for Outdoor Unsupervised 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Hai and Zhao, Shijia and Huang, Xun and Wen, Chenglu and Li, Xin and Wang, Cheng}, title = {Commonsense Prototype for Outdoor Unsupervised {3D} Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {14968--14977} }
Lookahead Exploration with Neural Radiance Representation for Continuous Vision-Language Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Zihan and Li, Xiangyang and Yang, Jiahao and Liu, Yeqi and Hu, Junjie and Jiang, Ming and Jiang, Shuqiang}, title = {Lookahead Exploration with Neural Radiance Representation for Continuous Vision-Language Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {13753--13762} }
Clustering Propagation for Universal Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ding_2024_CVPR, author = {Ding, Yuhang and Li, Liulei and Wang, Wenguan and Yang, Yi}, title = {Clustering Propagation for Universal Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {3357--3369} }
MoPE-CLIP: Structured Pruning for Efficient Vision-Language Models with Module-wise Pruning Error Metric-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2024_CVPR, author = {Lin, Haokun and Bai, Haoli and Liu, Zhili and Hou, Lu and Sun, Muyi and Song, Linqi and Wei, Ying and Sun, Zhenan}, title = {{MoPE-CLIP}: Structured Pruning for Efficient Vision-Language Models with Module-wise Pruning Error Metric}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {27370--27380} }
Learning Vision from Models Rivals Learning Vision from Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tian_2024_CVPR, author = {Tian, Yonglong and Fan, Lijie and Chen, Kaifeng and Katabi, Dina and Krishnan, Dilip and Isola, Phillip}, title = {Learning Vision from Models Rivals Learning Vision from Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {15887--15898} }
Leveraging Frame Affinity for sRGB-to-RAW Video De-rendering-
[pdf]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Chen and Han, Wencheng and Zhou, Yang and Shen, Jianbing and Xu, Cheng-zhong and Liu, Wentao}, title = {Leveraging Frame Affinity for {sRGB-to-RAW} Video De-rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {25659--25668} }
Adapting Short-Term Transformers for Action Detection in Untrimmed Videos-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Min and Gao, Huan and Guo, Ping and Wang, Limin}, title = {Adapting Short-Term Transformers for Action Detection in Untrimmed Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {18570--18579} }
The Mirrored Influence Hypothesis: Efficient Data Influence Estimation by Harnessing Forward Passes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ko_2024_CVPR, author = {Ko, Myeongseob and Kang, Feiyang and Shi, Weiyan and Jin, Ming and Yu, Zhou and Jia, Ruoxi}, title = {The Mirrored Influence Hypothesis: Efficient Data Influence Estimation by Harnessing Forward Passes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {26286--26295} }
SOAC: Spatio-Temporal Overlap-Aware Multi-Sensor Calibration using Neural Radiance Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Herau_2024_CVPR, author = {Herau, Quentin and Piasco, Nathan and Bennehar, Moussab and Roldao, Luis and Tsishkou, Dzmitry and Migniot, Cyrille and Vasseur, Pascal and Demonceaux, C{\'e}dric}, title = {{SOAC}: Spatio-Temporal Overlap-Aware Multi-Sensor Calibration using Neural Radiance Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {15131--15140} }
G^3-LQ: Marrying Hyperbolic Alignment with Explicit Semantic-Geometric Modeling for 3D Visual Grounding-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Yuan and Li, Yali and Wang, Shengjin}, title = {{G$^3$-LQ}: Marrying Hyperbolic Alignment with Explicit Semantic-Geometric Modeling for {3D} Visual Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {13917--13926} }
Garment Recovery with Shape and Deformation Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Ren and Dumery, Corentin and Guillard, Beno{\^\i}t and Fua, Pascal}, title = {Garment Recovery with Shape and Deformation Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {1586--1595} }
Psychometry: An Omnifit Model for Image Reconstruction from Human Brain Activity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Quan_2024_CVPR, author = {Quan, Ruijie and Wang, Wenguan and Tian, Zhibo and Ma, Fan and Yang, Yi}, title = {Psychometry: An Omnifit Model for Image Reconstruction from Human Brain Activity}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {233--243} }
Exploring Regional Clues in CLIP for Zero-Shot Semantic Segmentation-
[pdf]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Yi and Guo, Meng-Hao and Wang, Miao and Hu, Shi-Min}, title = {Exploring Regional Clues in {CLIP} for Zero-Shot Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {3270--3280} }
Move as You Say Interact as You Can: Language-guided Human Motion Generation with Scene Affordance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Zan and Chen, Yixin and Jia, Baoxiong and Li, Puhao and Zhang, Jinlu and Zhang, Jingze and Liu, Tengyu and Zhu, Yixin and Liang, Wei and Huang, Siyuan}, title = {Move as You Say Interact as You Can: Language-guided Human Motion Generation with Scene Affordance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {433--444} }
Choose What You Need: Disentangled Representation Learning for Scene Text Recognition Removal and Editing-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Boqiang and Xie, Hongtao and Gao, Zuan and Wang, Yuxin}, title = {Choose What You Need: Disentangled Representation Learning for Scene Text Recognition Removal and Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {28358--28368} }
Generalizable Face Landmarking Guided by Conditional Face Warping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2024_CVPR, author = {Liang, Jiayi and Liu, Haotian and Xu, Hongteng and Luo, Dixin}, title = {Generalizable Face Landmarking Guided by Conditional Face Warping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {2425--2435} }
Sat2Scene: 3D Urban Scene Generation from Satellite Images with Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Zuoyue and Li, Zhenqiang and Cui, Zhaopeng and Pollefeys, Marc and Oswald, Martin R.}, title = {{Sat2Scene}: {3D} Urban Scene Generation from Satellite Images with Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {7141--7150} }
Control4D: Efficient 4D Portrait Editing with Text-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shao_2024_CVPR, author = {Shao, Ruizhi and Sun, Jingxiang and Peng, Cheng and Zheng, Zerong and Zhou, Boyao and Zhang, Hongwen and Liu, Yebin}, title = {{Control4D}: Efficient {4D} Portrait Editing with Text}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {4556--4567} }
Symphonize 3D Semantic Scene Completion with Contextual Instance Queries-
[pdf]
[arXiv]
[bibtex]@InProceedings{Jiang_2024_CVPR, author = {Jiang, Haoyi and Cheng, Tianheng and Gao, Naiyu and Zhang, Haoyang and Lin, Tianwei and Liu, Wenyu and Wang, Xinggang}, title = {Symphonize {3D} Semantic Scene Completion with Contextual Instance Queries}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {20258--20267} }
Loopy-SLAM: Dense Neural SLAM with Loop Closures-
[pdf]
[bibtex]@InProceedings{Liso_2024_CVPR, author = {Liso, Lorenzo and Sandstr{\"o}m, Erik and Yugay, Vladimir and Van Gool, Luc and Oswald, Martin R.}, title = {{Loopy-SLAM}: Dense Neural {SLAM} with Loop Closures}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {20363--20373} }
CLIPtone: Unsupervised Learning for Text-based Image Tone Adjustment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2024_CVPR, author = {Lee, Hyeongmin and Kang, Kyoungkook and Ok, Jungseul and Cho, Sunghyun}, title = {{CLIPtone}: Unsupervised Learning for Text-based Image Tone Adjustment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {2942--2951} }
ToonerGAN: Reinforcing GANs for Obfuscating Automated Facial Indexing-
[pdf]
[supp]
[bibtex]@InProceedings{Thakral_2024_CVPR, author = {Thakral, Kartik and Prasad, Shashikant and Aswani, Stuti and Vatsa, Mayank and Singh, Richa}, title = {{ToonerGAN}: Reinforcing {GANs} for Obfuscating Automated Facial Indexing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {10875--10884} }
Content-Adaptive Non-Local Convolution for Remote Sensing Pansharpening-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Duan_2024_CVPR, author = {Duan, Yule and Wu, Xiao and Deng, Haoyu and Deng, Liang-Jian}, title = {Content-Adaptive Non-Local Convolution for Remote Sensing Pansharpening}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {27738--27747} }
Codebook Transfer with Part-of-Speech for Vector-Quantized Image Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Baoquan and Wang, Huaibin and Luo, Chuyao and Li, Xutao and Liang, Guotao and Ye, Yunming and Qi, Xiaochen and He, Yao}, title = {Codebook Transfer with Part-of-Speech for Vector-Quantized Image Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {7757--7766} }
Learning Inclusion Matching for Animation Paint Bucket Colorization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dai_2024_CVPR, author = {Dai, Yuekun and Zhou, Shangchen and Li, Qinyue and Li, Chongyi and Loy, Chen Change}, title = {Learning Inclusion Matching for Animation Paint Bucket Colorization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {25544--25553} }
Editable Scene Simulation for Autonomous Driving via Collaborative LLM-Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2024_CVPR, author = {Wei, Yuxi and Wang, Zi and Lu, Yifan and Xu, Chenxin and Liu, Changxing and Zhao, Hao and Chen, Siheng and Wang, Yanfeng}, title = {Editable Scene Simulation for Autonomous Driving via Collaborative {LLM-Agents}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {15077--15087} }
SAM-6D: Segment Anything Model Meets Zero-Shot 6D Object Pose Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2024_CVPR, author = {Lin, Jiehong and Liu, Lihua and Lu, Dekun and Jia, Kui}, title = {{SAM-6D}: Segment Anything Model Meets Zero-Shot {6D} Object Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {27906--27916} }
InceptionNeXt: When Inception Meets ConvNeXt-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2024_CVPR, author = {Yu, Weihao and Zhou, Pan and Yan, Shuicheng and Wang, Xinchao}, title = {{InceptionNeXt}: When {Inception} Meets {ConvNeXt}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {5672--5683} }
SnAG: Scalable and Accurate Video Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mu_2024_CVPR, author = {Mu, Fangzhou and Mo, Sicheng and Li, Yin}, title = {{SnAG}: Scalable and Accurate Video Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {18930--18940} }
SPOT: Self-Training with Patch-Order Permutation for Object-Centric Learning with Autoregressive Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kakogeorgiou_2024_CVPR, author = {Kakogeorgiou, Ioannis and Gidaris, Spyros and Karantzalos, Konstantinos and Komodakis, Nikos}, title = {{SPOT}: Self-Training with Patch-Order Permutation for Object-Centric Learning with Autoregressive Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {22776--22786} }
LiveHPS: LiDAR-based Scene-level Human Pose and Shape Estimation in Free Environment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2024_CVPR, author = {Ren, Yiming and Han, Xiao and Zhao, Chengfeng and Wang, Jingya and Xu, Lan and Yu, Jingyi and Ma, Yuexin}, title = {{LiveHPS}: {LiDAR-based} Scene-level Human Pose and Shape Estimation in Free Environment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {1281--1291} }
Segment Every Out-of-Distribution Object-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Wenjie and Li, Jia and Dong, Xin and Xiang, Yu and Guo, Yunhui}, title = {Segment Every Out-of-Distribution Object}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {3910--3920} }
Building Vision-Language Models on Solid Foundations with Masked Distillation-
[pdf]
[bibtex]@InProceedings{Sameni_2024_CVPR, author = {Sameni, Sepehr and Kafle, Kushal and Tan, Hao and Jenni, Simon}, title = {Building Vision-Language Models on Solid Foundations with Masked Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {14216--14226} }
Wavelet-based Fourier Information Interaction with Frequency Diffusion Adjustment for Underwater Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Chen and Cai, Weiling and Dong, Chenyu and Hu, Chengwei}, title = {Wavelet-based {Fourier} Information Interaction with Frequency Diffusion Adjustment for Underwater Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {8281--8291} }
CroSel: Cross Selection of Confident Pseudo Labels for Partial-Label Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tian_2024_CVPR, author = {Tian, Shiyu and Wei, Hongxin and Wang, Yiqun and Feng, Lei}, title = {{CroSel}: Cross Selection of Confident Pseudo Labels for Partial-Label Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {19479--19488} }
PoNQ: a Neural QEM-based Mesh Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Maruani_2024_CVPR, author = {Maruani, Nissim and Ovsjanikov, Maks and Alliez, Pierre and Desbrun, Mathieu}, title = {{PoNQ}: a Neural {QEM-based} Mesh Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {3647--3657} }
ModaVerse: Efficiently Transforming Modalities with LLMs-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Xinyu and Zhuang, Bohan and Wu, Qi}, title = {{ModaVerse}: Efficiently Transforming Modalities with {LLMs}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {26606--26616} }
TransLoc4D: Transformer-based 4D Radar Place Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Peng_2024_CVPR, author = {Peng, Guohao and Li, Heshan and Zhao, Yangyang and Zhang, Jun and Wu, Zhenyu and Zheng, Pengyu and Wang, Danwei}, title = {{TransLoc4D}: Transformer-based {4D} Radar Place Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {17595--17605} }
Frequency-aware Event-based Video Deblurring for Real-World Motion Blur-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Taewoo and Cho, Hoonhee and Yoon, Kuk-Jin}, title = {Frequency-aware Event-based Video Deblurring for Real-World Motion Blur}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {24966--24976} }
Multiscale Vision Transformers Meet Bipartite Matching for Efficient Single-stage Action Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ntinou_2024_CVPR, author = {Ntinou, Ioanna and Sanchez, Enrique and Tzimiropoulos, Georgios}, title = {Multiscale Vision Transformers Meet Bipartite Matching for Efficient Single-stage Action Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {18827--18836} }
Boosting Order-Preserving and Transferability for Neural Architecture Search: a Joint Architecture Refined Search and Fine-tuning Approach-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Beichen and Wang, Xiaoxing and Qin, Xiaohan and Yan, Junchi}, title = {Boosting Order-Preserving and Transferability for Neural Architecture Search: a Joint Architecture Refined Search and Fine-tuning Approach}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {5662--5671} }
Dr. Bokeh: DiffeRentiable Occlusion-aware Bokeh Rendering-
[pdf]
[supp]
[bibtex]@InProceedings{Sheng_2024_CVPR, author = {Sheng, Yichen and Yu, Zixun and Ling, Lu and Cao, Zhiwen and Zhang, Xuaner and Lu, Xin and Xian, Ke and Lin, Haiting and Benes, Bedrich}, title = {Dr. {Bokeh}: {DiffeRentiable} Occlusion-aware Bokeh Rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {4515--4525} }
Unsegment Anything by Simulating Deformation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2024_CVPR, author = {Lu, Jiahao and Yang, Xingyi and Wang, Xinchao}, title = {Unsegment Anything by Simulating Deformation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {24294--24304} }
Transductive Zero-Shot and Few-Shot CLIP-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Martin_2024_CVPR, author = {Martin, S{\'e}gol{\`e}ne and Huang, Yunshi and Shakeri, Fereshteh and Pesquet, Jean-Christophe and Ben Ayed, Ismail}, title = {Transductive Zero-Shot and Few-Shot {CLIP}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {28816--28826} }
Deep Single Image Camera Calibration by Heatmap Regression to Recover Fisheye Images Under Manhattan World Assumption-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wakai_2024_CVPR, author = {Wakai, Nobuhiko and Sato, Satoshi and Ishii, Yasunori and Yamashita, Takayoshi}, title = {Deep Single Image Camera Calibration by Heatmap Regression to Recover Fisheye Images Under {Manhattan} World Assumption}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {11884--11894} }
ID-Blau: Image Deblurring by Implicit Diffusion-based reBLurring AUgmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Jia-Hao and Tsai, Fu-Jen and Peng, Yan-Tsung and Tsai, Chung-Chi and Lin, Chia-Wen and Lin, Yen-Yu}, title = {{ID-Blau}: Image Deblurring by Implicit Diffusion-based {reBLurring} {AUgmentation}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {25847--25856} }
LAENeRF: Local Appearance Editing for Neural Radiance Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Radl_2024_CVPR, author = {Radl, Lukas and Steiner, Michael and Kurz, Andreas and Steinberger, Markus}, title = {{LAENeRF}: Local Appearance Editing for Neural Radiance Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {4969--4978} }
CSTA: CNN-based Spatiotemporal Attention for Video Summarization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Son_2024_CVPR, author = {Son, Jaewon and Park, Jaehun and Kim, Kwangsu}, title = {{CSTA}: {CNN-based} Spatiotemporal Attention for Video Summarization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {18847--18856} }
Adversarial Score Distillation: When score distillation meets GAN-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2024_CVPR, author = {Wei, Min and Zhou, Jingkai and Sun, Junyao and Zhang, Xuesong}, title = {Adversarial Score Distillation: When score distillation meets {GAN}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {8131--8141} }
Decentralized Directed Collaboration for Personalized Federated Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Yingqi and Shi, Yifan and Li, Qinglun and Wu, Baoyuan and Wang, Xueqian and Shen, Li}, title = {Decentralized Directed Collaboration for Personalized Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {23168--23178} }
Vector Graphics Generation via Mutually Impulsed Dual-domain Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Zhongyin and Chen, Ye and Hu, Zhangli and Chen, Xuanhong and Ni, Bingbing}, title = {Vector Graphics Generation via Mutually Impulsed Dual-domain Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4420-4428} }
PEM: Prototype-based Efficient MaskFormer for Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cavagnero_2024_CVPR, author = {Cavagnero, Niccol\`o and Rosi, Gabriele and Cuttano, Claudia and Pistilli, Francesca and Ciccone, Marco and Averta, Giuseppe and Cermelli, Fabio}, title = {PEM: Prototype-based Efficient MaskFormer for Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15804-15813} }
Referring Expression Counting-
[pdf]
[supp]
[bibtex]@InProceedings{Dai_2024_CVPR, author = {Dai, Siyang and Liu, Jun and Cheung, Ngai-Man}, title = {Referring Expression Counting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16985-16995} }
ScoreHypo: Probabilistic Human Mesh Estimation with Hypothesis Scoring-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Yuan and Ma, Xiaoxuan and Su, Jiajun and Zhu, Wentao and Qiao, Yu and Wang, Yizhou}, title = {ScoreHypo: Probabilistic Human Mesh Estimation with Hypothesis Scoring}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {979-989} }
GES : Generalized Exponential Splatting for Efficient Radiance Field Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hamdi_2024_CVPR, author = {Hamdi, Abdullah and Melas-Kyriazi, Luke and Mai, Jinjie and Qian, Guocheng and Liu, Ruoshi and Vondrick, Carl and Ghanem, Bernard and Vedaldi, Andrea}, title = {GES : Generalized Exponential Splatting for Efficient Radiance Field Rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19812-19822} }
Learning to Predict Activity Progress by Self-Supervised Video Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Donahue_2024_CVPR, author = {Donahue, Gerard and Elhamifar, Ehsan}, title = {Learning to Predict Activity Progress by Self-Supervised Video Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18667-18677} }
VicTR: Video-conditioned Text Representations for Activity Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kahatapitiya_2024_CVPR, author = {Kahatapitiya, Kumara and Arnab, Anurag and Nagrani, Arsha and Ryoo, Michael S.}, title = {VicTR: Video-conditioned Text Representations for Activity Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18547-18558} }
Label-Efficient Group Robustness via Out-of-Distribution Concept Curation-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Yiwei and Liu, Anthony Z. and Wolfe, Robert and Caliskan, Aylin and Howe, Bill}, title = {Label-Efficient Group Robustness via Out-of-Distribution Concept Curation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12426-12434} }
MMCert: Provable Defense against Adversarial Attacks to Multi-modal Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Yanting and Fu, Hongye and Zou, Wei and Jia, Jinyuan}, title = {MMCert: Provable Defense against Adversarial Attacks to Multi-modal Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24655-24664} }
3DToonify: Creating Your High-Fidelity 3D Stylized Avatar Easily from 2D Portrait Images-
[pdf]
[supp]
[bibtex]@InProceedings{Men_2024_CVPR, author = {Men, Yifang and Liu, Hanxi and Yao, Yuan and Cui, Miaomiao and Xie, Xuansong and Lian, Zhouhui}, title = {3DToonify: Creating Your High-Fidelity 3D Stylized Avatar Easily from 2D Portrait Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10127-10137} }
NAYER: Noisy Layer Data Generation for Efficient and Effective Data-free Knowledge Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tran_2024_CVPR, author = {Tran, Minh-Tuan and Le, Trung and Le, Xuan-May and Harandi, Mehrtash and Tran, Quan Hung and Phung, Dinh}, title = {NAYER: Noisy Layer Data Generation for Efficient and Effective Data-free Knowledge Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23860-23869} }
OmniVec2 - A Novel Transformer based Network for Large Scale Multimodal and Multitask Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Srivastava_2024_CVPR, author = {Srivastava, Siddharth and Sharma, Gaurav}, title = {OmniVec2 - A Novel Transformer based Network for Large Scale Multimodal and Multitask Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27412-27424} }
Investigating Compositional Challenges in Vision-Language Models for Visual Grounding-
[pdf]
[supp]
[bibtex]@InProceedings{Zeng_2024_CVPR, author = {Zeng, Yunan and Huang, Yan and Zhang, Jinjin and Jie, Zequn and Chai, Zhenhua and Wang, Liang}, title = {Investigating Compositional Challenges in Vision-Language Models for Visual Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14141-14151} }
6D-Diff: A Keypoint Diffusion Framework for 6D Object Pose Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Li and Qu, Haoxuan and Cai, Yujun and Liu, Jun}, title = {6D-Diff: A Keypoint Diffusion Framework for 6D Object Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9676-9686} }
Generative Region-Language Pretraining for Open-Ended Object Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lin_2024_CVPR, author = {Lin, Chuang and Jiang, Yi and Qu, Lizhen and Yuan, Zehuan and Cai, Jianfei}, title = {Generative Region-Language Pretraining for Open-Ended Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13958-13968} }
Enhancing Post-training Quantization Calibration through Contrastive Learning-
[pdf]
[bibtex]@InProceedings{Shang_2024_CVPR, author = {Shang, Yuzhang and Liu, Gaowen and Kompella, Ramana Rao and Yan, Yan}, title = {Enhancing Post-training Quantization Calibration through Contrastive Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15921-15930} }
Efficient Model Stealing Defense with Noise Transition Matrix-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Dong-Dong and Fu, Chilin and Wu, Weichang and Xia, Wenwen and Zhang, Xiaolu and Zhou, Jun and Zhang, Min-Ling}, title = {Efficient Model Stealing Defense with Noise Transition Matrix}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24305-24315} }
MeshPose: Unifying DensePose and 3D Body Mesh Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Le_2024_CVPR, author = {Le, Eric-Tuan and Kakolyris, Antonis and Koutras, Petros and Tam, Himmy and Skordos, Efstratios and Papandreou, George and G\"uler, Riza Alp and Kokkinos, Iasonas}, title = {MeshPose: Unifying DensePose and 3D Body Mesh Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2405-2414} }
Unsupervised Salient Instance Detection-
[pdf]
[bibtex]@InProceedings{Tian_2024_CVPR, author = {Tian, Xin and Xu, Ke and Lau, Rynson}, title = {Unsupervised Salient Instance Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2702-2712} }
Enhancing Visual Document Understanding with Contrastive Learning in Large Visual-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Xin and Wu, Yunfei and Jiang, Xinghua and Guo, Zhihao and Gong, Mingming and Cao, Haoyu and Liu, Yinsong and Jiang, Deqiang and Sun, Xing}, title = {Enhancing Visual Document Understanding with Contrastive Learning in Large Visual-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15546-15555} }
Move Anything with Layered Scene Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2024_CVPR, author = {Ren, Jiawei and Xu, Mengmeng and Wu, Jui-Chieh and Liu, Ziwei and Xiang, Tao and Toisoul, Antoine}, title = {Move Anything with Layered Scene Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6380-6389} }
GS-SLAM: Dense Visual SLAM with 3D Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Yan_2024_CVPR, author = {Yan, Chi and Qu, Delin and Xu, Dan and Zhao, Bin and Wang, Zhigang and Wang, Dong and Li, Xuelong}, title = {GS-SLAM: Dense Visual SLAM with 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19595-19604} }
Scaffold-GS: Structured 3D Gaussians for View-Adaptive Rendering-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2024_CVPR, author = {Lu, Tao and Yu, Mulin and Xu, Linning and Xiangli, Yuanbo and Wang, Limin and Lin, Dahua and Dai, Bo}, title = {Scaffold-GS: Structured 3D Gaussians for View-Adaptive Rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20654-20664} }
Data Valuation and Detections in Federated Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Wenqian and Fu, Shuran and Zhang, Fengrui and Pang, Yan}, title = {Data Valuation and Detections in Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12027-12036} }
Classes Are Not Equal: An Empirical Study on Image Recognition Fairness-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cui_2024_CVPR, author = {Cui, Jiequan and Zhu, Beier and Wen, Xin and Qi, Xiaojuan and Yu, Bei and Zhang, Hanwang}, title = {Classes Are Not Equal: An Empirical Study on Image Recognition Fairness}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23283-23292} }
Human Gaussian Splatting: Real-time Rendering of Animatable Avatars-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Moreau_2024_CVPR, author = {Moreau, Arthur and Song, Jifei and Dhamo, Helisa and Shaw, Richard and Zhou, Yiren and P\'erez-Pellitero, Eduardo}, title = {Human Gaussian Splatting: Real-time Rendering of Animatable Avatars}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {788-798} }
Multi-Scale 3D Gaussian Splatting for Anti-Aliased Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2024_CVPR, author = {Yan, Zhiwen and Low, Weng Fei and Chen, Yu and Lee, Gim Hee}, title = {Multi-Scale 3D Gaussian Splatting for Anti-Aliased Rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20923-20931} }
A Bayesian Approach to OOD Robustness in Image Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kaushik_2024_CVPR, author = {Kaushik, Prakhar and Kortylewski, Adam and Yuille, Alan}, title = {A Bayesian Approach to OOD Robustness in Image Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22988-22997} }
Unified-IO 2: Scaling Autoregressive Multimodal Models with Vision Language Audio and Action-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2024_CVPR, author = {Lu, Jiasen and Clark, Christopher and Lee, Sangho and Zhang, Zichen and Khosla, Savya and Marten, Ryan and Hoiem, Derek and Kembhavi, Aniruddha}, title = {Unified-IO 2: Scaling Autoregressive Multimodal Models with Vision Language Audio and Action}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26439-26455} }
Joint Reconstruction of 3D Human and Object via Contact-Based Refinement Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nam_2024_CVPR, author = {Nam, Hyeongjin and Jung, Daniel Sungho and Moon, Gyeongsik and Lee, Kyoung Mu}, title = {Joint Reconstruction of 3D Human and Object via Contact-Based Refinement Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10218-10227} }
TIM: A Time Interval Machine for Audio-Visual Action Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chalk_2024_CVPR, author = {Chalk, Jacob and Huh, Jaesung and Kazakos, Evangelos and Zisserman, Andrew and Damen, Dima}, title = {TIM: A Time Interval Machine for Audio-Visual Action Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18153-18163} }
The Devil is in the Details: StyleFeatureEditor for Detail-Rich StyleGAN Inversion and High Quality Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bobkov_2024_CVPR, author = {Bobkov, Denis and Titov, Vadim and Alanov, Aibek and Vetrov, Dmitry}, title = {The Devil is in the Details: StyleFeatureEditor for Detail-Rich StyleGAN Inversion and High Quality Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9337-9346} }
Unbiased Estimator for Distorted Conics in Camera Calibration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2024_CVPR, author = {Song, Chaehyeon and Shin, Jaeho and Jeon, Myung-Hwan and Lim, Jongwoo and Kim, Ayoung}, title = {Unbiased Estimator for Distorted Conics in Camera Calibration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {373-381} }
MultiPhys: Multi-Person Physics-aware 3D Motion Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ugrinovic_2024_CVPR, author = {Ugrinovic, Nicolas and Pan, Boxiao and Pavlakos, Georgios and Paschalidou, Despoina and Shen, Bokui and Sanchez-Riera, Jordi and Moreno-Noguer, Francesc and Guibas, Leonidas}, title = {MultiPhys: Multi-Person Physics-aware 3D Motion Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2331-2340} }
Multi-Level Neural Scene Graphs for Dynamic Urban Environments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fischer_2024_CVPR, author = {Fischer, Tobias and Porzi, Lorenzo and Bulo, Samuel Rota and Pollefeys, Marc and Kontschieder, Peter}, title = {Multi-Level Neural Scene Graphs for Dynamic Urban Environments}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21125-21135} }
Would Deep Generative Models Amplify Bias in Future Models?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Tianwei and Hirota, Yusuke and Otani, Mayu and Garcia, Noa and Nakashima, Yuta}, title = {Would Deep Generative Models Amplify Bias in Future Models?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10833-10843} }
Bayes' Rays: Uncertainty Quantification for Neural Radiance Fields-
[pdf]
[supp]
[bibtex]@InProceedings{Goli_2024_CVPR, author = {Goli, Lily and Reading, Cody and Sell\'an, Silvia and Jacobson, Alec and Tagliasacchi, Andrea}, title = {Bayes' Rays: Uncertainty Quantification for Neural Radiance Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20061-20070} }
NIVeL: Neural Implicit Vector Layers for Text-to-Vector Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Thamizharasan_2024_CVPR, author = {Thamizharasan, Vikas and Liu, Difan and Fisher, Matthew and Zhao, Nanxuan and Kalogerakis, Evangelos and Lukac, Michal}, title = {NIVeL: Neural Implicit Vector Layers for Text-to-Vector Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4589-4597} }
Driving-Video Dehazing with Non-Aligned Regularization for Safety Assistance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2024_CVPR, author = {Fan, Junkai and Weng, Jiangwei and Wang, Kun and Yang, Yijun and Qian, Jianjun and Li, Jun and Yang, Jian}, title = {Driving-Video Dehazing with Non-Aligned Regularization for Safety Assistance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26109-26119} }
Is Vanilla MLP in Neural Radiance Field Enough for Few-shot View Synthesis?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2024_CVPR, author = {Zhu, Hanxin and He, Tianyu and Li, Xin and Li, Bingchen and Chen, Zhibo}, title = {Is Vanilla MLP in Neural Radiance Field Enough for Few-shot View Synthesis?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20288-20298} }
CVT-xRF: Contrastive In-Voxel Transformer for 3D Consistent Radiance Fields from Sparse Inputs-
[pdf]
[bibtex]@InProceedings{Zhong_2024_CVPR, author = {Zhong, Yingji and Hong, Lanqing and Li, Zhenguo and Xu, Dan}, title = {CVT-xRF: Contrastive In-Voxel Transformer for 3D Consistent Radiance Fields from Sparse Inputs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21466-21475} }
OAKINK2: A Dataset of Bimanual Hands-Object Manipulation in Complex Task Completion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhan_2024_CVPR, author = {Zhan, Xinyu and Yang, Lixin and Zhao, Yifei and Mao, Kangrui and Xu, Hanlin and Lin, Zenan and Li, Kailin and Lu, Cewu}, title = {OAKINK2: A Dataset of Bimanual Hands-Object Manipulation in Complex Task Completion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {445-456} }
CogAgent: A Visual Language Model for GUI Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hong_2024_CVPR, author = {Hong, Wenyi and Wang, Weihan and Lv, Qingsong and Xu, Jiazheng and Yu, Wenmeng and Ji, Junhui and Wang, Yan and Wang, Zihan and Dong, Yuxiao and Ding, Ming and Tang, Jie}, title = {CogAgent: A Visual Language Model for GUI Agents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14281-14290} }
Text-Guided 3D Face Synthesis - From Generation to Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Yunjie and Meng, Yapeng and Hu, Zhipeng and Li, Lincheng and Wu, Haoqian and Zhou, Kun and Xu, Weiwei and Yu, Xin}, title = {Text-Guided 3D Face Synthesis - From Generation to Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1260-1269} }
AIDE: An Automatic Data Engine for Object Detection in Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2024_CVPR, author = {Liang, Mingfu and Su, Jong-Chyi and Schulter, Samuel and Garg, Sparsh and Zhao, Shiyu and Wu, Ying and Chandraker, Manmohan}, title = {AIDE: An Automatic Data Engine for Object Detection in Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14695-14706} }
Multiplane Prior Guided Few-Shot Aerial Scene Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2024_CVPR, author = {Gao, Zihan and Jiao, Licheng and Li, Lingling and Liu, Xu and Liu, Fang and Chen, Puhua and Guo, Yuwei}, title = {Multiplane Prior Guided Few-Shot Aerial Scene Rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5009-5019} }
MAS: Multi-view Ancestral Sampling for 3D Motion Generation Using 2D Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kapon_2024_CVPR, author = {Kapon, Roy and Tevet, Guy and Cohen-Or, Daniel and Bermano, Amit H.}, title = {MAS: Multi-view Ancestral Sampling for 3D Motion Generation Using 2D Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1965-1974} }
Smart Help: Strategic Opponent Modeling for Proactive and Adaptive Robot Assistance in Households-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2024_CVPR, author = {Cao, Zhihao and Wang, Zidong and Xie, Siwen and Liu, Anji and Fan, Lifeng}, title = {Smart Help: Strategic Opponent Modeling for Proactive and Adaptive Robot Assistance in Households}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18091-18101} }
Bilateral Event Mining and Complementary for Event Stream Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Zhilin and Liang, Quanmin and Yu, Yijie and Qin, Chujun and Zheng, Xiawu and Huang, Kai and Zhou, Zikun and Yang, Wenming}, title = {Bilateral Event Mining and Complementary for Event Stream Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {34-43} }
Online Task-Free Continual Generative and Discriminative Learning via Dynamic Cluster Memory-
[pdf]
[supp]
[bibtex]@InProceedings{Ye_2024_CVPR, author = {Ye, Fei and Bors, Adrian G.}, title = {Online Task-Free Continual Generative and Discriminative Learning via Dynamic Cluster Memory}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26202-26212} }
Rapid Motor Adaptation for Robotic Manipulator Arms-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2024_CVPR, author = {Liang, Yichao and Ellis, Kevin and Henriques, Jo\~ao}, title = {Rapid Motor Adaptation for Robotic Manipulator Arms}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16404-16413} }
SANeRF-HQ: Segment Anything for NeRF in High Quality-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Yichen and Hu, Benran and Tang, Chi-Keung and Tai, Yu-Wing}, title = {SANeRF-HQ: Segment Anything for NeRF in High Quality}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3216-3226} }
DSGG: Dense Relation Transformer for an End-to-end Scene Graph Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hayder_2024_CVPR, author = {Hayder, Zeeshan and He, Xuming}, title = {DSGG: Dense Relation Transformer for an End-to-end Scene Graph Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28317-28326} }
Transcending the Limit of Local Window: Advanced Super-Resolution Transformer with Adaptive Token Dictionary-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Leheng and Li, Yawei and Zhou, Xingyu and Zhao, Xiaorui and Gu, Shuhang}, title = {Transcending the Limit of Local Window: Advanced Super-Resolution Transformer with Adaptive Token Dictionary}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2856-2865} }
Object Dynamics Modeling with Hierarchical Point Cloud-based Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Chanho and Fuxin, Li}, title = {Object Dynamics Modeling with Hierarchical Point Cloud-based Representations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20977-20986} }
WWW: A Unified Framework for Explaining What Where and Why of Neural Networks by Interpretation of Neuron Concepts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ahn_2024_CVPR, author = {Ahn, Yong Hyun and Kim, Hyeon Bae and Kim, Seong Tae}, title = {WWW: A Unified Framework for Explaining What Where and Why of Neural Networks by Interpretation of Neuron Concepts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10968-10977} }
SkySense: A Multi-Modal Remote Sensing Foundation Model Towards Universal Interpretation for Earth Observation Imagery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2024_CVPR, author = {Guo, Xin and Lao, Jiangwei and Dang, Bo and Zhang, Yingying and Yu, Lei and Ru, Lixiang and Zhong, Liheng and Huang, Ziyuan and Wu, Kang and Hu, Dingxiang and He, Huimei and Wang, Jian and Chen, Jingdong and Yang, Ming and Zhang, Yongjun and Li, Yansheng}, title = {SkySense: A Multi-Modal Remote Sensing Foundation Model Towards Universal Interpretation for Earth Observation Imagery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27672-27683} }
CaKDP: Category-aware Knowledge Distillation and Pruning Framework for Lightweight 3D Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Haonan and Liu, Longjun and Huang, Yuqi and Yang, Zhao and Lei, Xinyu and Wen, Bihan}, title = {CaKDP: Category-aware Knowledge Distillation and Pruning Framework for Lightweight 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15331-15341} }
Mixed-Precision Quantization for Federated Learning on Resource-Constrained Heterogeneous Devices-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Huancheng and Vikalo, Haris}, title = {Mixed-Precision Quantization for Federated Learning on Resource-Constrained Heterogeneous Devices}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6138-6148} }
CFAT: Unleashing Triangular Windows for Image Super-resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Ray_2024_CVPR, author = {Ray, Abhisek and Kumar, Gaurav and Kolekar, Maheshkumar H.}, title = {CFAT: Unleashing Triangular Windows for Image Super-resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26120-26129} }
ICP-Flow: LiDAR Scene Flow Estimation with ICP-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2024_CVPR, author = {Lin, Yancong and Caesar, Holger}, title = {ICP-Flow: LiDAR Scene Flow Estimation with ICP}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15501-15511} }
MADTP: Multimodal Alignment-Guided Dynamic Token Pruning for Accelerating Vision-Language Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2024_CVPR, author = {Cao, Jianjian and Ye, Peng and Li, Shengze and Yu, Chong and Tang, Yansong and Lu, Jiwen and Chen, Tao}, title = {MADTP: Multimodal Alignment-Guided Dynamic Token Pruning for Accelerating Vision-Language Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15710-15719} }
G-NeRF: Geometry-enhanced Novel View Synthesis from Single-View Images-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Zixiong and Chen, Qi and Sun, Libo and Yang, Yifan and Wang, Naizhou and Wu, Qi and Tan, Mingkui}, title = {G-NeRF: Geometry-enhanced Novel View Synthesis from Single-View Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10117-10126} }
Neural Fields as Distributions: Signal Processing Beyond Euclidean Space-
[pdf]
[supp]
[bibtex]@InProceedings{Rebain_2024_CVPR, author = {Rebain, Daniel and Yazdani, Soroosh and Yi, Kwang Moo and Tagliasacchi, Andrea}, title = {Neural Fields as Distributions: Signal Processing Beyond Euclidean Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4274-4283} }
Rolling Shutter Correction with Intermediate Distortion Flow Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2024_CVPR, author = {Cao, Mingdeng and Yang, Sidi and Yang, Yujiu and Zheng, Yinqiang}, title = {Rolling Shutter Correction with Intermediate Distortion Flow Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25338-25347} }
Style Blind Domain Generalized Semantic Segmentation via Covariance Alignment and Semantic Consistence Contrastive Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ahn_2024_CVPR, author = {Ahn, Woo-Jin and Yang, Geun-Yeong and Choi, Hyun-Duck and Lim, Myo-Taeg}, title = {Style Blind Domain Generalized Semantic Segmentation via Covariance Alignment and Semantic Consistence Contrastive Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3616-3626} }
Attack To Defend: Exploiting Adversarial Attacks for Detecting Poisoned Models-
[pdf]
[supp]
[bibtex]@InProceedings{Fares_2024_CVPR, author = {Fares, Samar and Nandakumar, Karthik}, title = {Attack To Defend: Exploiting Adversarial Attacks for Detecting Poisoned Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24726-24735} }
X-3D: Explicit 3D Structure Modeling for Point Cloud Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2024_CVPR,
  author    = {Sun, Shuofeng and Rao, Yongming and Lu, Jiwen and Yan, Haibin},
  title     = {{X-3D}: Explicit {3D} Structure Modeling for Point Cloud Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5074--5083},
}
SpiderMatch: 3D Shape Matching with Global Optimality and Geometric Consistency-
[pdf]
[supp]
[bibtex]@InProceedings{Roetzer_2024_CVPR,
  author    = {Roetzer, Paul and Bernard, Florian},
  title     = {{SpiderMatch}: {3D} Shape Matching with Global Optimality and Geometric Consistency},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14543--14553},
}
Troika: Multi-Path Cross-Modal Traction for Compositional Zero-Shot Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Siteng and Gong, Biao and Feng, Yutong and Zhang, Min and Lv, Yiliang and Wang, Donglin},
  title     = {{Troika}: Multi-Path Cross-Modal Traction for Compositional Zero-Shot Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {24005--24014},
}
One More Step: A Versatile Plug-and-Play Module for Rectifying Diffusion Schedule Flaws and Enhancing Low-Frequency Controls-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2024_CVPR,
  author    = {Hu, Minghui and Zheng, Jianbin and Zheng, Chuanxia and Wang, Chaoyue and Tao, Dacheng and Cham, Tat-Jen},
  title     = {One More Step: A Versatile Plug-and-Play Module for Rectifying Diffusion Schedule Flaws and Enhancing Low-Frequency Controls},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7331--7340},
}
Enhancing Multimodal Cooperation via Sample-level Modality Valuation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2024_CVPR,
  author    = {Wei, Yake and Feng, Ruoxuan and Wang, Zihe and Hu, Di},
  title     = {Enhancing Multimodal Cooperation via Sample-level Modality Valuation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {27338--27347},
}
Evidential Active Recognition: Intelligent and Prudent Open-World Embodied Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2024_CVPR,
  author    = {Fan, Lei and Liang, Mingfu and Li, Yunxuan and Hua, Gang and Wu, Ying},
  title     = {Evidential Active Recognition: Intelligent and Prudent Open-World Embodied Perception},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16351--16361},
}
SatSynth: Augmenting Image-Mask Pairs through Diffusion Models for Aerial Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Toker_2024_CVPR,
  author    = {Toker, Aysim and Eisenberger, Marvin and Cremers, Daniel and Leal-Taix{\'e}, Laura},
  title     = {{SatSynth}: Augmenting Image-Mask Pairs through Diffusion Models for Aerial Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {27695--27705},
}
XScale-NVS: Cross-Scale Novel View Synthesis with Hash Featurized Manifold-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Guangyu and Zhang, Jinzhi and Wang, Fan and Huang, Ruqi and Fang, Lu},
  title     = {{XScale-NVS}: Cross-Scale Novel View Synthesis with Hash Featurized Manifold},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {21029--21039},
}
Ink Dot-Oriented Differentiable Optimization for Neural Image Halftoning-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2024_CVPR,
  author    = {Jiang, Hao and Zhou, Bingfeng and Mu, Yadong},
  title     = {Ink Dot-Oriented Differentiable Optimization for Neural Image Halftoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {27528--27537},
}
The Unreasonable Effectiveness of Pre-Trained Features for Camera Pose Refinement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Trivigno_2024_CVPR,
  author    = {Trivigno, Gabriele and Masone, Carlo and Caputo, Barbara and Sattler, Torsten},
  title     = {The Unreasonable Effectiveness of Pre-Trained Features for Camera Pose Refinement},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12786--12798},
}
Scalable 3D Registration via Truncated Entry-wise Absolute Residuals-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Tianyu and Peng, Liangzu and Vidal, Rene and Liu, Yun-Hui},
  title     = {Scalable {3D} Registration via Truncated Entry-wise Absolute Residuals},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {27477--27487},
}
ExtraNeRF: Visibility-Aware View Extrapolation of Neural Radiance Fields with Diffusion Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Shih_2024_CVPR,
  author    = {Shih, Meng-Li and Ma, Wei-Chiu and Boyice, Lorenzo and Holynski, Aleksander and Cole, Forrester and Curless, Brian and Kontkanen, Janne},
  title     = {{ExtraNeRF}: Visibility-Aware View Extrapolation of Neural Radiance Fields with Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {20385--20395},
}
Equivariant Plug-and-Play Image Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Terris_2024_CVPR,
  author    = {Terris, Matthieu and Moreau, Thomas and Pustelnik, Nelly and Tachella, Julian},
  title     = {Equivariant Plug-and-Play Image Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {25255--25264},
}
CLIP as RNN: Segment Countless Visual Concepts without Training Endeavor-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2024_CVPR,
  author    = {Sun, Shuyang and Li, Runjia and Torr, Philip and Gu, Xiuye and Li, Siyang},
  title     = {{CLIP} as {RNN}: Segment Countless Visual Concepts without Training Endeavor},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13171--13182},
}
LP++: A Surprisingly Strong Linear Probe for Few-Shot CLIP-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Yunshi and Shakeri, Fereshteh and Dolz, Jose and Boudiaf, Malik and Bahig, Houda and Ben Ayed, Ismail},
  title     = {{LP++}: A Surprisingly Strong Linear Probe for Few-Shot {CLIP}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {23773--23782},
}
Active Generalized Category Discovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2024_CVPR,
  author    = {Ma, Shijie and Zhu, Fei and Zhong, Zhun and Zhang, Xu-Yao and Liu, Cheng-Lin},
  title     = {Active Generalized Category Discovery},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16890--16900},
}
HIVE: Harnessing Human Feedback for Instructional Visual Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Shu and Yang, Xinyi and Feng, Yihao and Qin, Can and Chen, Chia-Chih and Yu, Ning and Chen, Zeyuan and Wang, Huan and Savarese, Silvio and Ermon, Stefano and Xiong, Caiming and Xu, Ran},
  title     = {{HIVE}: Harnessing Human Feedback for Instructional Visual Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9026--9036},
}
StrokeFaceNeRF: Stroke-based Facial Appearance Editing in Neural Radiance Field-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Xiao-Juan and Zhang, Dingxi and Chen, Shu-Yu and Liu, Feng-Lin},
  title     = {{StrokeFaceNeRF}: Stroke-based Facial Appearance Editing in Neural Radiance Field},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7538--7547},
}
FlowVQTalker: High-Quality Emotional Talking Face Generation through Normalizing Flow and Quantization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tan_2024_CVPR,
  author    = {Tan, Shuai and Ji, Bin and Pan, Ye},
  title     = {{FlowVQTalker}: High-Quality Emotional Talking Face Generation through Normalizing Flow and Quantization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {26317--26327},
}
Learning from Observer Gaze: Zero-Shot Attention Prediction Oriented by Human-Object Interaction Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Yuchen and Liu, Linkai and Gou, Chao},
  title     = {Learning from Observer Gaze: Zero-Shot Attention Prediction Oriented by Human-Object Interaction Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {28390--28400},
}
ProxyCap: Real-time Monocular Full-body Capture in World Space via Human-Centric Proxy-to-Motion Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Yuxiang and Zhang, Hongwen and Hu, Liangxiao and Zhang, Jiajun and Yi, Hongwei and Zhang, Shengping and Liu, Yebin},
  title     = {{ProxyCap}: Real-time Monocular Full-body Capture in World Space via Human-Centric Proxy-to-Motion Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1954--1964},
}
OpenBias: Open-set Bias Detection in Text-to-Image Generative Models-
[pdf]
[supp]
[bibtex]@InProceedings{D'Inca_2024_CVPR,
  author    = {D'Inc{\`a}, Moreno and Peruzzo, Elia and Mancini, Massimiliano and Xu, Dejia and Goel, Vidit and Xu, Xingqian and Wang, Zhangyang and Shi, Humphrey and Sebe, Nicu},
  title     = {{OpenBias}: Open-set Bias Detection in Text-to-Image Generative Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12225--12235},
}
On the Robustness of Language Guidance for Low-Level Vision Tasks: Findings from Depth Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chatterjee_2024_CVPR,
  author    = {Chatterjee, Agneet and Gokhale, Tejas and Baral, Chitta and Yang, Yezhou},
  title     = {On the Robustness of Language Guidance for Low-Level Vision Tasks: Findings from Depth Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2794--2803},
}
UFOGen: You Forward Once Large Scale Text-to-Image Generation via Diffusion GANs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Yanwu and Zhao, Yang and Xiao, Zhisheng and Hou, Tingbo},
  title     = {{UFOGen}: You Forward Once Large Scale Text-to-Image Generation via Diffusion {GANs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8196--8206},
}
3DiffTection: 3D Object Detection with Geometry-Aware Diffusion Features-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Chenfeng and Ling, Huan and Fidler, Sanja and Litany, Or},
  title     = {{3DiffTection}: {3D} Object Detection with Geometry-Aware Diffusion Features},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10617--10627},
}
Lift3D: Zero-Shot Lifting of Any 2D Vision Model to 3D-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{T_2024_CVPR,
  author    = {T, Mukund Varma and Wang, Peihao and Fan, Zhiwen and Wang, Zhangyang and Su, Hao and Ramamoorthi, Ravi},
  title     = {{Lift3D}: Zero-Shot Lifting of Any {2D} Vision Model to {3D}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {21367--21377},
}
LowRankOcc: Tensor Decomposition and Low-Rank Recovery for Vision-based 3D Semantic Occupancy Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Linqing and Xu, Xiuwei and Wang, Ziwei and Zhang, Yunpeng and Zhang, Borui and Zheng, Wenzhao and Du, Dalong and Zhou, Jie and Lu, Jiwen},
  title     = {{LowRankOcc}: Tensor Decomposition and Low-Rank Recovery for Vision-based {3D} Semantic Occupancy Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9806--9815},
}
Multiway Point Cloud Mosaicking with Diffusion and Global Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2024_CVPR,
  author    = {Jin, Shengze and Armeni, Iro and Pollefeys, Marc and Barath, Daniel},
  title     = {Multiway Point Cloud Mosaicking with Diffusion and Global Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {20838--20849},
}
Novel View Synthesis with View-Dependent Effects from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bello_2024_CVPR,
  author    = {Gonzalez Bello, Juan Luis and Kim, Munchurl},
  title     = {Novel View Synthesis with View-Dependent Effects from a Single Image},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10413--10423},
}
Point2RBox: Combine Knowledge from Synthetic Visual Patterns for End-to-end Oriented Object Detection with Single Point Supervision-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yu_2024_CVPR,
  author    = {Yu, Yi and Yang, Xue and Li, Qingyun and Da, Feipeng and Dai, Jifeng and Qiao, Yu and Yan, Junchi},
  title     = {{Point2RBox}: Combine Knowledge from Synthetic Visual Patterns for End-to-end Oriented Object Detection with Single Point Supervision},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16783--16793},
}
PBWR: Parametric-Building-Wireframe Reconstruction from Aerial LiDAR Point Clouds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Shangfeng and Wang, Ruisheng and Guo, Bo and Yang, Hongxin},
  title     = {{PBWR}: Parametric-Building-Wireframe Reconstruction from Aerial {LiDAR} Point Clouds},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {27778--27787},
}
Spectrum AUC Difference (SAUCD): Human-aligned 3D Shape Evaluation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luan_2024_CVPR,
  author    = {Luan, Tianyu and Li, Zhong and Chen, Lele and Gong, Xuan and Chen, Lichang and Xu, Yi and Yuan, Junsong},
  title     = {Spectrum {AUC} Difference ({SAUCD}): Human-aligned {3D} Shape Evaluation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {20155--20164},
}
HRVDA: High-Resolution Visual Document Assistant-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Chaohu and Yin, Kun and Cao, Haoyu and Jiang, Xinghua and Li, Xin and Liu, Yinsong and Jiang, Deqiang and Sun, Xing and Xu, Linli},
  title     = {{HRVDA}: High-Resolution Visual Document Assistant},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15534--15545},
}
Learning for Transductive Threshold Calibration in Open-World Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Qin and An, Dongsheng and Xiao, Tianjun and He, Tong and Tang, Qingming and Wu, Ying Nian and Tighe, Joseph and Xing, Yifan},
  title     = {Learning for Transductive Threshold Calibration in Open-World Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17097--17106},
}
Weakly-Supervised Emotion Transition Learning for Diverse 3D Co-speech Gesture Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qi_2024_CVPR,
  author    = {Qi, Xingqun and Pan, Jiahao and Li, Peng and Yuan, Ruibin and Chi, Xiaowei and Li, Mengfei and Luo, Wenhan and Xue, Wei and Zhang, Shanghang and Liu, Qifeng and Guo, Yike},
  title     = {Weakly-Supervised Emotion Transition Learning for Diverse {3D} Co-speech Gesture Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10424--10434},
}
Multi-Session SLAM with Differentiable Wide-Baseline Pose Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lipson_2024_CVPR,
  author    = {Lipson, Lahav and Deng, Jia},
  title     = {Multi-Session {SLAM} with Differentiable Wide-Baseline Pose Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {19626--19635},
}
A Dual-Augmentor Framework for Domain Generalization in 3D Human Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2024_CVPR,
  author    = {Peng, Qucheng and Zheng, Ce and Chen, Chen},
  title     = {A Dual-Augmentor Framework for Domain Generalization in {3D} Human Pose Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2240--2249},
}
Improving Out-of-Distribution Generalization in Graphs via Hierarchical Semantic Environments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Piao_2024_CVPR,
  author    = {Piao, Yinhua and Lee, Sangseon and Lu, Yijingxiu and Kim, Sun},
  title     = {Improving Out-of-Distribution Generalization in Graphs via Hierarchical Semantic Environments},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {27631--27640},
}
CN-RMA: Combined Network with Ray Marching Aggregation for 3D Indoor Object Detection from Multi-view Images-
[pdf]
[supp]
[bibtex]@InProceedings{Shen_2024_CVPR,
  author    = {Shen, Guanlin and Huang, Jingwei and Hu, Zhihua and Wang, Bin},
  title     = {{CN-RMA}: Combined Network with Ray Marching Aggregation for {3D} Indoor Object Detection from Multi-view Images},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {21326--21335},
}
ACT-Diffusion: Efficient Adversarial Consistency Training for One-step Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Kong_2024_CVPR,
  author    = {Kong, Fei and Duan, Jinhao and Sun, Lichao and Cheng, Hao and Xu, Renjing and Shen, Hengtao and Zhu, Xiaofeng and Shi, Xiaoshuang and Xu, Kaidi},
  title     = {{ACT-Diffusion}: Efficient Adversarial Consistency Training for One-step Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8890--8899},
}
Spectral Meets Spatial: Harmonising 3D Shape Matching and Interpolation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2024_CVPR,
  author    = {Cao, Dongliang and Eisenberger, Marvin and El Amrani, Nafie and Cremers, Daniel and Bernard, Florian},
  title     = {Spectral Meets Spatial: Harmonising {3D} Shape Matching and Interpolation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3658--3668},
}
Emu Edit: Precise Image Editing via Recognition and Generation Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sheynin_2024_CVPR,
  author    = {Sheynin, Shelly and Polyak, Adam and Singer, Uriel and Kirstain, Yuval and Zohar, Amit and Ashual, Oron and Parikh, Devi and Taigman, Yaniv},
  title     = {{Emu Edit}: Precise Image Editing via Recognition and Generation Tasks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8871--8879},
}
Face2Diffusion for Fast and Editable Face Personalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shiohara_2024_CVPR,
  author    = {Shiohara, Kaede and Yamasaki, Toshihiko},
  title     = {{Face2Diffusion} for Fast and Editable Face Personalization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6850--6859},
}
Causal-CoG: A Causal-Effect Look at Context Generation for Boosting Multi-modal Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Shitian and Li, Zhuowan and Lu, Yadong and Yuille, Alan and Wang, Yan},
  title     = {{Causal-CoG}: A Causal-Effect Look at Context Generation for Boosting Multi-modal Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13342--13351},
}
Hide in Thicket: Generating Imperceptible and Rational Adversarial Perturbations on 3D Point Clouds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lou_2024_CVPR,
  author    = {Lou, Tianrui and Jia, Xiaojun and Gu, Jindong and Liu, Li and Liang, Siyuan and He, Bangyan and Cao, Xiaochun},
  title     = {Hide in Thicket: Generating Imperceptible and Rational Adversarial Perturbations on {3D} Point Clouds},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {24326--24335},
}
SG-BEV: Satellite-Guided BEV Fusion for Cross-View Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Ye_2024_CVPR,
  author    = {Ye, Junyan and Luo, Qiyan and Yu, Jinhua and Zhong, Huaping and Zheng, Zhimeng and He, Conghui and Li, Weijia},
  title     = {{SG-BEV}: Satellite-Guided {BEV} Fusion for Cross-View Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {27748--27757},
}
Brush2Prompt: Contextual Prompt Generator for Object Inpainting-
[pdf]
[supp]
[bibtex]@InProceedings{Chiu_2024_CVPR,
  author    = {Chiu, Mang Tik and Zhou, Yuqian and Zhang, Lingzhi and Lin, Zhe and Barnes, Connelly and Amirghodsi, Sohrab and Shechtman, Eli and Shi, Humphrey},
  title     = {{Brush2Prompt}: Contextual Prompt Generator for Object Inpainting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12636--12645},
}
Joint-Task Regularization for Partially Labeled Multi-Task Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nishi_2024_CVPR,
  author    = {Nishi, Kento and Kim, Junsik and Li, Wanhua and Pfister, Hanspeter},
  title     = {Joint-Task Regularization for Partially Labeled Multi-Task Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16152--16162},
}
Shallow-Deep Collaborative Learning for Unsupervised Visible-Infrared Person Re-Identification-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Bin and Chen, Jun and Ye, Mang},
  title     = {Shallow-Deep Collaborative Learning for Unsupervised Visible-Infrared Person Re-Identification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16870--16879},
}
Dancing with Still Images: Video Distillation via Static-Dynamic Disentanglement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Ziyu and Xu, Yue and Lu, Cewu and Li, Yong-Lu},
  title     = {Dancing with Still Images: Video Distillation via Static-Dynamic Disentanglement},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6296--6304},
}
Context-Aware Integration of Language and Visual References for Natural Language Tracking-
[pdf]
[arXiv]
[bibtex]@InProceedings{Shao_2024_CVPR,
  author    = {Shao, Yanyan and He, Shuting and Ye, Qi and Feng, Yuchao and Luo, Wenhan and Chen, Jiming},
  title     = {Context-Aware Integration of Language and Visual References for Natural Language Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {19208--19217},
}
An Edit Friendly DDPM Noise Space: Inversion and Manipulations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huberman-Spiegelglas_2024_CVPR,
  author    = {Huberman-Spiegelglas, Inbar and Kulikov, Vladimir and Michaeli, Tomer},
  title     = {An Edit Friendly {DDPM} Noise Space: Inversion and Manipulations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12469--12478},
}
LEAP-VO: Long-term Effective Any Point Tracking for Visual Odometry-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Weirong and Chen, Le and Wang, Rui and Pollefeys, Marc},
  title     = {{LEAP-VO}: Long-term Effective Any Point Tracking for Visual Odometry},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {19844--19853},
}
RoDLA: Benchmarking the Robustness of Document Layout Analysis Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Yufan and Zhang, Jiaming and Peng, Kunyu and Zheng, Junwei and Liu, Ruiping and Torr, Philip and Stiefelhagen, Rainer},
  title     = {{RoDLA}: Benchmarking the Robustness of Document Layout Analysis Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15556--15566},
}
UniRepLKNet: A Universal Perception Large-Kernel ConvNet for Audio Video Point Cloud Time-Series and Image Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ding_2024_CVPR,
  author    = {Ding, Xiaohan and Zhang, Yiyuan and Ge, Yixiao and Zhao, Sijie and Song, Lin and Yue, Xiangyu and Shan, Ying},
  title     = {{UniRepLKNet}: A Universal Perception Large-Kernel {ConvNet} for Audio Video Point Cloud Time-Series and Image Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5513--5524},
}
Unveiling the Unknown: Unleashing the Power of Unknown to Known in Open-Set Source-Free Domain Adaptation-
[pdf]
[supp]
[bibtex]@InProceedings{Wan_2024_CVPR,
  author    = {Wan, Fuli and Zhao, Han and Yang, Xu and Deng, Cheng},
  title     = {Unveiling the Unknown: Unleashing the Power of Unknown to Known in Open-Set Source-Free Domain Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {24015--24024},
}
BilevelPruning: Unified Dynamic and Static Channel Pruning for Convolutional Neural Networks-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2024_CVPR,
  author    = {Gao, Shangqian and Zhang, Yanfu and Huang, Feihu and Huang, Heng},
  title     = {{BilevelPruning}: Unified Dynamic and Static Channel Pruning for Convolutional Neural Networks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16090--16100},
}
IDGuard: Robust General Identity-centric POI Proactive Defense Against Face Editing Abuse-
[pdf]
[supp]
[bibtex]@InProceedings{Dai_2024_CVPR,
  author    = {Dai, Yunshu and Fei, Jianwei and Huang, Fangjun},
  title     = {{IDGuard}: Robust General Identity-centric {POI} Proactive Defense Against Face Editing Abuse},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11934--11943},
}
SwiftBrush: One-Step Text-to-Image Diffusion Model with Variational Score Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2024_CVPR,
  author    = {Nguyen, Thuan Hoang and Tran, Anh},
  title     = {{SwiftBrush}: One-Step Text-to-Image Diffusion Model with Variational Score Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7807--7816},
}
DEADiff: An Efficient Stylization Diffusion Model with Disentangled Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qi_2024_CVPR,
  author    = {Qi, Tianhao and Fang, Shancheng and Wu, Yanze and Xie, Hongtao and Liu, Jiawei and Chen, Lang and He, Qian and Zhang, Yongdong},
  title     = {{DEADiff}: An Efficient Stylization Diffusion Model with Disentangled Representations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8693--8702},
}
Instance-Adaptive and Geometric-Aware Keypoint Learning for Category-Level 6D Object Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2024_CVPR,
  author    = {Lin, Xiao and Yang, Wenfei and Gao, Yuan and Zhang, Tianzhu},
  title     = {Instance-Adaptive and Geometric-Aware Keypoint Learning for Category-Level {6D} Object Pose Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {21040--21049},
}
Universal Semi-Supervised Domain Adaptation by Mitigating Common-Class Bias-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Wenyu and Liu, Qingmu and Cong, Felix Ong Wei and Ragab, Mohamed and Foo, Chuan-Sheng},
  title     = {Universal Semi-Supervised Domain Adaptation by Mitigating Common-Class Bias},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {23912--23921},
}
Exact Fusion via Feature Distribution Matching for Few-shot Image Generation-
[pdf]
[bibtex]@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Yingbo and Ye, Yutong and Zhang, Pengyu and Wei, Xian and Chen, Mingsong},
  title     = {Exact Fusion via Feature Distribution Matching for Few-shot Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8383--8392},
}
CoDeF: Content Deformation Fields for Temporally Consistent Video Processing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ouyang_2024_CVPR,
  author    = {Ouyang, Hao and Wang, Qiuyu and Xiao, Yuxi and Bai, Qingyan and Zhang, Juntao and Zheng, Kecheng and Zhou, Xiaowei and Chen, Qifeng and Shen, Yujun},
  title     = {{CoDeF}: Content Deformation Fields for Temporally Consistent Video Processing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8089--8099},
}
QUADify: Extracting Meshes with Pixel-level Details and Materials from Images-
[pdf]
[supp]
[bibtex]@InProceedings{Fruhauf_2024_CVPR,
  author    = {Fr{\"u}hauf, Maximilian and Riemenschneider, Hayko and Gross, Markus and Schroers, Christopher},
  title     = {{QUADify}: Extracting Meshes with Pixel-level Details and Materials from Images},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4661--4670},
}
RecDiffusion: Rectangling for Image Stitching with Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Tianhao and Li, Haipeng and Wang, Ziyi and Luo, Ao and Zhang, Chen-Lin and Li, Jiajun and Zeng, Bing and Liu, Shuaicheng},
  title     = {{RecDiffusion}: Rectangling for Image Stitching with Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2692--2701},
}
Eclipse: Disambiguating Illumination and Materials using Unintended Shadows-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Verbin_2024_CVPR, author = {Verbin, Dor and Mildenhall, Ben and Hedman, Peter and Barron, Jonathan T. and Zickler, Todd and Srinivasan, Pratul P.}, title = {Eclipse: Disambiguating Illumination and Materials using Unintended Shadows}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {77-86} }
Feature 3DGS: Supercharging 3D Gaussian Splatting to Enable Distilled Feature Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR, author = {Zhou, Shijie and Chang, Haoran and Jiang, Sicheng and Fan, Zhiwen and Zhu, Zehao and Xu, Dejia and Chari, Pradyumna and You, Suya and Wang, Zhangyang and Kadambi, Achuta}, title = {Feature 3DGS: Supercharging 3D Gaussian Splatting to Enable Distilled Feature Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {21676-21685} }
Balancing Act: Distribution-Guided Debiasing in Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Parihar_2024_CVPR, author = {Parihar, Rishubh and Bhat, Abhijnya and Basu, Abhipsa and Mallick, Saswat and Kundu, Jogendra Nath and Babu, R. Venkatesh}, title = {Balancing Act: Distribution-Guided Debiasing in Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6668-6678} }
Viewpoint-Aware Visual Grounding in 3D Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Shi_2024_CVPR, author = {Shi, Xiangxi and Wu, Zhonghua and Lee, Stefan}, title = {Viewpoint-Aware Visual Grounding in 3D Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14056-14065} }
4K4D: Real-Time 4D View Synthesis at 4K Resolution-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Zhen and Peng, Sida and Lin, Haotong and He, Guangzhao and Sun, Jiaming and Shen, Yujun and Bao, Hujun and Zhou, Xiaowei}, title = {4K4D: Real-Time 4D View Synthesis at 4K Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20029-20040} }
View-decoupled Transformer for Person Re-identification under Aerial-ground Camera Network-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Quan and Wang, Lei and Patel, Vishal M. and Xie, Xiaohua and Lai, Jianhuang}, title = {View-decoupled Transformer for Person Re-identification under Aerial-ground Camera Network}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22000-22009} }
CRKD: Enhanced Camera-Radar Object Detection with Cross-modality Knowledge Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Lingjun and Song, Jingyu and Skinner, Katherine A.}, title = {CRKD: Enhanced Camera-Radar Object Detection with Cross-modality Knowledge Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15470-15480} }
Differentiable Point-based Inverse Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chung_2024_CVPR, author = {Chung, Hoon-Gyu and Choi, Seokjun and Baek, Seung-Hwan}, title = {Differentiable Point-based Inverse Rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4399-4409} }
OED: Towards One-stage End-to-End Dynamic Scene Graph Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Guan and Li, Zhimin and Chen, Qingchao and Liu, Yang}, title = {OED: Towards One-stage End-to-End Dynamic Scene Graph Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27938-27947} }
CoG-DQA: Chain-of-Guiding Learning with Large Language Models for Diagram Question Answering-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Shaowei and Zhang, Lingling and Zhu, Longji and Qin, Tao and Yap, Kim-Hui and Zhang, Xinyu and Liu, Jun}, title = {CoG-DQA: Chain-of-Guiding Learning with Large Language Models for Diagram Question Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13969-13979} }
Transferable and Principled Efficiency for Open-Vocabulary Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Jingxuan and Chen, Wuyang and Zhao, Yao and Wei, Yunchao}, title = {Transferable and Principled Efficiency for Open-Vocabulary Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15814-15824} }
A Unified and Interpretable Emotion Representation and Expression Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Paskaleva_2024_CVPR, author = {Paskaleva, Reni and Holubakha, Mykyta and Ilic, Andela and Motamed, Saman and Van Gool, Luc and Paudel, Danda}, title = {A Unified and Interpretable Emotion Representation and Expression Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2447-2456} }
Upscale-A-Video: Temporal-Consistent Diffusion Model for Real-World Video Super-Resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2024_CVPR, author = {Zhou, Shangchen and Yang, Peiqing and Wang, Jianyi and Luo, Yihang and Loy, Chen Change}, title = {Upscale-A-Video: Temporal-Consistent Diffusion Model for Real-World Video Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2535-2545} }
EvDiG: Event-guided Direct and Global Components Separation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2024_CVPR, author = {Zhou, Xinyu and Duan, Peiqi and Li, Boyu and Zhou, Chu and Xu, Chao and Shi, Boxin}, title = {EvDiG: Event-guided Direct and Global Components Separation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9612-9621} }
DeIL: Direct-and-Inverse CLIP for Open-World Few-Shot Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Shao_2024_CVPR, author = {Shao, Shuai and Bai, Yu and Wang, Yan and Liu, Baodi and Zhou, Yicong}, title = {DeIL: Direct-and-Inverse CLIP for Open-World Few-Shot Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28505-28514} }
4D-DRESS: A 4D Dataset of Real-World Human Clothing With Semantic Annotations-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Wenbo and Ho, Hsuan-I and Guo, Chen and Rong, Boxiang and Grigorev, Artur and Song, Jie and Zarate, Juan Jose and Hilliges, Otmar}, title = {4D-DRESS: A 4D Dataset of Real-World Human Clothing With Semantic Annotations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {550-560} }
Feedback-Guided Autonomous Driving-
[pdf]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Jimuyang and Huang, Zanming and Ray, Arijit and Ohn-Bar, Eshed}, title = {Feedback-Guided Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15000-15011} }
Large Language Models are Good Prompt Learners for Low-Shot Image Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2024_CVPR, author = {Zheng, Zhaoheng and Wei, Jingmin and Hu, Xuefeng and Zhu, Haidong and Nevatia, Ram}, title = {Large Language Models are Good Prompt Learners for Low-Shot Image Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28453-28462} }
Specularity Factorization for Low-Light Enhancement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Saini_2024_CVPR, author = {Saini, Saurabh and Narayanan, P J}, title = {Specularity Factorization for Low-Light Enhancement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1-12} }
Paint3D: Paint Anything 3D with Lighting-Less Texture Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2024_CVPR, author = {Zeng, Xianfang and Chen, Xin and Qi, Zhongqi and Liu, Wen and Zhao, Zibo and Wang, Zhibin and Fu, Bin and Liu, Yong and Yu, Gang}, title = {Paint3D: Paint Anything 3D with Lighting-Less Texture Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4252-4262} }
VILA: On Pre-training for Visual Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2024_CVPR, author = {Lin, Ji and Yin, Hongxu and Ping, Wei and Molchanov, Pavlo and Shoeybi, Mohammad and Han, Song}, title = {VILA: On Pre-training for Visual Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26689-26699} }
DiLiGenRT: A Photometric Stereo Dataset with Quantified Roughness and Translucency-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2024_CVPR, author = {Guo, Heng and Ren, Jieji and Wang, Feishi and Shi, Boxin and Ren, Mingjun and Matsushita, Yasuyuki}, title = {DiLiGenRT: A Photometric Stereo Dataset with Quantified Roughness and Translucency}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11810-11820} }
De-Diffusion Makes Text a Strong Cross-Modal Interface-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2024_CVPR, author = {Wei, Chen and Liu, Chenxi and Qiao, Siyuan and Zhang, Zhishuai and Yuille, Alan and Yu, Jiahui}, title = {De-Diffusion Makes Text a Strong Cross-Modal Interface}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13492-13503} }
End-to-End Spatio-Temporal Action Localisation with Video Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gritsenko_2024_CVPR, author = {Gritsenko, Alexey A. and Xiong, Xuehan and Djolonga, Josip and Dehghani, Mostafa and Sun, Chen and Lucic, Mario and Schmid, Cordelia and Arnab, Anurag}, title = {End-to-End Spatio-Temporal Action Localisation with Video Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18373-18383} }
Text-Guided Variational Image Generation for Industrial Anomaly Detection and Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2024_CVPR, author = {Lee, Mingyu and Choi, Jongwon}, title = {Text-Guided Variational Image Generation for Industrial Anomaly Detection and Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26519-26528} }
Self-Adaptive Reality-Guided Diffusion for Artifact-Free Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2024_CVPR, author = {Zheng, Qingping and Zheng, Ling and Guo, Yuanfan and Li, Ying and Xu, Songcen and Deng, Jiankang and Xu, Hang}, title = {Self-Adaptive Reality-Guided Diffusion for Artifact-Free Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25806-25816} }
End-to-End Temporal Action Detection with 1B Parameters Across 1000 Frames-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Shuming and Zhang, Chen-Lin and Zhao, Chen and Ghanem, Bernard}, title = {End-to-End Temporal Action Detection with 1B Parameters Across 1000 Frames}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18591-18601} }
Multimodal Representation Learning by Alternating Unimodal Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Xiaohui and Yoon, Jaehong and Bansal, Mohit and Yao, Huaxiu}, title = {Multimodal Representation Learning by Alternating Unimodal Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27456-27466} }
MS-MANO: Enabling Hand Pose Tracking with Biomechanical Constraints-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2024_CVPR, author = {Xie, Pengfei and Xu, Wenqiang and Tang, Tutian and Yu, Zhenjun and Lu, Cewu}, title = {MS-MANO: Enabling Hand Pose Tracking with Biomechanical Constraints}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2382-2392} }
Generate Like Experts: Multi-Stage Font Generation by Incorporating Font Transfer Process into Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Fu_2024_CVPR, author = {Fu, Bin and Yu, Fanghua and Liu, Anran and Wang, Zixuan and Wen, Jie and He, Junjun and Qiao, Yu}, title = {Generate Like Experts: Multi-Stage Font Generation by Incorporating Font Transfer Process into Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6892-6901} }
Pre-training Vision Models with Mandelbulb Variations-
[pdf]
[supp]
[bibtex]@InProceedings{Chiche_2024_CVPR, author = {Chiche, Benjamin Naoto and Horikawa, Yuto and Fujita, Ryo}, title = {Pre-training Vision Models with Mandelbulb Variations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22062-22071} }
Diffuse, Attend, and Segment: Unsupervised Zero-Shot Segmentation using Stable Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tian_2024_CVPR, author = {Tian, Junjiao and Aggarwal, Lavisha and Colaco, Andrea and Kira, Zsolt and Gonzalez-Franco, Mar}, title = {Diffuse, Attend, and Segment: Unsupervised Zero-Shot Segmentation using Stable Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3554-3563} }
TransNeXt: Robust Foveal Visual Perception for Vision Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2024_CVPR, author = {Shi, Dai}, title = {TransNeXt: Robust Foveal Visual Perception for Vision Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17773-17783} }
Implicit Discriminative Knowledge Learning for Visible-Infrared Person Re-Identification-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ren_2024_CVPR, author = {Ren, Kaijie and Zhang, Lei}, title = {Implicit Discriminative Knowledge Learning for Visible-Infrared Person Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {393-402} }
Modeling Dense Multimodal Interactions Between Biological Pathways and Histology for Survival Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jaume_2024_CVPR, author = {Jaume, Guillaume and Vaidya, Anurag and Chen, Richard J. and Williamson, Drew F.K. and Liang, Paul Pu and Mahmood, Faisal}, title = {Modeling Dense Multimodal Interactions Between Biological Pathways and Histology for Survival Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11579-11590} }
Mining Supervision for Dynamic Regions in Self-Supervised Monocular Depth Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2024_CVPR, author = {Nguyen, Hoang Chuong and Wang, Tianyu and Alvarez, Jose M. and Liu, Miaomiao}, title = {Mining Supervision for Dynamic Regions in Self-Supervised Monocular Depth Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10446-10455} }
Gradient Alignment for Cross-Domain Face Anti-Spoofing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Le_2024_CVPR, author = {Le, Binh M. and Woo, Simon S.}, title = {Gradient Alignment for Cross-Domain Face Anti-Spoofing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {188-199} }
Physics-guided Shape-from-Template: Monocular Video Perception through Neural Surrogate Models-
[pdf]
[supp]
[bibtex]@InProceedings{Stotko_2024_CVPR, author = {Stotko, David and Wandel, Nils and Klein, Reinhard}, title = {Physics-guided Shape-from-Template: Monocular Video Perception through Neural Surrogate Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11895-11904} }
S2MVTC: a Simple yet Efficient Scalable Multi-View Tensor Clustering-
[pdf]
[bibtex]@InProceedings{Long_2024_CVPR, author = {Long, Zhen and Wang, Qiyuan and Ren, Yazhou and Liu, Yipeng and Zhu, Ce}, title = {S2MVTC: a Simple yet Efficient Scalable Multi-View Tensor Clustering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26213-26222} }
OpticalDR: A Deep Optical Imaging Model for Privacy-Protective Depression Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2024_CVPR, author = {Pan, Yuchen and Jiang, Junjun and Jiang, Kui and Wu, Zhihao and Yu, Keyuan and Liu, Xianming}, title = {OpticalDR: A Deep Optical Imaging Model for Privacy-Protective Depression Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1303-1312} }
Observation-Guided Diffusion Probabilistic Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2024_CVPR, author = {Kang, Junoh and Choi, Jinyoung and Choi, Sungik and Han, Bohyung}, title = {Observation-Guided Diffusion Probabilistic Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8323-8331} }
You'll Never Walk Alone: A Sketch and Text Duet for Fine-Grained Image Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Koley_2024_CVPR, author = {Koley, Subhadeep and Bhunia, Ayan Kumar and Sain, Aneeshan and Chowdhury, Pinaki Nath and Xiang, Tao and Song, Yi-Zhe}, title = {You'll Never Walk Alone: A Sketch and Text Duet for Fine-Grained Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16509-16519} }
Spatial-Aware Regression for Keypoint Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Dongkai and Zhang, Shiliang}, title = {Spatial-Aware Regression for Keypoint Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {624-633} }
S2MAE: A Spatial-Spectral Pretraining Foundation Model for Spectral Remote Sensing Data-
[pdf]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Xuyang and Hong, Danfeng and Chanussot, Jocelyn}, title = {S2MAE: A Spatial-Spectral Pretraining Foundation Model for Spectral Remote Sensing Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24088-24097} }
EFormer: Enhanced Transformer towards Semantic-Contour Features of Foreground for Portraits Matting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Zitao and Miao, Qiguang and Xi, Yue and Zhao, Peipei}, title = {EFormer: Enhanced Transformer towards Semantic-Contour Features of Foreground for Portraits Matting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3880-3889} }
MultiPly: Reconstruction of Multiple People from Monocular Video in the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2024_CVPR, author = {Jiang, Zeren and Guo, Chen and Kaufmann, Manuel and Jiang, Tianjian and Valentin, Julien and Hilliges, Otmar and Song, Jie}, title = {MultiPly: Reconstruction of Multiple People from Monocular Video in the Wild}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {109-118} }
Unsupervised 3D Structure Inference from Category-Specific Image Collections-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Weikang and Cao, Dongliang and Bernard, Florian}, title = {Unsupervised 3D Structure Inference from Category-Specific Image Collections}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10704-10714} }
DiG-IN: Diffusion Guidance for Investigating Networks - Uncovering Classifier Differences, Neuron Visualisations, and Visual Counterfactual Explanations-
[pdf]
[supp]
[bibtex]@InProceedings{Augustin_2024_CVPR, author = {Augustin, Maximilian and Neuhaus, Yannic and Hein, Matthias}, title = {DiG-IN: Diffusion Guidance for Investigating Networks - Uncovering Classifier Differences, Neuron Visualisations, and Visual Counterfactual Explanations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11093-11103} }
RepViT: Revisiting Mobile CNN From ViT Perspective-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Ao and Chen, Hui and Lin, Zijia and Han, Jungong and Ding, Guiguang}, title = {RepViT: Revisiting Mobile CNN From ViT Perspective}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15909-15920} }
MonoNPHM: Dynamic Head Reconstruction from Monocular Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Giebenhain_2024_CVPR, author = {Giebenhain, Simon and Kirschstein, Tobias and Georgopoulos, Markos and R\"unz, Martin and Agapito, Lourdes and Nie{\ss}ner, Matthias}, title = {MonoNPHM: Dynamic Head Reconstruction from Monocular Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10747-10758} }
Realigning Confidence with Temporal Saliency Information for Point-Level Weakly-Supervised Temporal Action Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Xia_2024_CVPR, author = {Xia, Ziying and Cheng, Jian and Liu, Siyu and Hu, Yongxiang and Wang, Shiguang and Zhang, Yijie and Dang, Liwan}, title = {Realigning Confidence with Temporal Saliency Information for Point-Level Weakly-Supervised Temporal Action Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18440-18450} }
ConsistNet: Enforcing 3D Consistency for Multi-view Images Diffusion-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Jiayu and Cheng, Ziang and Duan, Yunfei and Ji, Pan and Li, Hongdong}, title = {ConsistNet: Enforcing 3D Consistency for Multi-view Images Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7079-7088} }
GenN2N: Generative NeRF2NeRF Translation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Xiangyue and Xue, Han and Luo, Kunming and Tan, Ping and Yi, Li}, title = {GenN2N: Generative NeRF2NeRF Translation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5105-5114} }
Theoretically Achieving Continuous Representation of Oriented Bounding Boxes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2024_CVPR, author = {Xiao, Zikai and Yang, Guoye and Yang, Xue and Mu, Taijiang and Yan, Junchi and Hu, Shimin}, title = {Theoretically Achieving Continuous Representation of Oriented Bounding Boxes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16912-16922} }
Universal Robustness via Median Randomized Smoothing for Real-World Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chaouai_2024_CVPR, author = {Chaouai, Zakariya and Tamaazousti, Mohamed}, title = {Universal Robustness via Median Randomized Smoothing for Real-World Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9059-9068} }
One-dimensional Adapter to Rule Them All: Concepts, Diffusion Models and Erasing Applications-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lyu_2024_CVPR, author = {Lyu, Mengyao and Yang, Yuhong and Hong, Haiwen and Chen, Hui and Jin, Xuan and He, Yuan and Xue, Hui and Han, Jungong and Ding, Guiguang}, title = {One-dimensional Adapter to Rule Them All: Concepts, Diffusion Models and Erasing Applications}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7559-7568} }
Learning Large-Factor EM Image Super-Resolution with Generative Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Shou_2024_CVPR, author = {Shou, Jiateng and Xiao, Zeyu and Deng, Shiyu and Huang, Wei and Shi, Peiyao and Zhang, Ruobing and Xiong, Zhiwei and Wu, Feng}, title = {Learning Large-Factor EM Image Super-Resolution with Generative Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11313-11322} }
DIMAT: Decentralized Iterative Merging-And-Training for Deep Learning Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Saadati_2024_CVPR, author = {Saadati, Nastaran and Pham, Minh and Saleem, Nasla and Waite, Joshua R. and Balu, Aditya and Jiang, Zhanhong and Hegde, Chinmay and Sarkar, Soumik}, title = {DIMAT: Decentralized Iterative Merging-And-Training for Deep Learning Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27517-27527} }
MMA: Multi-Modal Adapter for Vision-Language Models-
[pdf]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Lingxiao and Zhang, Ru-Yuan and Wang, Yanchen and Xie, Xiaohua}, title = {MMA: Multi-Modal Adapter for Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23826-23837} }
Kandinsky Conformal Prediction: Efficient Calibration of Image Segmentation Algorithms-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Brunekreef_2024_CVPR, author = {Brunekreef, Joren and Marcus, Eric and Sheombarsing, Ray and Sonke, Jan-Jakob and Teuwen, Jonas}, title = {Kandinsky Conformal Prediction: Efficient Calibration of Image Segmentation Algorithms}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4135-4143} }
Diversity-aware Channel Pruning for StyleGAN Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chung_2024_CVPR, author = {Chung, Jiwoo and Hyun, Sangeek and Shim, Sang-Heon and Heo, Jae-Pil}, title = {Diversity-aware Channel Pruning for StyleGAN Compression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7902-7911} }
BioCLIP: A Vision Foundation Model for the Tree of Life-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Stevens_2024_CVPR, author = {Stevens, Samuel and Wu, Jiaman and Thompson, Matthew J and Campolongo, Elizabeth G and Song, Chan Hee and Carlyn, David Edward and Dong, Li and Dahdul, Wasila M and Stewart, Charles and Berger-Wolf, Tanya and Chao, Wei-Lun and Su, Yu}, title = {BioCLIP: A Vision Foundation Model for the Tree of Life}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19412-19424} }
From Pixels to Graphs: Open-Vocabulary Scene Graph Generation with Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Rongjie and Zhang, Songyang and Lin, Dahua and Chen, Kai and He, Xuming}, title = {From Pixels to Graphs: Open-Vocabulary Scene Graph Generation with Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28076-28086} }
Deep Imbalanced Regression via Hierarchical Classification Adjustment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiong_2024_CVPR, author = {Xiong, Haipeng and Yao, Angela}, title = {Deep Imbalanced Regression via Hierarchical Classification Adjustment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23721-23730} }
Adaptive Fusion of Single-View and Multi-View Depth for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2024_CVPR, author = {Cheng, Junda and Yin, Wei and Wang, Kaixuan and Chen, Xiaozhi and Wang, Shijie and Yang, Xin}, title = {Adaptive Fusion of Single-View and Multi-View Depth for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10138-10147} }
Neural Clustering based Visual Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Guikun and Li, Xia and Yang, Yi and Wang, Wenguan}, title = {Neural Clustering based Visual Representation Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5714-5725} }
Continual Self-supervised Learning: Towards Universal Multi-modal Medical Data Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2024_CVPR, author = {Ye, Yiwen and Xie, Yutong and Zhang, Jianpeng and Chen, Ziyang and Wu, Qi and Xia, Yong}, title = {Continual Self-supervised Learning: Towards Universal Multi-modal Medical Data Representation Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11114-11124} }
Sparse Semi-DETR: Sparse Learnable Queries for Semi-Supervised Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Shehzadi_2024_CVPR, author = {Shehzadi, Tahira and Hashmi, Khurram Azeem and Stricker, Didier and Afzal, Muhammad Zeshan}, title = {Sparse Semi-DETR: Sparse Learnable Queries for Semi-Supervised Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5840-5850} }
Towards Efficient Replay in Federated Incremental Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Yichen and Li, Qunwei and Wang, Haozhao and Li, Ruixuan and Zhong, Wenliang and Zhang, Guannan}, title = {Towards Efficient Replay in Federated Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12820-12829} }
SimAC: A Simple Anti-Customization Method for Protecting Face Privacy against Text-to-Image Synthesis of Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Feifei and Tan, Zhentao and Wei, Tianyi and Wu, Yue and Huang, Qidong}, title = {SimAC: A Simple Anti-Customization Method for Protecting Face Privacy against Text-to-Image Synthesis of Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12047-12056} }
Total-Decom: Decomposed 3D Scene Reconstruction with Minimal Interaction-
[pdf]
[supp]
[bibtex]@InProceedings{Lyu_2024_CVPR, author = {Lyu, Xiaoyang and Chang, Chirui and Dai, Peng and Sun, Yang-Tian and Qi, Xiaojuan}, title = {Total-Decom: Decomposed 3D Scene Reconstruction with Minimal Interaction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20860-20869} }
Accelerating Neural Field Training via Soft Mining-
[pdf]
[arXiv]
[bibtex]@InProceedings{Kheradmand_2024_CVPR, author = {Kheradmand, Shakiba and Rebain, Daniel and Sharma, Gopal and Isack, Hossam and Kar, Abhishek and Tagliasacchi, Andrea and Yi, Kwang Moo}, title = {Accelerating Neural Field Training via Soft Mining}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {20071-20080} }
Ensemble Diversity Facilitates Adversarial Transferability-
[pdf]
[bibtex]@InProceedings{Tang_2024_CVPR, author = {Tang, Bowen and Wang, Zheng and Bin, Yi and Dou, Qi and Yang, Yang and Shen, Heng Tao}, title = {Ensemble Diversity Facilitates Adversarial Transferability}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24377-24386} }
Fair-VPT: Fair Visual Prompt Tuning for Image Classification-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2024_CVPR, author = {Park, Sungho and Byun, Hyeran}, title = {Fair-VPT: Fair Visual Prompt Tuning for Image Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12268-12278} }
Uncertainty-Aware Source-Free Adaptive Image Super-Resolution with Wavelet Augmentation Transformer-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ai_2024_CVPR, author = {Ai, Yuang and Zhou, Xiaoqiang and Huang, Huaibo and Zhang, Lei and He, Ran}, title = {Uncertainty-Aware Source-Free Adaptive Image Super-Resolution with Wavelet Augmentation Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8142-8152} }
Gear-NeRF: Free-Viewpoint Rendering and Tracking with Motion-aware Spatio-Temporal Sampling-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Xinhang and Tai, Yu-Wing and Tang, Chi-Keung and Miraldo, Pedro and Lohit, Suhas and Chatterjee, Moitreya}, title = {Gear-NeRF: Free-Viewpoint Rendering and Tracking with Motion-aware Spatio-Temporal Sampling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19667-19679} }
CaDeT: a Causal Disentanglement Approach for Robust Trajectory Prediction in Autonomous Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Pourkeshavarz_2024_CVPR, author = {Pourkeshavarz, Mozhgan and Zhang, Junrui and Rasouli, Amir}, title = {CaDeT: a Causal Disentanglement Approach for Robust Trajectory Prediction in Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14874-14884} }
Spacetime Gaussian Feature Splatting for Real-Time Dynamic View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Zhan and Chen, Zhang and Li, Zhong and Xu, Yi}, title = {Spacetime Gaussian Feature Splatting for Real-Time Dynamic View Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8508-8520} }
Instruct-Imagen: Image Generation with Multi-modal Instruction-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2024_CVPR, author = {Hu, Hexiang and Chan, Kelvin C.K. and Su, Yu-Chuan and Chen, Wenhu and Li, Yandong and Sohn, Kihyuk and Zhao, Yang and Ben, Xue and Gong, Boqing and Cohen, William and Chang, Ming-Wei and Jia, Xuhui}, title = {Instruct-Imagen: Image Generation with Multi-modal Instruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4754-4763} }
Prompting Vision Foundation Models for Pathology Image Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Yin_2024_CVPR, author = {Yin, Chong and Liu, Siqi and Zhou, Kaiyang and Wong, Vincent Wai-Sun and Yuen, Pong C.}, title = {Prompting Vision Foundation Models for Pathology Image Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11292-11301} }
Rethinking Few-shot 3D Point Cloud Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{An_2024_CVPR, author = {An, Zhaochong and Sun, Guolei and Liu, Yun and Liu, Fayao and Wu, Zongwei and Wang, Dan and Van Gool, Luc and Belongie, Serge}, title = {Rethinking Few-shot 3D Point Cloud Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3996-4006} }
SEED-Bench: Benchmarking Multimodal Large Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Bohao and Ge, Yuying and Ge, Yixiao and Wang, Guangzhi and Wang, Rui and Zhang, Ruimao and Shan, Ying}, title = {SEED-Bench: Benchmarking Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13299-13308} }
BrainWash: A Poisoning Attack to Forget in Continual Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Abbasi_2024_CVPR, author = {Abbasi, Ali and Nooralinejad, Parsa and Pirsiavash, Hamed and Kolouri, Soheil}, title = {BrainWash: A Poisoning Attack to Forget in Continual Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24057-24067} }
GreedyViG: Dynamic Axial Graph Construction for Efficient Vision GNNs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Munir_2024_CVPR, author = {Munir, Mustafa and Avery, William and Rahman, Md Mostafijur and Marculescu, Radu}, title = {GreedyViG: Dynamic Axial Graph Construction for Efficient Vision GNNs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6118-6127} }
Relightable and Animatable Neural Avatar from Sparse-View Video-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Zhen and Peng, Sida and Geng, Chen and Mou, Linzhan and Yan, Zihan and Sun, Jiaming and Bao, Hujun and Zhou, Xiaowei}, title = {Relightable and Animatable Neural Avatar from Sparse-View Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {990-1000} }
FreePoint: Unsupervised Point Cloud Instance Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Zhikai and Ding, Jian and Jiang, Li and Dai, Dengxin and Xia, Guisong}, title = {FreePoint: Unsupervised Point Cloud Instance Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {28254-28263} }
Pose Adapted Shape Learning for Large-Pose Face Reenactment-
[pdf]
[supp]
[bibtex]@InProceedings{Hsu_2024_CVPR, author = {Hsu, Gee-Sern Jison and Zhang, Jie-Ying and Hsiang, Huang Yu and Hong, Wei-Jie}, title = {Pose Adapted Shape Learning for Large-Pose Face Reenactment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7413-7422} }
Object Pose Estimation via the Aggregation of Diffusion Features-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Tianfu and Hu, Guosheng and Wang, Hongguang}, title = {Object Pose Estimation via the Aggregation of Diffusion Features}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10238-10247} }
Circuit Design and Efficient Simulation of Quantum Inner Product and Empirical Studies of Its Effect on Near-Term Hybrid Quantum-Classic Machine Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Xiong_2024_CVPR, author = {Xiong, Hao and Tang, Yehui and Ye, Xinyu and Yan, Junchi}, title = {Circuit Design and Efficient Simulation of Quantum Inner Product and Empirical Studies of Its Effect on Near-Term Hybrid Quantum-Classic Machine Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26162-26170} }
How to Make Cross Encoder a Good Teacher for Efficient Image-Text Retrieval?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Yuxin and Ma, Zongyang and Zhang, Ziqi and Qi, Zhongang and Yuan, Chunfeng and Li, Bing and Pu, Junfu and Shan, Ying and Qi, Xiaojuan and Hu, Weiming}, title = {How to Make Cross Encoder a Good Teacher for Efficient Image-Text Retrieval?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26994-27003} }
Diffeomorphic Template Registration for Atmospheric Turbulence Mitigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lao_2024_CVPR, author = {Lao, Dong and Wang, Congli and Wong, Alex and Soatto, Stefano}, title = {Diffeomorphic Template Registration for Atmospheric Turbulence Mitigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25107-25116} }
Selective Nonlinearities Removal from Digital Signals-
[pdf]
[bibtex]@InProceedings{Maliszewski_2024_CVPR, author = {Maliszewski, Krzysztof A. and Urba\'nska, Magdalena A. and Vetrova, Varvara and Kolenderska, Sylwia M.}, title = {Selective Nonlinearities Removal from Digital Signals}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25028-25036} }
NB-GTR: Narrow-Band Guided Turbulence Removal-
[pdf]
[supp]
[bibtex]@InProceedings{Xia_2024_CVPR, author = {Xia, Yifei and Zhou, Chu and Zhu, Chengxuan and Teng, Minggui and Xu, Chao and Shi, Boxin}, title = {NB-GTR: Narrow-Band Guided Turbulence Removal}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24934-24943} }
Can Biases in ImageNet Models Explain Generalization?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gavrikov_2024_CVPR, author = {Gavrikov, Paul and Keuper, Janis}, title = {Can Biases in ImageNet Models Explain Generalization?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {22184-22194} }
NRDF: Neural Riemannian Distance Fields for Learning Articulated Pose Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2024_CVPR, author = {He, Yannan and Tiwari, Garvita and Birdal, Tolga and Lenssen, Jan Eric and Pons-Moll, Gerard}, title = {NRDF: Neural Riemannian Distance Fields for Learning Articulated Pose Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1661-1671} }
RepAn: Enhanced Annealing through Re-parameterization-
[pdf]
[supp]
[bibtex]@InProceedings{Fei_2024_CVPR, author = {Fei, Xiang and Zheng, Xiawu and Wang, Yan and Chao, Fei and Wu, Chenglin and Cao, Liujuan}, title = {RepAn: Enhanced Annealing through Re-parameterization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5798-5808} }
Generative Quanta Color Imaging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Purohit_2024_CVPR, author = {Purohit, Vishal and Luo, Junjie and Chi, Yiheng and Guo, Qi and Chan, Stanley H. and Qiu, Qiang}, title = {Generative Quanta Color Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25138-25148} }
Panda-70M: Captioning 70M Videos with Multiple Cross-Modality Teachers-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Tsai-Shien and Siarohin, Aliaksandr and Menapace, Willi and Deyneka, Ekaterina and Chao, Hsiang-wei and Jeon, Byung Eun and Fang, Yuwei and Lee, Hsin-Ying and Ren, Jian and Yang, Ming-Hsuan and Tulyakov, Sergey}, title = {Panda-70M: Captioning 70M Videos with Multiple Cross-Modality Teachers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13320-13331} }
Overload: Latency Attacks on Object Detection for Edge Devices-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Erh-Chung and Chen, Pin-Yu and Chung, I-Hsin and Lee, Che-Rung}, title = {Overload: Latency Attacks on Object Detection for Edge Devices}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24716-24725} }
DreamControl: Control-Based Text-to-3D Generation with 3D Self-Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Tianyu and Zeng, Yihan and Zhang, Zhilu and Xu, Wan and Xu, Hang and Xu, Songcen and Lau, Rynson W.H. and Zuo, Wangmeng}, title = {DreamControl: Control-Based Text-to-3D Generation with 3D Self-Prior}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5364-5373} }
Infrared Small Target Detection with Scale and Location Sensitivity-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Qiankun and Liu, Rui and Zheng, Bolun and Wang, Hongkui and Fu, Ying}, title = {Infrared Small Target Detection with Scale and Location Sensitivity}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17490-17499} }
Self-supervised Debiasing Using Low Rank Regularization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2024_CVPR, author = {Park, Geon Yeong and Jung, Chanyong and Lee, Sangmin and Ye, Jong Chul and Lee, Sang Wan}, title = {Self-supervised Debiasing Using Low Rank Regularization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12395-12405} }
ODIN: A Single Model for 2D and 3D Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jain_2024_CVPR, author = {Jain, Ayush and Katara, Pushkal and Gkanatsios, Nikolaos and Harley, Adam W. and Sarch, Gabriel and Aggarwal, Kriti and Chaudhary, Vishrav and Fragkiadaki, Katerina}, title = {ODIN: A Single Model for 2D and 3D Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3564-3574} }
SD4Match: Learning to Prompt Stable Diffusion Model for Semantic Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Xinghui and Lu, Jingyi and Han, Kai and Prisacariu, Victor Adrian}, title = {SD4Match: Learning to Prompt Stable Diffusion Model for Semantic Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27558-27568} }
InitNO: Boosting Text-to-Image Diffusion Models via Initial Noise Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2024_CVPR, author = {Guo, Xiefan and Liu, Jinlin and Cui, Miaomiao and Li, Jiankai and Yang, Hongyu and Huang, Di}, title = {InitNO: Boosting Text-to-Image Diffusion Models via Initial Noise Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9380-9389} }
Neural Video Compression with Feature Modulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Jiahao and Li, Bin and Lu, Yan}, title = {Neural Video Compression with Feature Modulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {26099-26108} }
Data Poisoning based Backdoor Attacks to Contrastive Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Jinghuai and Liu, Hongbin and Jia, Jinyuan and Gong, Neil Zhenqiang}, title = {Data Poisoning based Backdoor Attacks to Contrastive Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {24357-24366} }
Multimodal Sense-Informed Forecasting of 3D Human Motions-
[pdf]
[bibtex]@InProceedings{Lou_2024_CVPR, author = {Lou, Zhenyu and Cui, Qiongjie and Wang, Haofan and Tang, Xu and Zhou, Hong}, title = {Multimodal Sense-Informed Forecasting of 3D Human Motions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2144-2154} }
FlowerFormer: Empowering Neural Architecture Encoding using a Flow-aware Graph Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hwang_2024_CVPR, author = {Hwang, Dongyeong and Kim, Hyunju and Kim, Sunwoo and Shin, Kijung}, title = {FlowerFormer: Empowering Neural Architecture Encoding using a Flow-aware Graph Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6128-6137} }
EmoGen: Emotional Image Content Generation with Text-to-Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Jingyuan and Feng, Jiawei and Huang, Hui}, title = {EmoGen: Emotional Image Content Generation with Text-to-Image Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6358-6368} }
Finding Lottery Tickets in Vision Models via Data-driven Spectral Foresight Pruning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Iurada_2024_CVPR, author = {Iurada, Leonardo and Ciccone, Marco and Tommasi, Tatiana}, title = {Finding Lottery Tickets in Vision Models via Data-driven Spectral Foresight Pruning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16142-16151} }
InNeRF360: Text-Guided 3D-Consistent Object Inpainting on 360-degree Neural Radiance Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Dongqing and Zhang, Tong and Abboud, Alaa and S\"usstrunk, Sabine}, title = {InNeRF360: Text-Guided 3D-Consistent Object Inpainting on 360-degree Neural Radiance Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12677-12686} }
Neural Implicit Representation for Building Digital Twins of Unknown Articulated Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Weng_2024_CVPR, author = {Weng, Yijia and Wen, Bowen and Tremblay, Jonathan and Blukis, Valts and Fox, Dieter and Guibas, Leonidas and Birchfield, Stan}, title = {Neural Implicit Representation for Building Digital Twins of Unknown Articulated Objects}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3141-3150} }
Progressive Semantic-Guided Vision Transformer for Zero-Shot Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Shiming and Hou, Wenjin and Khan, Salman and Khan, Fahad Shahbaz}, title = {Progressive Semantic-Guided Vision Transformer for Zero-Shot Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {23964-23974} }
IS-Fusion: Instance-Scene Collaborative Fusion for Multimodal 3D Object Detection-
[pdf]
[bibtex]@InProceedings{Yin_2024_CVPR, author = {Yin, Junbo and Shen, Jianbing and Chen, Runnan and Li, Wei and Yang, Ruigang and Frossard, Pascal and Wang, Wenguan}, title = {IS-Fusion: Instance-Scene Collaborative Fusion for Multimodal 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14905-14915} }
Building Bridges across Spatial and Temporal Resolutions: Reference-Based Super-Resolution via Change Priors and Conditional Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dong_2024_CVPR, author = {Dong, Runmin and Yuan, Shuai and Luo, Bin and Chen, Mengxuan and Zhang, Jinxiao and Zhang, Lixian and Li, Weijia and Zheng, Juepeng and Fu, Haohuan}, title = {Building Bridges across Spatial and Temporal Resolutions: Reference-Based Super-Resolution via Change Priors and Conditional Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {27684-27694} }
Vanishing-Point-Guided Video Semantic Segmentation of Driving Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2024_CVPR, author = {Guo, Diandian and Fan, Deng-Ping and Lu, Tongyu and Sakaridis, Christos and Van Gool, Luc}, title = {Vanishing-Point-Guided Video Semantic Segmentation of Driving Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3544-3553} }
Enhancing Intrinsic Features for Debiasing via Investigating Class-Discerning Common Attributes in Bias-Contrastive Pair-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2024_CVPR, author = {Park, Jeonghoon and Chung, Chaeyeon and Choo, Jaegul}, title = {Enhancing Intrinsic Features for Debiasing via Investigating Class-Discerning Common Attributes in Bias-Contrastive Pair}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12332-12341} }
LAMP: Learn A Motion Pattern for Few-Shot Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Ruiqi and Chen, Liangyu and Yang, Tong and Guo, Chunle and Li, Chongyi and Zhang, Xiangyu}, title = {LAMP: Learn A Motion Pattern for Few-Shot Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7089-7098} }
Compositional Chain-of-Thought Prompting for Large Multimodal Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mitra_2024_CVPR, author = {Mitra, Chancharik and Huang, Brandon and Darrell, Trevor and Herzig, Roei}, title = {Compositional Chain-of-Thought Prompting for Large Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14420-14431} }
Diffusion Time-step Curriculum for One Image to 3D Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yi_2024_CVPR, author = {Yi, Xuanyu and Wu, Zike and Xu, Qingshan and Zhou, Pan and Lim, Joo-Hwee and Zhang, Hanwang}, title = {Diffusion Time-step Curriculum for One Image to 3D Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9948-9958} }
Language-driven Object Fusion into Neural Radiance Fields with Pose-Conditioned Dataset Updates-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shum_2024_CVPR, author = {Shum, Ka Chun and Kim, Jaeyeon and Hua, Binh-Son and Nguyen, Duc Thanh and Yeung, Sai-Kit}, title = {Language-driven Object Fusion into Neural Radiance Fields with Pose-Conditioned Dataset Updates}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5176-5187} }
Adaptive Hyper-graph Aggregation for Modality-Agnostic Federated Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Qi_2024_CVPR, author = {Qi, Fan and Li, Shuai}, title = {Adaptive Hyper-graph Aggregation for Modality-Agnostic Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12312-12321} }
SPIN: Simultaneous Perception Interaction and Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Uppal_2024_CVPR, author = {Uppal, Shagun and Agarwal, Ananye and Xiong, Haoyu and Shaw, Kenneth and Pathak, Deepak}, title = {SPIN: Simultaneous Perception Interaction and Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18133-18142} }
DREAM: Diffusion Rectification and Estimation-Adaptive Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR, author = {Zhou, Jinxin and Ding, Tianyu and Chen, Tianyi and Jiang, Jiachen and Zharkov, Ilya and Zhu, Zhihui and Liang, Luming}, title = {DREAM: Diffusion Rectification and Estimation-Adaptive Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8342-8351} }
Exploring the Potential of Large Foundation Models for Open-Vocabulary HOI Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lei_2024_CVPR, author = {Lei, Ting and Yin, Shaofeng and Liu, Yang}, title = {Exploring the Potential of Large Foundation Models for Open-Vocabulary HOI Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16657-16667} }
Back