Papers
- Back
DPMesh: Exploiting Diffusion Prior for Occluded Human Mesh Recovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2024_CVPR,
  author    = {Zhu, Yixuan and Li, Ao and Tang, Yansong and Zhao, Wenliang and Zhou, Jie and Lu, Jiwen},
  title     = {{DPMesh}: Exploiting Diffusion Prior for Occluded Human Mesh Recovery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1101--1110},
}
HEAL-SWIN: A Vision Transformer On The Sphere-
[pdf]
[supp]
[bibtex]@InProceedings{Carlsson_2024_CVPR,
  author    = {Carlsson, Oscar and Gerken, Jan E. and Linander, Hampus and Spie{\ss}, Heiner and Ohlsson, Fredrik and Petersson, Christoffer and Persson, Daniel},
  title     = {{HEAL-SWIN}: A Vision Transformer On The Sphere},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6067--6077},
}
3D Paintbrush: Local Stylization of 3D Shapes with Cascaded Score Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Decatur_2024_CVPR,
  author    = {Decatur, Dale and Lang, Itai and Aberman, Kfir and Hanocka, Rana},
  title     = {{3D Paintbrush}: Local Stylization of {3D} Shapes with Cascaded Score Distillation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4473--4483},
}
Guided Slot Attention for Unsupervised Video Object Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lee_2024_CVPR,
  author    = {Lee, Minhyeok and Cho, Suhwan and Lee, Dogyoon and Park, Chaewon and Lee, Jungho and Lee, Sangyoun},
  title     = {Guided Slot Attention for Unsupervised Video Object Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3807--3816},
}
Programmable Motion Generation for Open-Set Motion Control Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Hanchao and Zhan, Xiaohang and Huang, Shaoli and Mu, Tai-Jiang and Shan, Ying},
  title     = {Programmable Motion Generation for Open-Set Motion Control Tasks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1399--1408},
}
SCE-MAE: Selective Correspondence Enhancement with Masked Autoencoder for Self-Supervised Landmark Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Yin_2024_CVPR,
  author    = {Yin, Kejia and Rao, Varshanth and Jiang, Ruowei and Liu, Xudong and Aarabi, Parham and Lindell, David B.},
  title     = {{SCE-MAE}: Selective Correspondence Enhancement with Masked Autoencoder for Self-Supervised Landmark Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1313--1322},
}
LAKE-RED: Camouflaged Images Generation by Latent Background Knowledge Retrieval-Augmented Diffusion-
[pdf]
[bibtex]@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Pancheng and Xu, Peng and Qin, Pengda and Fan, Deng-Ping and Zhang, Zhicheng and Jia, Guoli and Zhou, Bowen and Yang, Jufeng},
  title     = {{LAKE-RED}: Camouflaged Images Generation by Latent Background Knowledge Retrieval-Augmented Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4092--4101},
}
TIGER: Time-Varying Denoising Model for 3D Point Cloud Generation with Diffusion Process-
[pdf]
[supp]
[bibtex]@InProceedings{Ren_2024_CVPR,
  author    = {Ren, Zhiyuan and Kim, Minchul and Liu, Feng and Liu, Xiaoming},
  title     = {{TIGER}: Time-Varying Denoising Model for {3D} Point Cloud Generation with Diffusion Process},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9462--9471},
}
ASH: Animatable Gaussian Splats for Efficient and Photoreal Human Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pang_2024_CVPR,
  author    = {Pang, Haokai and Zhu, Heming and Kortylewski, Adam and Theobalt, Christian and Habermann, Marc},
  title     = {{ASH}: Animatable {Gaussian} Splats for Efficient and Photoreal Human Rendering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1165--1175},
}
ArtAdapter: Text-to-Image Style Transfer using Multi-Level Style Encoder and Explicit Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Dar-Yen and Tennent, Hamish and Hsu, Ching-Wen},
  title     = {{ArtAdapter}: Text-to-Image Style Transfer using Multi-Level Style Encoder and Explicit Adaptation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8619--8628},
}
Activity-Biometrics: Person Identification from Daily Activities-
[pdf]
[supp]
[bibtex]@InProceedings{Azad_2024_CVPR,
  author    = {Azad, Shehreen and Rawat, Yogesh Singh},
  title     = {Activity-Biometrics: Person Identification from Daily Activities},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {287--296},
}
Z*: Zero-shot Style Transfer via Attention Reweighting-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2024_CVPR,
  author    = {Deng, Yingying and He, Xiangyu and Tang, Fan and Dong, Weiming},
  title     = {{Z*}: Zero-shot Style Transfer via Attention Reweighting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6934--6944},
}
Learning Continuous 3D Words for Text-to-Image Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Cheng_2024_CVPR,
  author    = {Cheng, Ta-Ying and Gadelha, Matheus and Groueix, Thibault and Fisher, Matthew and Mech, Radomir and Markham, Andrew and Trigoni, Niki},
  title     = {Learning Continuous {3D} Words for Text-to-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6753--6762},
}
MarkovGen: Structured Prediction for Efficient Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jayasumana_2024_CVPR,
  author    = {Jayasumana, Sadeep and Glasner, Daniel and Ramalingam, Srikumar and Veit, Andreas and Chakrabarti, Ayan and Kumar, Sanjiv},
  title     = {{MarkovGen}: Structured Prediction for Efficient Text-to-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9316--9325},
}
HashPoint: Accelerated Point Searching and Sampling for Neural Rendering-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2024_CVPR,
  author    = {Ma, Jiahao and Liu, Miaomiao and Ahmedt-Aristizabal, David and Nguyen, Chuong},
  title     = {{HashPoint}: Accelerated Point Searching and Sampling for Neural Rendering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4462--4472},
}
MFP: Making Full Use of Probability Maps for Interactive Image Segmentation-
[pdf]
[supp]
[arXiv]
% Key renamed from Lee_2024_CVPR: that key is already taken by an earlier entry in this file (repeated-entry error).
[bibtex]@InProceedings{Lee_2024_CVPR_MFP,
  author    = {Lee, Chaewon and Lee, Seon-Ho and Kim, Chang-Su},
  title     = {{MFP}: Making Full Use of Probability Maps for Interactive Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4051--4059},
}
StyLitGAN: Image-Based Relighting via Latent Control-
[pdf]
[supp]
[bibtex]@InProceedings{Bhattad_2024_CVPR,
  author    = {Bhattad, Anand and Soole, James and Forsyth, D. A.},
  title     = {{StyLitGAN}: Image-Based Relighting via Latent Control},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4231--4240},
}
MoMask: Generative Masked Modeling of 3D Human Motions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2024_CVPR,
  author    = {Guo, Chuan and Mu, Yuxuan and Javed, Muhammad Gohar and Wang, Sen and Cheng, Li},
  title     = {{MoMask}: Generative Masked Modeling of {3D} Human Motions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1900--1910},
}
Fitting Flats to Flats-
[pdf]
[supp]
[bibtex]@InProceedings{Dogadov_2024_CVPR,
  author    = {Dogadov, Gabriel and Finnendahl, Ugo and Alexa, Marc},
  title     = {Fitting Flats to Flats},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5439--5447},
}
Coupled Laplacian Eigenmaps for Locally-Aware 3D Rigid Point Cloud Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Bastico_2024_CVPR,
  author    = {Bastico, Matteo and Decenci{\`e}re, Etienne and Cort{\'e}, Laurent and Tillier, Yannick and Ryckelynck, David},
  title     = {Coupled {Laplacian} Eigenmaps for Locally-Aware {3D} Rigid Point Cloud Matching},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3447--3458},
}
Scaling Up Video Summarization Pretraining with Large Language Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Argaw_2024_CVPR,
  author    = {Argaw, Dawit Mureja and Yoon, Seunghyun and Heilbron, Fabian Caba and Deilamsalehy, Hanieh and Bui, Trung and Wang, Zhaowen and Dernoncourt, Franck and Chung, Joon Son},
  title     = {Scaling Up Video Summarization Pretraining with Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8332--8341},
}
Continuous Optical Zooming: A Benchmark for Arbitrary-Scale Image Super-Resolution in Real World-
[pdf]
[bibtex]@InProceedings{Fu_2024_CVPR,
  author    = {Fu, Huiyuan and Peng, Fei and Li, Xianwei and Li, Yejun and Wang, Xin and Ma, Huadong},
  title     = {Continuous Optical Zooming: A Benchmark for Arbitrary-Scale Image Super-Resolution in Real World},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3035--3044},
}
Sharingan: A Transformer Architecture for Multi-Person Gaze Following-
[pdf]
[supp]
[bibtex]@InProceedings{Tafasca_2024_CVPR,
  author    = {Tafasca, Samy and Gupta, Anshul and Odobez, Jean-Marc},
  title     = {Sharingan: A Transformer Architecture for Multi-Person Gaze Following},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2008--2017},
}
Open-Vocabulary Segmentation with Semantic-Assisted Calibration-
[pdf]
[arXiv]
% Key renamed from Liu_2024_CVPR: that key is already taken by an earlier entry in this file (repeated-entry error).
[bibtex]@InProceedings{Liu_2024_CVPR_OpenVocabulary,
  author    = {Liu, Yong and Bai, Sule and Li, Guanbin and Wang, Yitong and Tang, Yansong},
  title     = {Open-Vocabulary Segmentation with Semantic-Assisted Calibration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3491--3500},
}
Towards a Perceptual Evaluation Framework for Lighting Estimation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Giroux_2024_CVPR,
  author    = {Giroux, Justine and Dastjerdi, Mohammad Reza Karimi and Hold-Geoffroy, Yannick and Vazquez-Corral, Javier and Lalonde, Jean-Fran{\c{c}}ois},
  title     = {Towards a Perceptual Evaluation Framework for Lighting Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4410--4419},
}
On Exact Inversion of DPM-Solvers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hong_2024_CVPR,
  author    = {Hong, Seongmin and Lee, Kyeonghyun and Jeon, Suh Yoon and Bae, Hyewon and Chun, Se Young},
  title     = {On Exact Inversion of {DPM-Solvers}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7069--7078},
}
CAMEL: CAusal Motion Enhancement Tailored for Lifting Text-driven Video Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Guiwei and Zhang, Tianyu and Niu, Guanglin and Tan, Zichang and Bai, Yalong and Yang, Qing},
  title     = {{CAMEL}: {CAusal} Motion Enhancement Tailored for Lifting Text-driven Video Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9079--9088},
}
FocSAM: Delving Deeply into Focused Objects in Segmenting Anything-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR,
  author    = {Huang, You and Lan, Zongyu and Cao, Liujuan and Lin, Xianming and Zhang, Shengchuan and Jiang, Guannan and Ji, Rongrong},
  title     = {{FocSAM}: Delving Deeply into Focused Objects in Segmenting Anything},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3120--3130},
}
PRDP: Proximal Reward Difference Prediction for Large-Scale Reward Finetuning of Diffusion Models-
[pdf]
[supp]
[arXiv]
% Key renamed from Deng_2024_CVPR: that key is already taken by an earlier entry in this file (repeated-entry error).
[bibtex]@InProceedings{Deng_2024_CVPR_PRDP,
  author    = {Deng, Fei and Wang, Qifei and Wei, Wei and Hou, Tingbo and Grundmann, Matthias},
  title     = {{PRDP}: Proximal Reward Difference Prediction for Large-Scale Reward Finetuning of Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7423--7433},
}
Task-Customized Mixture of Adapters for General Image Fusion-
[pdf]
[supp]
[arXiv]
% Key renamed from Zhu_2024_CVPR: that key is already taken by an earlier entry in this file (repeated-entry error).
[bibtex]@InProceedings{Zhu_2024_CVPR_TaskCustomized,
  author    = {Zhu, Pengfei and Sun, Yang and Cao, Bing and Hu, Qinghua},
  title     = {Task-Customized Mixture of Adapters for General Image Fusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7099--7108},
}
Artist-Friendly Relightable and Animatable Neural Heads-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Yingyan and Chandran, Prashanth and Weiss, Sebastian and Gross, Markus and Zoss, Gaspard and Bradley, Derek},
  title     = {Artist-Friendly Relightable and Animatable Neural Heads},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2457--2467},
}
From Feature to Gaze: A Generalizable Replacement of Linear Layer for Gaze Estimation-
[pdf]
[bibtex]@InProceedings{Bao_2024_CVPR,
  author    = {Bao, Yiwei and Lu, Feng},
  title     = {From Feature to Gaze: A Generalizable Replacement of Linear Layer for Gaze Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1409--1418},
}
Boosting Image Restoration via Priors from Pre-trained Models-
[pdf]
[arXiv]
% Key renamed from Xu_2024_CVPR: that key is already taken by an earlier entry in this file (repeated-entry error).
[bibtex]@InProceedings{Xu_2024_CVPR_Boosting,
  author    = {Xu, Xiaogang and Kong, Shu and Hu, Tao and Liu, Zhe and Bao, Hujun},
  title     = {Boosting Image Restoration via Priors from Pre-trained Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2900--2909},
}
VRetouchEr: Learning Cross-frame Feature Interdependence with Imperfection Flow for Face Retouching in Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Xue_2024_CVPR,
  author    = {Xue, Wen and Jiang, Le and Xie, Lianxin and Wu, Si and Xu, Yong and Wong, Hau San},
  title     = {{VRetouchEr}: Learning Cross-frame Feature Interdependence with Imperfection Flow for Face Retouching in Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9141--9150},
}
Arbitrary-Scale Image Generation and Upsampling using Latent Diffusion Model and Implicit Neural Decoder-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR,
  author    = {Kim, Jinseok and Kim, Tae-Kyun},
  title     = {Arbitrary-Scale Image Generation and Upsampling using Latent Diffusion Model and Implicit Neural Decoder},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9202--9211},
}
Cache Me if You Can: Accelerating Diffusion Models through Block Caching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wimbauer_2024_CVPR,
  author    = {Wimbauer, Felix and Wu, Bichen and Schoenfeld, Edgar and Dai, Xiaoliang and Hou, Ji and He, Zijian and Sanakoyeu, Artsiom and Zhang, Peizhao and Tsai, Sam and Kohler, Jonas and Rupprecht, Christian and Cremers, Daniel and Vajda, Peter and Wang, Jialiang},
  title     = {Cache Me if You Can: Accelerating Diffusion Models through Block Caching},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6211--6220},
}
Identifying Important Group of Pixels using Interactions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sumiyasu_2024_CVPR,
  author    = {Sumiyasu, Kosuke and Kawamoto, Kazuhiko and Kera, Hiroshi},
  title     = {Identifying Important Group of Pixels using Interactions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6017--6026},
}
DIOD: Self-Distillation Meets Object Discovery-
[pdf]
[supp]
[bibtex]@InProceedings{Kara_2024_CVPR,
  author    = {Kara, Sandra and Ammar, Hejer and Denize, Julien and Chabot, Florian and Pham, Quoc-Cuong},
  title     = {{DIOD}: Self-Distillation Meets Object Discovery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3975--3985},
}
GoMAvatar: Efficient Animatable Human Modeling from Monocular Video Using Gaussians-on-Mesh-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wen_2024_CVPR,
  author    = {Wen, Jing and Zhao, Xiaoming and Ren, Zhongzheng and Schwing, Alexander G. and Wang, Shenlong},
  title     = {{GoMAvatar}: Efficient Animatable Human Modeling from Monocular Video Using {Gaussians-on-Mesh}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2059--2069},
}
Neural Redshift: Random Networks are not Random Functions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Teney_2024_CVPR,
  author    = {Teney, Damien and Nicolicioiu, Armand Mihai and Hartmann, Valentin and Abbasnejad, Ehsan},
  title     = {Neural Redshift: Random Networks are not Random Functions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4786--4796},
}
HumanGaussian: Text-Driven 3D Human Generation with Gaussian Splatting-
[pdf]
[supp]
[arXiv]
% Key renamed from Liu_2024_CVPR: that key is already taken by an earlier entry in this file (repeated-entry error).
[bibtex]@InProceedings{Liu_2024_CVPR_HumanGaussian,
  author    = {Liu, Xian and Zhan, Xiaohang and Tang, Jiaxiang and Shan, Ying and Zeng, Gang and Lin, Dahua and Liu, Xihui and Liu, Ziwei},
  title     = {{HumanGaussian}: Text-Driven {3D} Human Generation with {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6646--6657},
}
CosmicMan: A Text-to-Image Foundation Model for Humans-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Shikai and Fu, Jianglin and Liu, Kaiyuan and Wang, Wentao and Lin, Kwan-Yee and Wu, Wayne},
  title     = {{CosmicMan}: A Text-to-Image Foundation Model for Humans},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6955--6965},
}
JDEC: JPEG Decoding via Enhanced Continuous Cosine Coefficients-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2024_CVPR,
  author    = {Han, Woo Kyoung and Im, Sunghoon and Kim, Jaedeok and Jin, Kyong Hwan},
  title     = {{JDEC}: {JPEG} Decoding via Enhanced Continuous Cosine Coefficients},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2784--2793},
}
HOI-M^3: Capture Multiple Humans and Objects Interaction within Contextual Environment-
[pdf]
[supp]
% Key renamed from Zhang_2024_CVPR: that key is already taken by an earlier entry in this file (repeated-entry error).
% The title's "M^3" is typeset as a math superscript instead of a literal caret.
[bibtex]@InProceedings{Zhang_2024_CVPR_HOIM3,
  author    = {Zhang, Juze and Zhang, Jingyan and Song, Zining and Shi, Zhanhe and Zhao, Chengfeng and Shi, Ye and Yu, Jingyi and Xu, Lan and Wang, Jingya},
  title     = {{HOI-M$^3$}: Capture Multiple Humans and Objects Interaction within Contextual Environment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {516--526},
}
Interactive3D: Create What You Want by Interactive 3D Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dong_2024_CVPR,
  author    = {Dong, Shaocong and Ding, Lihe and Huang, Zhanpeng and Wang, Zibin and Xue, Tianfan and Xu, Dan},
  title     = {{Interactive3D}: Create What You Want by Interactive {3D} Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4999--5008},
}
OmniLocalRF: Omnidirectional Local Radiance Fields from Dynamic Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2024_CVPR,
  author    = {Choi, Dongyoung and Jang, Hyeonjoong and Kim, Min H.},
  title     = {{OmniLocalRF}: Omnidirectional Local Radiance Fields from Dynamic Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6871--6880},
}
Semantic Human Mesh Reconstruction with Textures-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhan_2024_CVPR,
  author    = {Zhan, Xiaoyu and Yang, Jianxin and Li, Yuanqi and Guo, Jie and Guo, Yanwen and Wang, Wenping},
  title     = {Semantic Human Mesh Reconstruction with Textures},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {142--152},
}
PIA: Your Personalized Image Animator via Plug-and-Play Modules in Text-to-Image Models-
[pdf]
[supp]
[arXiv]
% Key renamed from Zhang_2024_CVPR: that key is already taken by an earlier entry in this file (repeated-entry error).
[bibtex]@InProceedings{Zhang_2024_CVPR_PIA,
  author    = {Zhang, Yiming and Xing, Zhening and Zeng, Yanhong and Fang, Youqing and Chen, Kai},
  title     = {{PIA}: Your Personalized Image Animator via Plug-and-Play Modules in Text-to-Image Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7747--7756},
}
NeRF Analogies: Example-Based Visual Attribute Transfer for NeRFs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fischer_2024_CVPR,
  author    = {Fischer, Michael and Li, Zhengqin and Nguyen-Phuoc, Thu and Bozic, Aljaz and Dong, Zhao and Marshall, Carl and Ritschel, Tobias},
  title     = {{NeRF} Analogies: Example-Based Visual Attribute Transfer for {NeRFs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4640--4650},
}
Texture-Preserving Diffusion Models for High-Fidelity Virtual Try-On-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Xu and Ding, Changxing and Hong, Zhibin and Huang, Junhao and Tao, Jin and Xu, Xiangmin},
  title     = {Texture-Preserving Diffusion Models for High-Fidelity Virtual Try-On},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7017--7026},
}
Towards Robust Event-guided Low-Light Image Enhancement: A Large-Scale Real-World Event-Image Dataset and Novel Approach-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2024_CVPR,
  author    = {Liang, Guoqiang and Chen, Kanghao and Li, Hangyu and Lu, Yunfan and Wang, Lin},
  title     = {Towards Robust Event-guided Low-Light Image Enhancement: A Large-Scale Real-World Event-Image Dataset and Novel Approach},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {23--33},
}
From a Bird's Eye View to See: Joint Camera and Subject Registration without the Camera Calibration-
[pdf]
[supp]
[bibtex]@InProceedings{Qian_2024_CVPR,
  author    = {Qian, Zekun and Han, Ruize and Feng, Wei and Wang, Song},
  title     = {From a Bird's Eye View to See: Joint Camera and Subject Registration without the Camera Calibration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {863--873},
}
Enhancing Video Super-Resolution via Implicit Resampling-based Alignment-
[pdf]
[supp]
[arXiv]
% Key renamed from Xu_2024_CVPR: that key is already taken by an earlier entry in this file (repeated-entry error).
[bibtex]@InProceedings{Xu_2024_CVPR_Enhancing,
  author    = {Xu, Kai and Yu, Ziwei and Wang, Xin and Mi, Michael Bi and Yao, Angela},
  title     = {Enhancing Video Super-Resolution via Implicit Resampling-based Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2546--2555},
}
Parameter Efficient Fine-tuning via Cross Block Orchestration for Segment Anything Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2024_CVPR,
  author    = {Peng, Zelin and Xu, Zhengqin and Zeng, Zhilin and Xie, Lingxi and Tian, Qi and Shen, Wei},
  title     = {Parameter Efficient Fine-tuning via Cross Block Orchestration for {Segment Anything Model}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3743--3752},
}
Masked and Shuffled Blind Spot Denoising for Real-World Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chihaoui_2024_CVPR,
  author    = {Chihaoui, Hamadi and Favaro, Paolo},
  title     = {Masked and Shuffled Blind Spot Denoising for Real-World Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3025--3034},
}
DiffusionAvatars: Deferred Diffusion for High-fidelity 3D Head Avatars-
[pdf]
[supp]
[bibtex]@InProceedings{Kirschstein_2024_CVPR,
  author    = {Kirschstein, Tobias and Giebenhain, Simon and Nie{\ss}ner, Matthias},
  title     = {{DiffusionAvatars}: Deferred Diffusion for High-fidelity {3D} Head Avatars},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5481--5492},
}
Data-Free Quantization via Pseudo-label Filtering-
[pdf]
[bibtex]@InProceedings{Fan_2024_CVPR,
  author    = {Fan, Chunxiao and Wang, Ziqi and Guo, Dan and Wang, Meng},
  title     = {Data-Free Quantization via Pseudo-label Filtering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5589--5598},
}
Generative Powers of Ten-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Xiaojuan and Kontkanen, Janne and Curless, Brian and Seitz, Steven M. and Kemelmacher-Shlizerman, Ira and Mildenhall, Ben and Srinivasan, Pratul and Verbin, Dor and Holynski, Aleksander},
  title     = {Generative Powers of Ten},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7173--7182},
}
Text-conditional Attribute Alignment across Latent Spaces for 3D Controllable Face Image Synthesis-
[pdf]
% Key renamed from Xu_2024_CVPR: that key is already taken by an earlier entry in this file (repeated-entry error).
[bibtex]@InProceedings{Xu_2024_CVPR_TextConditional,
  author    = {Xu, Feifan and Li, Rui and Wu, Si and Xu, Yong and Wong, Hau San},
  title     = {Text-conditional Attribute Alignment across Latent Spaces for {3D} Controllable Face Image Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9172--9181},
}
Correcting Diffusion Generation through Resampling-
[pdf]
[supp]
[arXiv]
% Key renamed from Liu_2024_CVPR: that key is already taken by an earlier entry in this file (repeated-entry error).
[bibtex]@InProceedings{Liu_2024_CVPR_Correcting,
  author    = {Liu, Yujian and Zhang, Yang and Jaakkola, Tommi and Chang, Shiyu},
  title     = {Correcting Diffusion Generation through Resampling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8713--8723},
}
AirPlanes: Accurate Plane Estimation via 3D-Consistent Embeddings-
[pdf]
[arXiv]
[bibtex]@InProceedings{Watson_2024_CVPR,
  author    = {Watson, Jamie and Aleotti, Filippo and Sayed, Mohamed and Qureshi, Zawar and Mac Aodha, Oisin and Brostow, Gabriel and Firman, Michael and Vicente, Sara},
  title     = {{AirPlanes}: Accurate Plane Estimation via {3D-Consistent} Embeddings},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5270--5280},
}
Blur2Blur: Blur Conversion for Unsupervised Image Deblurring on Unknown Domains-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pham_2024_CVPR,
  author    = {Pham, Bang-Dang and Tran, Phong and Tran, Anh and Pham, Cuong and Nguyen, Rang and Hoai, Minh},
  title     = {{Blur2Blur}: Blur Conversion for Unsupervised Image Deblurring on Unknown Domains},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2804--2813},
}
Exploring Vision Transformers for 3D Human Motion-Language Models with Motion Patches-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2024_CVPR,
  author    = {Yu, Qing and Tanaka, Mikihiro and Fujiwara, Kent},
  title     = {Exploring Vision Transformers for {3D} Human Motion-Language Models with Motion Patches},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {937--946},
}
Clustering for Protein Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Quan_2024_CVPR,
  author    = {Quan, Ruijie and Wang, Wenguan and Ma, Fan and Fan, Hehe and Yang, Yi},
  title     = {Clustering for Protein Representation Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {319--329},
}
CorrMatch: Label Propagation via Correlation Matching for Semi-Supervised Semantic Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Sun_2024_CVPR,
  author    = {Sun, Boyuan and Yang, Yuqi and Zhang, Le and Cheng, Ming-Ming and Hou, Qibin},
  title     = {{CorrMatch}: Label Propagation via Correlation Matching for Semi-Supervised Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3097--3107},
}
Estimating Extreme 3D Image Rotations using Cascaded Attention-
[pdf]
[supp]
[bibtex]@InProceedings{Dekel_2024_CVPR,
  author    = {Dekel, Shay and Keller, Yosi and Cadik, Martin},
  title     = {Estimating Extreme {3D} Image Rotations using Cascaded Attention},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2588--2598},
}
Adapt or Perish: Adaptive Sparse Transformer with Attentive Feature Refinement for Image Restoration-
[pdf]
[bibtex]@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Shihao and Chen, Duosheng and Pan, Jinshan and Shi, Jinglei and Yang, Jufeng},
  title     = {Adapt or Perish: Adaptive Sparse Transformer with Attentive Feature Refinement for Image Restoration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2952--2963},
}
VINECS: Video-based Neural Character Skinning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liao_2024_CVPR,
  author    = {Liao, Zhouyingcheng and Golyanik, Vladislav and Habermann, Marc and Theobalt, Christian},
  title     = {{VINECS}: Video-based Neural Character Skinning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1377--1387},
}
Your Student is Better Than Expected: Adaptive Teacher-Student Collaboration for Text-Conditional Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Starodubcev_2024_CVPR,
  author    = {Starodubcev, Nikita and Baranchuk, Dmitry and Fedorov, Artem and Babenko, Artem},
  title     = {Your Student is Better Than Expected: Adaptive Teacher-Student Collaboration for Text-Conditional Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9275--9285},
}
SHViT: Single-Head Vision Transformer with Memory Efficient Macro Design-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yun_2024_CVPR, author = {Yun, Seokju and Ro, Youngmin}, title = {SHViT: Single-Head Vision Transformer with Memory Efficient Macro Design}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5756-5767} }
CommonCanvas: Open Diffusion Models Trained on Creative-Commons Images-
[pdf]
[supp]
[bibtex]@InProceedings{Gokaslan_2024_CVPR, author = {Gokaslan, Aaron and Cooper, A. Feder and Collins, Jasmine and Seguin, Landan and Jacobson, Austin and Patel, Mihir and Frankle, Jonathan and Stephenson, Cory and Kuleshov, Volodymyr}, title = {CommonCanvas: Open Diffusion Models Trained on Creative-Commons Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8250-8260} }
Prompt-Driven Referring Image Segmentation with Instance Contrasting-
[pdf]
[bibtex]@InProceedings{Shang_2024_CVPR, author = {Shang, Chao and Song, Zichen and Qiu, Heqian and Wang, Lanxiao and Meng, Fanman and Li, Hongliang}, title = {Prompt-Driven Referring Image Segmentation with Instance Contrasting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4124-4134} }
Image Sculpting: Precise Object Editing with 3D Geometry Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yenphraphai_2024_CVPR, author = {Yenphraphai, Jiraphon and Pan, Xichen and Liu, Sainan and Panozzo, Daniele and Xie, Saining}, title = {Image Sculpting: Precise Object Editing with 3D Geometry Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4241-4251} }
PFStorer: Personalized Face Restoration and Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Varanka_2024_CVPR, author = {Varanka, Tuomas and Toivonen, Tapani and Tripathy, Soumya and Zhao, Guoying and Acar, Erman}, title = {PFStorer: Personalized Face Restoration and Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2372-2381} }
TextureDreamer: Image-Guided Texture Synthesis Through Geometry-Aware Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yeh_2024_CVPR, author = {Yeh, Yu-Ying and Huang, Jia-Bin and Kim, Changil and Xiao, Lei and Nguyen-Phuoc, Thu and Khan, Numair and Zhang, Cheng and Chandraker, Manmohan and Marshall, Carl S and Dong, Zhao and Li, Zhengqin}, title = {TextureDreamer: Image-Guided Texture Synthesis Through Geometry-Aware Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4304-4314} }
Boosting Image Quality Assessment through Efficient Transformer Adaptation with Local Feature Enhancement-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Kangmin and Liao, Liang and Xiao, Jing and Chen, Chaofeng and Wu, Haoning and Yan, Qiong and Lin, Weisi}, title = {Boosting Image Quality Assessment through Efficient Transformer Adaptation with Local Feature Enhancement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2662-2672} }
Attention Calibration for Disentangled Text-to-Image Personalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Yanbing and Yang, Mengping and Zhou, Qin and Wang, Zhe}, title = {Attention Calibration for Disentangled Text-to-Image Personalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4764-4774} }
One-Shot Structure-Aware Stylized Image Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cho_2024_CVPR, author = {Cho, Hansam and Lee, Jonghyun and Chang, Seunggyu and Jeong, Yonghyun}, title = {One-Shot Structure-Aware Stylized Image Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8302-8311} }
MR-VNet: Media Restoration using Volterra Networks-
[pdf]
[bibtex]@InProceedings{Roheda_2024_CVPR, author = {Roheda, Siddharth and Unde, Amit and Rashid, Loay}, title = {MR-VNet: Media Restoration using Volterra Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6098-6107} }
Single Mesh Diffusion Models with Field Latents for Texture Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mitchel_2024_CVPR, author = {Mitchel, Thomas W. and Esteves, Carlos and Makadia, Ameesh}, title = {Single Mesh Diffusion Models with Field Latents for Texture Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7953-7963} }
SAI3D: Segment Any Instance in 3D Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2024_CVPR, author = {Yin, Yingda and Liu, Yuzheng and Xiao, Yang and Cohen-Or, Daniel and Huang, Jingwei and Chen, Baoquan}, title = {SAI3D: Segment Any Instance in 3D Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3292-3302} }
TexOct: Generating Textures of 3D Models with Octree-based Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Jialun and Wu, Chenming and Liu, Xinqi and Liu, Xing and Wu, Jinbo and Peng, Haotian and Zhao, Chen and Feng, Haocheng and Liu, Jingtuo and Ding, Errui}, title = {TexOct: Generating Textures of 3D Models with Octree-based Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4284-4293} }
Anatomically Constrained Implicit Face Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chandran_2024_CVPR, author = {Chandran, Prashanth and Zoss, Gaspard}, title = {Anatomically Constrained Implicit Face Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2220-2229} }
Capturing Closely Interacted Two-Person Motions with Reaction Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Fang_2024_CVPR, author = {Fang, Qi and Fan, Yinghui and Li, Yanjun and Dong, Junting and Wu, Dingwei and Zhang, Weidong and Chen, Kang}, title = {Capturing Closely Interacted Two-Person Motions with Reaction Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {655-665} }
RobustSAM: Segment Anything Robustly on Degraded Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Wei-Ting and Vong, Yu-Jiet and Kuo, Sy-Yen and Ma, Sizhou and Wang, Jian}, title = {RobustSAM: Segment Anything Robustly on Degraded Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4081-4091} }
In-N-Out: Faithful 3D GAN Inversion with Volumetric Decomposition for Face Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Yiran and Shu, Zhixin and Smith, Cameron and Oh, Seoung Wug and Huang, Jia-Bin}, title = {In-N-Out: Faithful 3D GAN Inversion with Volumetric Decomposition for Face Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7225-7235} }
Combining Frame and GOP Embeddings for Neural Video Representation-
[pdf]
[supp]
[bibtex]@InProceedings{Saethre_2024_CVPR, author = {Saethre, Jens Eirik and Azevedo, Roberto and Schroers, Christopher}, title = {Combining Frame and GOP Embeddings for Neural Video Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9253-9263} }
Fantastic Animals and Where to Find Them: Segment Any Marine Animal with Dual SAM-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Pingping and Yan, Tianyu and Liu, Yang and Lu, Huchuan}, title = {Fantastic Animals and Where to Find Them: Segment Any Marine Animal with Dual SAM}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2578-2587} }
Seeing and Hearing: Open-domain Visual-Audio Generation with Diffusion Latent Aligners-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xing_2024_CVPR, author = {Xing, Yazhou and He, Yingqing and Tian, Zeyue and Wang, Xintao and Chen, Qifeng}, title = {Seeing and Hearing: Open-domain Visual-Audio Generation with Diffusion Latent Aligners}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7151-7161} }
Objects as Volumes: A Stochastic Geometry View of Opaque Solids-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Miller_2024_CVPR, author = {Miller, Bailey and Chen, Hanyu and Lai, Alice and Gkioulekas, Ioannis}, title = {Objects as Volumes: A Stochastic Geometry View of Opaque Solids}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {87-97} }
Improving Subject-Driven Image Synthesis with Subject-Agnostic Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chan_2024_CVPR, author = {Chan, Kelvin C.K. and Zhao, Yang and Jia, Xuhui and Yang, Ming-Hsuan and Wang, Huisheng}, title = {Improving Subject-Driven Image Synthesis with Subject-Agnostic Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6733-6742} }
Diffusion Model Alignment Using Direct Preference Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wallace_2024_CVPR, author = {Wallace, Bram and Dang, Meihua and Rafailov, Rafael and Zhou, Linqi and Lou, Aaron and Purushwalkam, Senthil and Ermon, Stefano and Xiong, Caiming and Joty, Shafiq and Naik, Nikhil}, title = {Diffusion Model Alignment Using Direct Preference Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8228-8238} }
ZeroNVS: Zero-Shot 360-Degree View Synthesis from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sargent_2024_CVPR, author = {Sargent, Kyle and Li, Zizhang and Shah, Tanmay and Herrmann, Charles and Yu, Hong-Xing and Zhang, Yunzhi and Chan, Eric Ryan and Lagun, Dmitry and Fei-Fei, Li and Sun, Deqing and Wu, Jiajun}, title = {ZeroNVS: Zero-Shot 360-Degree View Synthesis from a Single Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9420-9429} }
Restoration by Generation with Constrained Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ding_2024_CVPR, author = {Ding, Zheng and Zhang, Xuaner and Tu, Zhuowen and Xia, Zhihao}, title = {Restoration by Generation with Constrained Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2567-2577} }
Blur-aware Spatio-temporal Sparse Transformer for Video Deblurring-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Huicong and Xie, Haozhe and Yao, Hongxun}, title = {Blur-aware Spatio-temporal Sparse Transformer for Video Deblurring}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2673-2681} }
DiffusionPoser: Real-time Human Motion Reconstruction From Arbitrary Sparse Sensors Using Autoregressive Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Van_Wouwe_2024_CVPR, author = {Van Wouwe, Tom and Lee, Seunghwan and Falisse, Antoine and Delp, Scott and Liu, C. Karen}, title = {DiffusionPoser: Real-time Human Motion Reconstruction From Arbitrary Sparse Sensors Using Autoregressive Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2513-2523} }
MANUS: Markerless Grasp Capture using Articulated 3D Gaussians-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pokhariya_2024_CVPR, author = {Pokhariya, Chandradeep and Shah, Ishaan Nikhil and Xing, Angela and Li, Zekun and Chen, Kefan and Sharma, Avinash and Sridhar, Srinath}, title = {MANUS: Markerless Grasp Capture using Articulated 3D Gaussians}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2197-2208} }
BerfScene: Bev-conditioned Equivariant Radiance Fields for Infinite 3D Scene Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Qihang and Xu, Yinghao and Shen, Yujun and Dai, Bo and Zhou, Bolei and Yang, Ceyuan}, title = {BerfScene: Bev-conditioned Equivariant Radiance Fields for Infinite 3D Scene Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6839-6849} }
3D Facial Expressions through Analysis-by-Neural-Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Retsinas_2024_CVPR, author = {Retsinas, George and Filntisis, Panagiotis P. and Danecek, Radek and Abrevaya, Victoria F. and Roussos, Anastasios and Bolkart, Timo and Maragos, Petros}, title = {3D Facial Expressions through Analysis-by-Neural-Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2490-2501} }
Unleashing the Potential of SAM for Medical Adaptation via Hierarchical Decoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2024_CVPR, author = {Cheng, Zhiheng and Wei, Qingyue and Zhu, Hongru and Wang, Yan and Qu, Liangqiong and Shao, Wei and Zhou, Yuyin}, title = {Unleashing the Potential of SAM for Medical Adaptation via Hierarchical Decoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3511-3522} }
Puff-Net: Efficient Style Transfer with Pure Content and Style Feature Fusion Network-
[pdf]
[supp]
[bibtex]@InProceedings{Zheng_2024_CVPR, author = {Zheng, Sizhe and Gao, Pan and Zhou, Peng and Qin, Jie}, title = {Puff-Net: Efficient Style Transfer with Pure Content and Style Feature Fusion Network}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8059-8068} }
Towards Progressive Multi-Frequency Representation for Image Warping-
[pdf]
[bibtex]@InProceedings{Xiao_2024_CVPR, author = {Xiao, Jun and Lyu, Zihang and Zhang, Cong and Ju, Yakun and Shui, Changjian and Lam, Kin-Man}, title = {Towards Progressive Multi-Frequency Representation for Image Warping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2995-3004} }
Learning to Control Camera Exposure via Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2024_CVPR, author = {Lee, Kyunghyun and Shin, Ukcheol and Lee, Byeong-Uk}, title = {Learning to Control Camera Exposure via Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2975-2983} }
RNb-NeuS: Reflectance and Normal-based Multi-View 3D Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Brument_2024_CVPR, author = {Brument, Baptiste and Bruneau, Robin and Qu\'eau, Yvain and M\'elou, Jean and Lauze, Fran\c{c}ois Bernard and Durou, Jean-Denis and Calvet, Lilian}, title = {RNb-NeuS: Reflectance and Normal-based Multi-View 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5230-5239} }
Scaling Up Dynamic Human-Scene Interaction Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2024_CVPR, author = {Jiang, Nan and Zhang, Zhiyuan and Li, Hongjie and Ma, Xiaoxuan and Wang, Zan and Chen, Yixin and Liu, Tengyu and Zhu, Yixin and Huang, Siyuan}, title = {Scaling Up Dynamic Human-Scene Interaction Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1737-1747} }
Semantic-aware SAM for Point-Prompted Instance Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2024_CVPR, author = {Wei, Zhaoyang and Chen, Pengfei and Yu, Xuehui and Li, Guorong and Jiao, Jianbin and Han, Zhenjun}, title = {Semantic-aware SAM for Point-Prompted Instance Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3585-3594} }
Make Pixels Dance: High-Dynamic Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2024_CVPR, author = {Zeng, Yan and Wei, Guoqiang and Zheng, Jiani and Zou, Jiaxin and Wei, Yang and Zhang, Yuchen and Li, Hang}, title = {Make Pixels Dance: High-Dynamic Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8850-8860} }
A&B BNN: Add&Bit-Operation-Only Hardware-Friendly Binary Neural Network-
[pdf]
[bibtex]@InProceedings{Ma_2024_CVPR, author = {Ma, Ruichen and Qiao, Guanchao and Liu, Yian and Meng, Liwei and Ning, Ning and Liu, Yang and Hu, Shaogang}, title = {A\&B BNN: Add\&Bit-Operation-Only Hardware-Friendly Binary Neural Network}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5704-5713} }
Task-aligned Part-aware Panoptic Segmentation through Joint Object-Part Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{de_Geus_2024_CVPR, author = {de Geus, Daan and Dubbelman, Gijs}, title = {Task-aligned Part-aware Panoptic Segmentation through Joint Object-Part Representations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3174-3183} }
From Activation to Initialization: Scaling Insights for Optimizing Neural Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Saratchandran_2024_CVPR, author = {Saratchandran, Hemanth and Ramasinghe, Sameera and Lucey, Simon}, title = {From Activation to Initialization: Scaling Insights for Optimizing Neural Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {413-422} }
DiffAvatar: Simulation-Ready Garment Optimization with Differentiable Simulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Yifei and Chen, Hsiao-yu and Larionov, Egor and Sarafianos, Nikolaos and Matusik, Wojciech and Stuyck, Tuur}, title = {DiffAvatar: Simulation-Ready Garment Optimization with Differentiable Simulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4368-4378} }
AlignSAM: Aligning Segment Anything Model to Open Context via Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Duojun and Xiong, Xinyu and Ma, Jie and Li, Jichang and Jie, Zequn and Ma, Lin and Li, Guanbin}, title = {AlignSAM: Aligning Segment Anything Model to Open Context via Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3205-3215} }
Learning Spatial Adaptation and Temporal Coherence in Diffusion Models for Video Super-Resolution-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Zhikai and Long, Fuchen and Qiu, Zhaofan and Yao, Ting and Zhou, Wengang and Luo, Jiebo and Mei, Tao}, title = {Learning Spatial Adaptation and Temporal Coherence in Diffusion Models for Video Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9232-9241} }
Denoising Point Clouds in Latent Space via Graph Convolution and Invertible Neural Network-
[pdf]
[supp]
[bibtex]@InProceedings{Mao_2024_CVPR, author = {Mao, Aihua and Yan, Biao and Ma, Zijing and He, Ying}, title = {Denoising Point Clouds in Latent Space via Graph Convolution and Invertible Neural Network}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5768-5777} }
HIR-Diff: Unsupervised Hyperspectral Image Restoration Via Improved Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Pang_2024_CVPR, author = {Pang, Li and Rui, Xiangyu and Cui, Long and Wang, Hongzhong and Meng, Deyu and Cao, Xiangyong}, title = {HIR-Diff: Unsupervised Hyperspectral Image Restoration Via Improved Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3005-3014} }
FreeDrag: Feature Dragging for Reliable Point-based Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ling_2024_CVPR, author = {Ling, Pengyang and Chen, Lin and Zhang, Pan and Chen, Huaian and Jin, Yi and Zheng, Jinjin}, title = {FreeDrag: Feature Dragging for Reliable Point-based Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6860-6870} }
Confronting Ambiguity in 6D Object Pose Estimation via Score-Based Diffusion on SE(3)-
[pdf]
[arXiv]
[bibtex]@InProceedings{Hsiao_2024_CVPR, author = {Hsiao, Tsu-Ching and Chen, Hao-Wei and Yang, Hsuan-Kung and Lee, Chun-Yi}, title = {Confronting Ambiguity in 6D Object Pose Estimation via Score-Based Diffusion on SE(3)}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {352-362} }
DiffInDScene: Diffusion-based High-Quality 3D Indoor Scene Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ju_2024_CVPR, author = {Ju, Xiaoliang and Huang, Zhaoyang and Li, Yijin and Zhang, Guofeng and Qiao, Yu and Li, Hongsheng}, title = {DiffInDScene: Diffusion-based High-Quality 3D Indoor Scene Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4526-4535} }
MAPSeg: Unified Unsupervised Domain Adaptation for Heterogeneous Medical Image Segmentation Based on 3D Masked Autoencoding and Pseudo-Labeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Xuzhe and Wu, Yuhao and Angelini, Elsa and Li, Ang and Guo, Jia and Rasmussen, Jerod M. and O'Connor, Thomas G. and Wadhwa, Pathik D. and Jackowski, Andrea Parolin and Li, Hai and Posner, Jonathan and Laine, Andrew F. and Wang, Yun}, title = {MAPSeg: Unified Unsupervised Domain Adaptation for Heterogeneous Medical Image Segmentation Based on 3D Masked Autoencoding and Pseudo-Labeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5851-5862} }
DaReNeRF: Direction-aware Representation for Dynamic Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lou_2024_CVPR, author = {Lou, Ange and Planche, Benjamin and Gao, Zhongpai and Li, Yamin and Luan, Tianyu and Ding, Hao and Chen, Terrence and Noble, Jack and Wu, Ziyan}, title = {DaReNeRF: Direction-aware Representation for Dynamic Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5031-5042} }
SfmCAD: Unsupervised CAD Reconstruction by Learning Sketch-based Feature Modeling Operations-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Pu and Guo, Jianwei and Li, Huibin and Benes, Bedrich and Yan, Dong-Ming}, title = {SfmCAD: Unsupervised CAD Reconstruction by Learning Sketch-based Feature Modeling Operations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4671-4680} }
Learning Degradation-unaware Representation with Prior-based Latent Transformations for Blind Face Restoration-
[pdf]
[bibtex]@InProceedings{Xie_2024_CVPR, author = {Xie, Lianxin and Zheng, Csbingbing and Xue, Wen and Jiang, Le and Liu, Cheng and Wu, Si and Wong, Hau San}, title = {Learning Degradation-unaware Representation with Prior-based Latent Transformations for Blind Face Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9120-9129} }
Faces that Speak: Jointly Synthesising Talking Face and Speech from Text-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jang_2024_CVPR, author = {Jang, Youngjoon and Kim, Ji-Hoon and Ahn, Junseok and Kwak, Doyeop and Yang, Hong-Sun and Ju, Yoon-Cheol and Kim, Il-Hwan and Kim, Byeong-Yeol and Chung, Joon Son}, title = {Faces that Speak: Jointly Synthesising Talking Face and Speech from Text}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8818-8828} }
DiffusionRegPose: Enhancing Multi-Person Pose Estimation using a Diffusion-Based End-to-End Regression Approach-
[pdf]
[supp]
[bibtex]@InProceedings{Tan_2024_CVPR, author = {Tan, Dayi and Chen, Hansheng and Tian, Wei and Xiong, Lu}, title = {DiffusionRegPose: Enhancing Multi-Person Pose Estimation using a Diffusion-Based End-to-End Regression Approach}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2230-2239} }
Memory-Scalable and Simplified Functional Map Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Magnet_2024_CVPR, author = {Magnet, Robin and Ovsjanikov, Maks}, title = {Memory-Scalable and Simplified Functional Map Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4041-4050} }
Gaussian Head Avatar: Ultra High-fidelity Head Avatar via Dynamic Gaussians-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Yuelang and Chen, Benwang and Li, Zhe and Zhang, Hongwen and Wang, Lizhen and Zheng, Zerong and Liu, Yebin}, title = {Gaussian Head Avatar: Ultra High-fidelity Head Avatar via Dynamic Gaussians}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1931-1941} }
Stratified Avatar Generation from Sparse Observations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2024_CVPR, author = {Feng, Han and Ma, Wenchao and Gao, Quankai and Zheng, Xianwei and Xue, Nan and Xu, Huijuan}, title = {Stratified Avatar Generation from Sparse Observations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {153-163} }
Rewrite the Stars-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2024_CVPR, author = {Ma, Xu and Dai, Xiyang and Bai, Yue and Wang, Yizhou and Fu, Yun}, title = {Rewrite the Stars}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5694-5703} }
PairDETR : Joint Detection and Association of Human Bodies and Faces-
[pdf]
[supp]
[bibtex]@InProceedings{Ali_2024_CVPR, author = {Ali, Ammar and Gaikov, Georgii and Rybalchenko, Denis and Chigorin, Alexander and Laptev, Ivan and Zagoruyko, Sergey}, title = {PairDETR : Joint Detection and Association of Human Bodies and Faces}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {423-432} }
SportsSloMo: A New Benchmark and Baselines for Human-centric Video Frame Interpolation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Jiaben and Jiang, Huaizu}, title = {SportsSloMo: A New Benchmark and Baselines for Human-centric Video Frame Interpolation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6475-6486} }
Text2HOI: Text-guided 3D Motion Generation for Hand-Object Interaction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cha_2024_CVPR, author = {Cha, Junuk and Kim, Jihyeon and Yoon, Jae Shin and Baek, Seungryul}, title = {Text2HOI: Text-guided 3D Motion Generation for Hand-Object Interaction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1577-1585} }
MACE: Mass Concept Erasure in Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2024_CVPR, author = {Lu, Shilin and Wang, Zilan and Li, Leyang and Liu, Yanzhu and Kong, Adams Wai-Kin}, title = {MACE: Mass Concept Erasure in Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6430-6440} }
PeLK: Parameter-efficient Large Kernel ConvNets with Peripheral Convolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Honghao and Chu, Xiangxiang and Ren, Yongjian and Zhao, Xin and Huang, Kaiqi}, title = {PeLK: Parameter-efficient Large Kernel ConvNets with Peripheral Convolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5557-5567} }
AiOS: All-in-One-Stage Expressive Human Pose and Shape Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2024_CVPR, author = {Sun, Qingping and Wang, Yanjun and Zeng, Ailing and Yin, Wanqi and Wei, Chen and Wang, Wenjia and Mei, Haiyi and Leung, Chi-Sing and Liu, Ziwei and Yang, Lei and Cai, Zhongang}, title = {AiOS: All-in-One-Stage Expressive Human Pose and Shape Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1834-1843} }
Design2Cloth: 3D Cloth Generation from 2D Masks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2024_CVPR, author = {Zheng, Jiali and Potamias, Rolandos Alexandros and Zafeiriou, Stefanos}, title = {Design2Cloth: 3D Cloth Generation from 2D Masks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1748-1758} }
Amodal Completion via Progressive Mixed Context Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Katherine and Zhang, Lingzhi and Shi, Jianbo}, title = {Amodal Completion via Progressive Mixed Context Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9099-9109} }
Diffusion 3D Features (Diff3F): Decorating Untextured Shapes with Distilled Semantic Features-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dutt_2024_CVPR, author = {Dutt, Niladri Shekhar and Muralikrishnan, Sanjeev and Mitra, Niloy J.}, title = {Diffusion 3D Features (Diff3F): Decorating Untextured Shapes with Distilled Semantic Features}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4494-4504} }
Cinematic Behavior Transfer via NeRF-based Differentiable Filming-
[pdf]
[arXiv]
[bibtex]@InProceedings{Jiang_2024_CVPR, author = {Jiang, Xuekun and Rao, Anyi and Wang, Jingbo and Lin, Dahua and Dai, Bo}, title = {Cinematic Behavior Transfer via NeRF-based Differentiable Filming}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6723-6732} }
Text-Driven Image Editing via Learnable Regions-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lin_2024_CVPR, author = {Lin, Yuanze and Chen, Yi-Wen and Tsai, Yi-Hsuan and Jiang, Lu and Yang, Ming-Hsuan}, title = {Text-Driven Image Editing via Learnable Regions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7059-7068} }
Relation Rectification in Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Yinwei and Yang, Xingyi and Wang, Xinchao}, title = {Relation Rectification in Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7685-7694} }
Mocap Everyone Everywhere: Lightweight Motion Capture With Smartwatches and a Head-Mounted Camera-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2024_CVPR, author = {Lee, Jiye and Joo, Hanbyul}, title = {Mocap Everyone Everywhere: Lightweight Motion Capture With Smartwatches and a Head-Mounted Camera}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1091-1100} }
Fast ODE-based Sampling for Diffusion Models in Around 5 Steps-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR, author = {Zhou, Zhenyu and Chen, Defang and Wang, Can and Chen, Chun}, title = {Fast ODE-based Sampling for Diffusion Models in Around 5 Steps}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7777-7786} }
CLiC: Concept Learning in Context-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Safaee_2024_CVPR, author = {Safaee, Mehdi and Mikaeili, Aryan and Patashnik, Or and Cohen-Or, Daniel and Mahdavi-Amiri, Ali}, title = {CLiC: Concept Learning in Context}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6924-6933} }
CAD-SIGNet: CAD Language Inference from Point Clouds using Layer-wise Sketch Instance Guided Attention-
[pdf]
[supp]
[bibtex]@InProceedings{Khan_2024_CVPR, author = {Khan, Mohammad Sadil and Dupont, Elona and Ali, Sk Aziz and Cherenkova, Kseniya and Kacem, Anis and Aouada, Djamila}, title = {CAD-SIGNet: CAD Language Inference from Point Clouds using Layer-wise Sketch Instance Guided Attention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4713-4722} }
CLIB-FIQA: Face Image Quality Assessment with Confidence Calibration-
[pdf]
[bibtex]@InProceedings{Ou_2024_CVPR, author = {Ou, Fu-Zhao and Li, Chongyi and Wang, Shiqi and Kwong, Sam}, title = {CLIB-FIQA: Face Image Quality Assessment with Confidence Calibration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1694-1704} }
Predicated Diffusion: Predicate Logic-Based Attention Guidance for Text-to-Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sueyoshi_2024_CVPR, author = {Sueyoshi, Kota and Matsubara, Takashi}, title = {Predicated Diffusion: Predicate Logic-Based Attention Guidance for Text-to-Image Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8651-8660} }
MoML: Online Meta Adaptation for 3D Human Motion Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2024_CVPR, author = {Sun, Xiaoning and Sun, Huaijiang and Li, Bin and Wei, Dong and Li, Weiqing and Lu, Jianfeng}, title = {MoML: Online Meta Adaptation for 3D Human Motion Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1042-1051} }
CAT-DM: Controllable Accelerated Virtual Try-on with Diffusion Model-
[pdf]
[supp]
[bibtex]@InProceedings{Zeng_2024_CVPR, author = {Zeng, Jianhao and Song, Dan and Nie, Weizhi and Tian, Hongshuo and Wang, Tongtong and Liu, An-An}, title = {CAT-DM: Controllable Accelerated Virtual Try-on with Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8372-8382} }
Synergistic Global-space Camera and Human Reconstruction from Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Yizhou and Wang, Tuanfeng Yang and Raj, Bhiksha and Xu, Min and Yang, Jimei and Huang, Chun-Hao Paul}, title = {Synergistic Global-space Camera and Human Reconstruction from Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1216-1226} }
3D Face Reconstruction with the Geometric Guidance of Facial Part Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Zidu and Zhu, Xiangyu and Zhang, Tianshuo and Wang, Baiqin and Lei, Zhen}, title = {3D Face Reconstruction with the Geometric Guidance of Facial Part Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1672-1682} }
FreeU: Free Lunch in Diffusion U-Net-
[pdf]
[supp]
[bibtex]@InProceedings{Si_2024_CVPR, author = {Si, Chenyang and Huang, Ziqi and Jiang, Yuming and Liu, Ziwei}, title = {FreeU: Free Lunch in Diffusion U-Net}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4733-4743} }
ViewDiff: 3D-Consistent Image Generation with Text-to-Image Models-
[pdf]
[supp]
[bibtex]@InProceedings{Hollein_2024_CVPR, author = {H\"ollein, Lukas and Bo\v{z}i\v{c}, Alja\v{z} and M\"uller, Norman and Novotny, David and Tseng, Hung-Yu and Richardt, Christian and Zollh\"ofer, Michael and Nie{\ss}ner, Matthias}, title = {ViewDiff: 3D-Consistent Image Generation with Text-to-Image Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5043-5052} }
Diffusion Models Without Attention-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yan_2024_CVPR, author = {Yan, Jing Nathan and Gu, Jiatao and Rush, Alexander M.}, title = {Diffusion Models Without Attention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8239-8249} }
Emotional Speech-driven 3D Body Animation via Disentangled Latent Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Chhatre_2024_CVPR, author = {Chhatre, Kiran and Dan\v{e}\v{c}ek, Radek and Athanasiou, Nikos and Becherini, Giorgio and Peters, Christopher and Black, Michael J. and Bolkart, Timo}, title = {Emotional Speech-driven 3D Body Animation via Disentangled Latent Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1942-1953} }
Retrieval-Augmented Layout Transformer for Content-Aware Layout Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Horita_2024_CVPR, author = {Horita, Daichi and Inoue, Naoto and Kikuchi, Kotaro and Yamaguchi, Kota and Aizawa, Kiyoharu}, title = {Retrieval-Augmented Layout Transformer for Content-Aware Layout Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {67-76} }
InstantBooth: Personalized Text-to-Image Generation without Test-Time Finetuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2024_CVPR, author = {Shi, Jing and Xiong, Wei and Lin, Zhe and Jung, Hyun Joon}, title = {InstantBooth: Personalized Text-to-Image Generation without Test-Time Finetuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8543-8552} }
SD2Event: Self-supervised Learning of Dynamic Detectors and Contextual Descriptors for Event Cameras-
[pdf]
[bibtex]@InProceedings{Gao_2024_CVPR, author = {Gao, Yuan and Zhu, Yuqing and Li, Xinjun and Du, Yimin and Zhang, Tianzhu}, title = {SD2Event: Self-supervised Learning of Dynamic Detectors and Contextual Descriptors for Event Cameras}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3055-3064} }
PaReNeRF: Toward Fast Large-scale Dynamic NeRF with Patch-based Reference-
[pdf]
[supp]
[bibtex]@InProceedings{Tang_2024_CVPR, author = {Tang, Xiao and Yang, Min and Sun, Penghui and Li, Hui and Dai, Yuchao and Zhu, Feng and Lee, Hojae}, title = {PaReNeRF: Toward Fast Large-scale Dynamic NeRF with Patch-based Reference}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5428-5438} }
Affine Equivariant Networks Based on Differential Invariants-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Yikang and Qiu, Yeqing and Chen, Yuxuan and He, Lingshen and Lin, Zhouchen}, title = {Affine Equivariant Networks Based on Differential Invariants}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5546-5556} }
Selectively Informative Description can Reduce Undesired Embedding Entanglements in Text-to-Image Personalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Jimyeong and Park, Jungwon and Rhee, Wonjong}, title = {Selectively Informative Description can Reduce Undesired Embedding Entanglements in Text-to-Image Personalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8312-8322} }
Smooth Diffusion: Crafting Smooth Latent Spaces in Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2024_CVPR, author = {Guo, Jiayi and Xu, Xingqian and Pu, Yifan and Ni, Zanlin and Wang, Chaofei and Vasu, Manushree and Song, Shiji and Huang, Gao and Shi, Humphrey}, title = {Smooth Diffusion: Crafting Smooth Latent Spaces in Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7548-7558} }
FlowIE: Efficient Image Enhancement via Rectified Flow-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2024_CVPR, author = {Zhu, Yixuan and Zhao, Wenliang and Li, Ao and Tang, Yansong and Zhou, Jie and Lu, Jiwen}, title = {FlowIE: Efficient Image Enhancement via Rectified Flow}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13-22} }
Improving Training Efficiency of Diffusion Models via Multi-Stage Framework and Tailored Multi-Decoder Architecture-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Huijie and Lu, Yifu and Alkhouri, Ismail and Ravishankar, Saiprasad and Song, Dogyoon and Qu, Qing}, title = {Improving Training Efficiency of Diffusion Models via Multi-Stage Framework and Tailored Multi-Decoder Architecture}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7372-7381} }
In-Context Matting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2024_CVPR, author = {Guo, He and Ye, Zixuan and Cao, Zhiguo and Lu, Hao}, title = {In-Context Matting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3711-3720} }
DemoCaricature: Democratising Caricature Generation with a Rough Sketch-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Dar-Yen and Bhunia, Ayan Kumar and Koley, Subhadeep and Sain, Aneeshan and Chowdhury, Pinaki Nath and Song, Yi-Zhe}, title = {DemoCaricature: Democratising Caricature Generation with a Rough Sketch}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8629-8639} }
CapHuman: Capture Your Moments in Parallel Universes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2024_CVPR, author = {Liang, Chao and Ma, Fan and Zhu, Linchao and Deng, Yingying and Yang, Yi}, title = {CapHuman: Capture Your Moments in Parallel Universes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6400-6409} }
SDPose: Tokenized Pose Estimation via Circulation-Guide Self-Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Sichen and Zhang, Yingyi and Huang, Siming and Yi, Ran and Fan, Ke and Zhang, Ruixin and Chen, Peixian and Wang, Jun and Ding, Shouhong and Ma, Lizhuang}, title = {SDPose: Tokenized Pose Estimation via Circulation-Guide Self-Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1082-1090} }
Authentic Hand Avatar from a Phone Scan via Universal Hand Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Moon_2024_CVPR, author = {Moon, Gyeongsik and Xu, Weipeng and Joshi, Rohan and Wu, Chenglei and Shiratori, Takaaki}, title = {Authentic Hand Avatar from a Phone Scan via Universal Hand Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2029-2038} }
Open-World Semantic Segmentation Including Class Similarity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sodano_2024_CVPR, author = {Sodano, Matteo and Magistri, Federico and Nunes, Lucas and Behley, Jens and Stachniss, Cyrill}, title = {Open-World Semantic Segmentation Including Class Similarity}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3184-3194} }
Towards Memorization-Free Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Chen and Liu, Daochang and Xu, Chang}, title = {Towards Memorization-Free Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8425-8434} }
IQ-VFI: Implicit Quadratic Motion Estimation for Video Frame Interpolation-
[pdf]
[bibtex]@InProceedings{Hu_2024_CVPR, author = {Hu, Mengshun and Jiang, Kui and Zhong, Zhihang and Wang, Zheng and Zheng, Yinqiang}, title = {IQ-VFI: Implicit Quadratic Motion Estimation for Video Frame Interpolation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6410-6419} }
KeyPoint Relative Position Encoding for Face Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Minchul and Su, Yiyang and Liu, Feng and Jain, Anil and Liu, Xiaoming}, title = {KeyPoint Relative Position Encoding for Face Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {244-255} }
Hyper-MD: Mesh Denoising with Customized Parameters Aware of Noise Intensity and Geometric Characteristics-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Xingtao and Wei, Hongliang and Fan, Xiaopeng and Zhao, Debin}, title = {Hyper-MD: Mesh Denoising with Customized Parameters Aware of Noise Intensity and Geometric Characteristics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4651-4660} }
Beyond First-Order Tweedie: Solving Inverse Problems using Latent Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rout_2024_CVPR, author = {Rout, Litu and Chen, Yujia and Kumar, Abhishek and Caramanis, Constantine and Shakkottai, Sanjay and Chu, Wen-Sheng}, title = {Beyond First-Order Tweedie: Solving Inverse Problems using Latent Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9472-9481} }
Rethinking the Objectives of Vector-Quantized Tokenizers for Image Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gu_2024_CVPR, author = {Gu, Yuchao and Wang, Xintao and Ge, Yixiao and Shan, Ying and Shou, Mike Zheng}, title = {Rethinking the Objectives of Vector-Quantized Tokenizers for Image Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7631-7640} }
Continuous Pose for Monocular Cameras in Neural Implicit Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2024_CVPR, author = {Ma, Qi and Paudel, Danda Pani and Chhatkuli, Ajad and Van Gool, Luc}, title = {Continuous Pose for Monocular Cameras in Neural Implicit Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5291-5301} }
D^4: Dataset Distillation via Disentangled Diffusion Model-
[pdf]
[supp]
[bibtex]@InProceedings{Su_2024_CVPR, author = {Su, Duo and Hou, Junjie and Gao, Weizhi and Tian, Yingjie and Tang, Bowen}, title = {D{\textasciicircum}4: Dataset Distillation via Disentangled Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5809-5818} }
360DVD: Controllable Panorama Video Generation with 360-Degree Video Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Qian and Li, Weiqi and Mou, Chong and Cheng, Xinhua and Zhang, Jian}, title = {360DVD: Controllable Panorama Video Generation with 360-Degree Video Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6913-6923} }
RankMatch: Exploring the Better Consistency Regularization for Semi-supervised Semantic Segmentation-
[pdf]
[bibtex]@InProceedings{Mai_2024_CVPR, author = {Mai, Huayu and Sun, Rui and Zhang, Tianzhu and Wu, Feng}, title = {RankMatch: Exploring the Better Consistency Regularization for Semi-supervised Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3391-3401} }
DuPL: Dual Student with Trustworthy Progressive Learning for Robust Weakly Supervised Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Yuanchen and Ye, Xichen and Yang, Kequan and Li, Jide and Li, Xiaoqiang}, title = {DuPL: Dual Student with Trustworthy Progressive Learning for Robust Weakly Supervised Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3534-3543} }
SurMo: Surface-based 4D Motion Modeling for Dynamic Human Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2024_CVPR, author = {Hu, Tao and Hong, Fangzhou and Liu, Ziwei}, title = {SurMo: Surface-based 4D Motion Modeling for Dynamic Human Rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6550-6560} }
Hierarchical Spatio-temporal Decoupling for Text-to-Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qing_2024_CVPR, author = {Qing, Zhiwu and Zhang, Shiwei and Wang, Jiayu and Wang, Xiang and Wei, Yujie and Zhang, Yingya and Gao, Changxin and Sang, Nong}, title = {Hierarchical Spatio-temporal Decoupling for Text-to-Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6635-6645} }
PLACE: Adaptive Layout-Semantic Fusion for Semantic Image Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lv_2024_CVPR, author = {Lv, Zhengyao and Wei, Yuxiang and Zuo, Wangmeng and Wong, Kwan-Yee K.}, title = {PLACE: Adaptive Layout-Semantic Fusion for Semantic Image Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9264-9274} }
Exploring Efficient Asymmetric Blind-Spots for Self-Supervised Denoising in Real-World Scenarios-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Shiyan and Zhang, Jiyuan and Yu, Zhaofei and Huang, Tiejun}, title = {Exploring Efficient Asymmetric Blind-Spots for Self-Supervised Denoising in Real-World Scenarios}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2814-2823} }
Efficient Multi-scale Network with Learnable Discrete Wavelet Transform for Blind Motion Deblurring-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2024_CVPR, author = {Gao, Xin and Qiu, Tianheng and Zhang, Xinyu and Bai, Hanlin and Liu, Kang and Huang, Xuan and Wei, Hu and Zhang, Guoying and Liu, Huaping}, title = {Efficient Multi-scale Network with Learnable Discrete Wavelet Transform for Blind Motion Deblurring}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2733-2742} }
MaskPLAN: Masked Generative Layout Planning from Partial Input-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Hang and Savov, Anton and Dillenburger, Benjamin}, title = {MaskPLAN: Masked Generative Layout Planning from Partial Input}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8964-8973} }
HMD-Poser: On-Device Real-time Human Motion Tracking from Scalable Sparse Observations-
[pdf]
[supp]
[bibtex]@InProceedings{Dai_2024_CVPR, author = {Dai, Peng and Zhang, Yang and Liu, Tao and Fan, Zhen and Du, Tianyuan and Su, Zhuo and Zheng, Xiaozheng and Li, Zeming}, title = {HMD-Poser: On-Device Real-time Human Motion Tracking from Scalable Sparse Observations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {874-884} }
Flexible Biometrics Recognition: Bridging the Multimodality Gap through Attention Alignment and Prompt Tuning-
[pdf]
[supp]
[bibtex]@InProceedings{Tiong_2024_CVPR, author = {Tiong, Leslie Ching Ow and Sigmund, Dick and Chan, Chen-Hui and Teoh, Andrew Beng Jin}, title = {Flexible Biometrics Recognition: Bridging the Multimodality Gap through Attention Alignment and Prompt Tuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {267-276} }
Multi-scale Dynamic and Hierarchical Relationship Modeling for Facial Action Units Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Zihan and Song, Siyang and Luo, Cheng and Deng, Songhe and Xie, Weicheng and Shen, Linlin}, title = {Multi-scale Dynamic and Hierarchical Relationship Modeling for Facial Action Units Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1270-1280} }
EventEgo3D: 3D Human Motion Capture from Egocentric Event Streams-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Millerdurai_2024_CVPR, author = {Millerdurai, Christen and Akada, Hiroyasu and Wang, Jian and Luvizon, Diogo and Theobalt, Christian and Golyanik, Vladislav}, title = {EventEgo3D: 3D Human Motion Capture from Egocentric Event Streams}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1186-1195} }
A Call to Reflect on Evaluation Practices for Age Estimation: Comparative Analysis of the State-of-the-Art and a Unified Benchmark-
[pdf]
[supp]
[bibtex]@InProceedings{Paplham_2024_CVPR, author = {Paplh\'am, Jakub and Franc, Vojt\v{e}ch}, title = {A Call to Reflect on Evaluation Practices for Age Estimation: Comparative Analysis of the State-of-the-Art and a Unified Benchmark}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1196-1205} }
CosalPure: Learning Concept from Group Images for Robust Co-Saliency Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhu_2024_CVPR, author = {Zhu, Jiayi and Guo, Qing and Juefei-Xu, Felix and Huang, Yihao and Liu, Yang and Pu, Geguang}, title = {CosalPure: Learning Concept from Group Images for Robust Co-Saliency Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3669-3678} }
MRFP: Learning Generalizable Semantic Segmentation from Sim-2-Real with Multi-Resolution Feature Perturbation-
[pdf]
[supp]
[bibtex]@InProceedings{Udupa_2024_CVPR, author = {Udupa, Sumanth and Gurunath, Prajwal and Sikdar, Aniruddh and Sundaram, Suresh}, title = {MRFP: Learning Generalizable Semantic Segmentation from Sim-2-Real with Multi-Resolution Feature Perturbation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5904-5914} }
MotionEditor: Editing Video Motion via Content-Aware Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tu_2024_CVPR, author = {Tu, Shuyuan and Dai, Qi and Cheng, Zhi-Qi and Hu, Han and Han, Xintong and Wu, Zuxuan and Jiang, Yu-Gang}, title = {MotionEditor: Editing Video Motion via Content-Aware Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7882-7891} }
Doubly Abductive Counterfactual Inference for Text-based Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2024_CVPR, author = {Song, Xue and Cui, Jiequan and Zhang, Hanwang and Chen, Jingjing and Hong, Richang and Jiang, Yu-Gang}, title = {Doubly Abductive Counterfactual Inference for Text-based Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9162-9171} }
Normalizing Flows on the Product Space of SO(3) Manifolds for Probabilistic Human Pose Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Dunkel_2024_CVPR, author = {D\"unkel, Olaf and Salzmann, Tim and Pfaff, Florian}, title = {Normalizing Flows on the Product Space of SO(3) Manifolds for Probabilistic Human Pose Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2285-2294} }
ReGenNet: Towards Human Action-Reaction Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Liang and Zhou, Yizhou and Yan, Yichao and Jin, Xin and Zhu, Wenhan and Rao, Fengyun and Yang, Xiaokang and Zeng, Wenjun}, title = {ReGenNet: Towards Human Action-Reaction Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1759-1769} }
A Simple Baseline for Efficient Hand Mesh Reconstruction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR, author = {Zhou, Zhishan and Zhou, Shihao and Lv, Zhi and Zou, Minqiang and Tang, Yao and Liang, Jiajun}, title = {A Simple Baseline for Efficient Hand Mesh Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1367-1376} }
PhotoMaker: Customizing Realistic Human Photos via Stacked ID Embedding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Zhen and Cao, Mingdeng and Wang, Xintao and Qi, Zhongang and Cheng, Ming-Ming and Shan, Ying}, title = {PhotoMaker: Customizing Realistic Human Photos via Stacked ID Embedding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8640-8650} }
Score-Guided Diffusion for 3D Human Recovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Stathopoulos_2024_CVPR, author = {Stathopoulos, Anastasis and Han, Ligong and Metaxas, Dimitris}, title = {Score-Guided Diffusion for 3D Human Recovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {906-915} }
Check Locate Rectify: A Training-Free Layout Calibration System for Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gong_2024_CVPR, author = {Gong, Biao and Huang, Siteng and Feng, Yutong and Zhang, Shiwei and Li, Yuyuan and Liu, Yu}, title = {Check Locate Rectify: A Training-Free Layout Calibration System for Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6624-6634} }
Pose-Transformed Equivariant Network for 3D Point Trajectory Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2024_CVPR, author = {Yu, Ruixuan and Sun, Jian}, title = {Pose-Transformed Equivariant Network for 3D Point Trajectory Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5503-5512} }
Revisiting Sampson Approximations for Geometric Estimation Problems-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rydell_2024_CVPR, author = {Rydell, Felix and Torres, Ang\'elica and Larsson, Viktor}, title = {Revisiting Sampson Approximations for Geometric Estimation Problems}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4990-4998} }
Fixed Point Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bai_2024_CVPR, author = {Bai, Xingjian and Melas-Kyriazi, Luke}, title = {Fixed Point Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9430-9440} }
Residual Learning in Diffusion Models-
[pdf]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Junyu and Liu, Daochang and Park, Eunbyung and Zhang, Shichao and Xu, Chang}, title = {Residual Learning in Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7289-7299} }
Beyond Textual Constraints: Learning Novel Diffusion Conditions with Fewer Examples-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2024_CVPR, author = {Yu, Yuyang and Liu, Bangzhen and Zheng, Chenxi and Xu, Xuemiao and Zhang, Huaidong and He, Shengfeng}, title = {Beyond Textual Constraints: Learning Novel Diffusion Conditions with Fewer Examples}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7109-7118} }
Exploiting Style Latent Flows for Generalizing Deepfake Video Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2024_CVPR, author = {Choi, Jongwook and Kim, Taehoon and Jeong, Yonghyun and Baek, Seungryul and Choi, Jongwon}, title = {Exploiting Style Latent Flows for Generalizing Deepfake Video Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1133-1143} }
Video-P2P: Video Editing with Cross-attention Control-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Shaoteng and Zhang, Yuechen and Li, Wenbo and Lin, Zhe and Jia, Jiaya}, title = {Video-P2P: Video Editing with Cross-attention Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8599-8608} }
Hunting Attributes: Context Prototype-Aware Learning for Weakly Supervised Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2024_CVPR, author = {Tang, Feilong and Xu, Zhongxing and Qu, Zhaojun and Feng, Wei and Jiang, Xingjian and Ge, Zongyuan}, title = {Hunting Attributes: Context Prototype-Aware Learning for Weakly Supervised Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3324-3334} }
PIE-NeRF: Physics-based Interactive Elastodynamics with NeRF-
[pdf]
[supp]
[bibtex]@InProceedings{Feng_2024_CVPR, author = {Feng, Yutao and Shang, Yintong and Li, Xuan and Shao, Tianjia and Jiang, Chenfanfu and Yang, Yin}, title = {PIE-NeRF: Physics-based Interactive Elastodynamics with NeRF}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4450-4461} }
FlashAvatar: High-fidelity Head Avatar with Efficient Gaussian Embedding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiang_2024_CVPR, author = {Xiang, Jun and Gao, Xuan and Guo, Yudong and Zhang, Juyong}, title = {FlashAvatar: High-fidelity Head Avatar with Efficient Gaussian Embedding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1802-1812} }
ZERO-IG: Zero-Shot Illumination-Guided Joint Denoising and Adaptive Enhancement for Low-Light Images-
[pdf]
[supp]
[bibtex]@InProceedings{Shi_2024_CVPR, author = {Shi, Yiqi and Liu, Duo and Zhang, Liguo and Tian, Ye and Xia, Xuezhi and Fu, Xiaojing}, title = {ZERO-IG: Zero-Shot Illumination-Guided Joint Denoising and Adaptive Enhancement for Low-Light Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3015-3024} }
FinePOSE: Fine-Grained Prompt-Driven 3D Human Pose Estimation via Diffusion Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Jinglin and Guo, Yijie and Peng, Yuxin}, title = {FinePOSE: Fine-Grained Prompt-Driven 3D Human Pose Estimation via Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {561-570} }
DreamPropeller: Supercharge Text-to-3D Generation with Parallel Sampling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR, author = {Zhou, Linqi and Shih, Andy and Meng, Chenlin and Ermon, Stefano}, title = {DreamPropeller: Supercharge Text-to-3D Generation with Parallel Sampling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4610-4619} }
Dysen-VDM: Empowering Dynamics-aware Text-to-Video Diffusion with LLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Fei_2024_CVPR, author = {Fei, Hao and Wu, Shengqiong and Ji, Wei and Zhang, Hanwang and Chua, Tat-Seng}, title = {Dysen-VDM: Empowering Dynamics-aware Text-to-Video Diffusion with LLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7641-7653} }
General Object Foundation Model for Images and Videos at Scale-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Junfeng and Jiang, Yi and Liu, Qihao and Yuan, Zehuan and Bai, Xiang and Bai, Song}, title = {General Object Foundation Model for Images and Videos at Scale}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3783-3795} }
Inlier Confidence Calibration for Point Cloud Registration-
[pdf]
[supp]
[bibtex]@InProceedings{Yuan_2024_CVPR, author = {Yuan, Yongzhe and Wu, Yue and Fan, Xiaolong and Gong, Maoguo and Miao, Qiguang and Ma, Wenping}, title = {Inlier Confidence Calibration for Point Cloud Registration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5312-5321} }
Readout Guidance: Learning Control from Diffusion Features-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2024_CVPR, author = {Luo, Grace and Darrell, Trevor and Wang, Oliver and Goldman, Dan B and Holynski, Aleksander}, title = {Readout Guidance: Learning Control from Diffusion Features}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8217-8227} }
A Unified Approach for Text- and Image-guided 4D Scene Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Zheng_2024_CVPR, author = {Zheng, Yufeng and Li, Xueting and Nagano, Koki and Liu, Sifei and Hilliges, Otmar and De Mello, Shalini}, title = {A Unified Approach for Text- and Image-guided 4D Scene Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7300-7309} }
GaussianAvatar: Towards Realistic Human Avatar Modeling from a Single Video via Animatable 3D Gaussians-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2024_CVPR, author = {Hu, Liangxiao and Zhang, Hongwen and Zhang, Yuxiang and Zhou, Boyao and Liu, Boning and Zhang, Shengping and Nie, Liqiang}, title = {GaussianAvatar: Towards Realistic Human Avatar Modeling from a Single Video via Animatable 3D Gaussians}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {634-644} }
Mosaic-SDF for 3D Generative Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yariv_2024_CVPR, author = {Yariv, Lior and Puny, Omri and Gafni, Oran and Lipman, Yaron}, title = {Mosaic-SDF for 3D Generative Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4630-4639} }
Diffusion Handles Enabling 3D Edits for Diffusion Models by Lifting Activations to 3D-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pandey_2024_CVPR, author = {Pandey, Karran and Guerrero, Paul and Gadelha, Matheus and Hold-Geoffroy, Yannick and Singh, Karan and Mitra, Niloy J.}, title = {Diffusion Handles Enabling 3D Edits for Diffusion Models by Lifting Activations to 3D}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7695-7704} }
Friendly Sharpness-Aware Minimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Tao and Zhou, Pan and He, Zhengbao and Cheng, Xinwen and Huang, Xiaolin}, title = {Friendly Sharpness-Aware Minimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5631-5640} }
BIVDiff: A Training-Free Framework for General-Purpose Video Synthesis via Bridging Image and Video Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2024_CVPR, author = {Shi, Fengyuan and Gu, Jiaxi and Xu, Hang and Xu, Songcen and Zhang, Wei and Wang, Limin}, title = {BIVDiff: A Training-Free Framework for General-Purpose Video Synthesis via Bridging Image and Video Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7393-7402} }
NC-TTT: A Noise Contrastive Approach for Test-Time Training-
[pdf]
[supp]
[bibtex]@InProceedings{Osowiechi_2024_CVPR, author = {Osowiechi, David and Hakim, Gustavo A. Vargas and Noori, Mehrdad and Cheraghalikhani, Milad and Bahri, Ali and Yazdanpanah, Moslem and Ben Ayed, Ismail and Desrosiers, Christian}, title = {NC-TTT: A Noise Contrastive Approach for Test-Time Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6078-6086} }
Small Scale Data-Free Knowledge Distillation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, He and Wang, Yikai and Liu, Huaping and Sun, Fuchun and Yao, Anbang}, title = {Small Scale Data-Free Knowledge Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6008-6016} }
CFPL-FAS: Class Free Prompt Learning for Generalizable Face Anti-spoofing-
[pdf]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Ajian and Xue, Shuai and Gan, Jianwen and Wan, Jun and Liang, Yanyan and Deng, Jiankang and Escalera, Sergio and Lei, Zhen}, title = {CFPL-FAS: Class Free Prompt Learning for Generalizable Face Anti-spoofing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {222-232} }
Open Vocabulary Semantic Scene Sketch Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bourouis_2024_CVPR, author = {Bourouis, Ahmed and Fan, Judith E. and Gryaditskaya, Yulia}, title = {Open Vocabulary Semantic Scene Sketch Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4176-4186} }
IntrinsicAvatar: Physically Based Inverse Rendering of Dynamic Humans from Monocular Videos via Explicit Ray Tracing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Shaofei and Antic, Bozidar and Geiger, Andreas and Tang, Siyu}, title = {IntrinsicAvatar: Physically Based Inverse Rendering of Dynamic Humans from Monocular Videos via Explicit Ray Tracing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1877-1888} }
Efficient Detection of Long Consistent Cycles and its Application to Distributed Synchronization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Shaohan and Shi, Yunpeng and Lerman, Gilad}, title = {Efficient Detection of Long Consistent Cycles and its Application to Distributed Synchronization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5260-5269} }
Vlogger: Make Your Dream A Vlog-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhuang_2024_CVPR, author = {Zhuang, Shaobin and Li, Kunchang and Chen, Xinyuan and Wang, Yaohui and Liu, Ziwei and Qiao, Yu and Wang, Yali}, title = {Vlogger: Make Your Dream A Vlog}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8806-8817} }
Neural 3D Strokes: Creating Stylized 3D Scenes with Vectorized 3D Strokes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Duan_2024_CVPR, author = {Duan, Hao-Bin and Wang, Miao and Li, Yan-Xun and Yang, Yong-Liang}, title = {Neural 3D Strokes: Creating Stylized 3D Scenes with Vectorized 3D Strokes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5240-5249} }
Multi-Object Tracking in the Dark-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Xinzhe and Ma, Kang and Liu, Qiankun and Zou, Yunhao and Fu, Ying}, title = {Multi-Object Tracking in the Dark}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {382-392} }
UniHuman: A Unified Model For Editing Human Images in the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Nannan and Liu, Qing and Singh, Krishna Kumar and Wang, Yilin and Zhang, Jianming and Plummer, Bryan A. and Lin, Zhe}, title = {UniHuman: A Unified Model For Editing Human Images in the Wild}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2039-2048} }
DiffAgent: Fast and Accurate Text-to-Image API Selection with Large Language Model-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Lirui and Yang, Yue and Zhang, Kaipeng and Shao, Wenqi and Zhang, Yuxin and Qiao, Yu and Luo, Ping and Ji, Rongrong}, title = {DiffAgent: Fast and Accurate Text-to-Image API Selection with Large Language Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6390-6399} }
In Search of a Data Transformation That Accelerates Neural Field Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Seo_2024_CVPR, author = {Seo, Junwon and Lee, Sangyoon and Kim, Kwang In and Lee, Jaeho}, title = {In Search of a Data Transformation That Accelerates Neural Field Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4830-4839} }
Zero-Painter: Training-Free Layout Control for Text-to-Image Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Ohanyan_2024_CVPR, author = {Ohanyan, Marianna and Manukyan, Hayk and Wang, Zhangyang and Navasardyan, Shant and Shi, Humphrey}, title = {Zero-Painter: Training-Free Layout Control for Text-to-Image Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8764-8774} }
Towards 3D Vision with Low-Cost Single-Photon Cameras-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mu_2024_CVPR, author = {Mu, Fangzhou and Sifferman, Carter and Jungerman, Sacha and Li, Yiquan and Han, Mark and Gleicher, Michael and Gupta, Mohit and Li, Yin}, title = {Towards 3D Vision with Low-Cost Single-Photon Cameras}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5302-5311} }
WonderJourney: Going from Anywhere to Everywhere-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2024_CVPR, author = {Yu, Hong-Xing and Duan, Haoyi and Hur, Junhwa and Sargent, Kyle and Rubinstein, Michael and Freeman, William T. and Cole, Forrester and Sun, Deqing and Snavely, Noah and Wu, Jiajun and Herrmann, Charles}, title = {WonderJourney: Going from Anywhere to Everywhere}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6658-6667} }
4D-fy: Text-to-4D Generation Using Hybrid Score Distillation Sampling-
[pdf]
[supp]
[bibtex]@InProceedings{Bahmani_2024_CVPR, author = {Bahmani, Sherwin and Skorokhodov, Ivan and Rong, Victor and Wetzstein, Gordon and Guibas, Leonidas and Wonka, Peter and Tulyakov, Sergey and Park, Jeong Joon and Tagliasacchi, Andrea and Lindell, David B.}, title = {4D-fy: Text-to-4D Generation Using Hybrid Score Distillation Sampling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7996-8006} }
FreeControl: Training-Free Spatial Control of Any Text-to-Image Diffusion Model with Any Condition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mo_2024_CVPR, author = {Mo, Sicheng and Mu, Fangzhou and Lin, Kuan Heng and Liu, Yanli and Guan, Bochen and Li, Yin and Zhou, Bolei}, title = {FreeControl: Training-Free Spatial Control of Any Text-to-Image Diffusion Model with Any Condition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7465-7475} }
VMC: Video Motion Customization using Temporal Attention Adaption for Text-to-Video Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2024_CVPR, author = {Jeong, Hyeonho and Park, Geon Yeong and Ye, Jong Chul}, title = {VMC: Video Motion Customization using Temporal Attention Adaption for Text-to-Video Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9212-9221} }
DistriFusion: Distributed Parallel Inference for High-Resolution Diffusion Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Muyang and Cai, Tianle and Cao, Jiaxin and Zhang, Qinsheng and Cai, Han and Bai, Junjie and Jia, Yangqing and Li, Kai and Han, Song}, title = {DistriFusion: Distributed Parallel Inference for High-Resolution Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7183-7193} }
AZ-NAS: Assembling Zero-Cost Proxies for Network Architecture Search-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2024_CVPR, author = {Lee, Junghyup and Ham, Bumsub}, title = {AZ-NAS: Assembling Zero-Cost Proxies for Network Architecture Search}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5893-5903} }
Improving Physics-Augmented Continuum Neural Radiance Field-Based Geometry-Agnostic System Identification with Lagrangian Particle Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kaneko_2024_CVPR, author = {Kaneko, Takuhiro}, title = {Improving Physics-Augmented Continuum Neural Radiance Field-Based Geometry-Agnostic System Identification with Lagrangian Particle Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5470-5480} }
Beyond Image Super-Resolution for Image Recognition with Task-Driven Perceptual Loss-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Jaeha and Oh, Junghun and Lee, Kyoung Mu}, title = {Beyond Image Super-Resolution for Image Recognition with Task-Driven Perceptual Loss}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2651-2661} }
XCube: Large-Scale 3D Generative Modeling using Sparse Voxel Hierarchies-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2024_CVPR, author = {Ren, Xuanchi and Huang, Jiahui and Zeng, Xiaohui and Museth, Ken and Fidler, Sanja and Williams, Francis}, title = {XCube: Large-Scale 3D Generative Modeling using Sparse Voxel Hierarchies}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4209-4219} }
Reconstruction-free Cascaded Adaptive Compressive Sensing-
[pdf]
[bibtex]@InProceedings{Qiu_2024_CVPR, author = {Qiu, Chenxi and Yue, Tao and Hu, Xuemei}, title = {Reconstruction-free Cascaded Adaptive Compressive Sensing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2620-2630} }
USE: Universal Segment Embeddings for Open-Vocabulary Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Xiaoqi and He, Wenbin and Xuan, Xiwei and Sebastian, Clint and Ono, Jorge Piazentin and Li, Xin and Behpour, Sima and Doan, Thang and Gou, Liang and Shen, Han-Wei and Ren, Liu}, title = {USE: Universal Segment Embeddings for Open-Vocabulary Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4187-4196} }
Functional Diffusion-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Biao and Wonka, Peter}, title = {Functional Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4723-4732} }
Wired Perspectives: Multi-View Wire Art Embraces Generative AI-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qu_2024_CVPR, author = {Qu, Zhiyu and Yang, Lan and Zhang, Honggang and Xiang, Tao and Pang, Kaiyue and Song, Yi-Zhe}, title = {Wired Perspectives: Multi-View Wire Art Embraces Generative AI}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6149-6158} }
Leveraging Camera Triplets for Efficient and Accurate Structure-from-Motion-
[pdf]
[supp]
[bibtex]@InProceedings{Manam_2024_CVPR, author = {Manam, Lalit and Govindu, Venu Madhav}, title = {Leveraging Camera Triplets for Efficient and Accurate Structure-from-Motion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4959-4968} }
SimDA: Simple Diffusion Adapter for Efficient Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xing_2024_CVPR, author = {Xing, Zhen and Dai, Qi and Hu, Han and Wu, Zuxuan and Jiang, Yu-Gang}, title = {SimDA: Simple Diffusion Adapter for Efficient Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7827-7839} }
Multi-view Aggregation Network for Dichotomous Image Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yu_2024_CVPR, author = {Yu, Qian and Zhao, Xiaoqi and Pang, Youwei and Zhang, Lihe and Lu, Huchuan}, title = {Multi-view Aggregation Network for Dichotomous Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3921-3930} }
A Recipe for Scaling up Text-to-Video Generation with Text-free Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Xiang and Zhang, Shiwei and Yuan, Hangjie and Qing, Zhiwu and Gong, Biao and Zhang, Yingya and Shen, Yujun and Gao, Changxin and Sang, Nong}, title = {A Recipe for Scaling up Text-to-Video Generation with Text-free Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6572-6582} }
Molecular Data Programming: Towards Molecule Pseudo-labeling with Systematic Weak Supervision-
[pdf]
[supp]
[bibtex]@InProceedings{Juan_2024_CVPR, author = {Juan, Xin and Zhou, Kaixiong and Liu, Ninghao and Chen, Tianlong and Wang, Xin}, title = {Molecular Data Programming: Towards Molecule Pseudo-labeling with Systematic Weak Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {308-318} }
Residual Denoising Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Jiawei and Wang, Qiang and Fan, Huijie and Wang, Yinong and Tang, Yandong and Qu, Liangqiong}, title = {Residual Denoising Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2773-2783} }
Towards Accurate and Robust Architectures via Neural Architecture Search-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ou_2024_CVPR, author = {Ou, Yuwei and Feng, Yuqi and Sun, Yanan}, title = {Towards Accurate and Robust Architectures via Neural Architecture Search}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5967-5976} }
Closely Interactive Human Reconstruction with Proxemics and Physics-Guided Adaption-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Buzhen and Li, Chen and Xu, Chongyang and Pan, Liang and Wang, Yangang and Lee, Gim Hee}, title = {Closely Interactive Human Reconstruction with Proxemics and Physics-Guided Adaption}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1011-1021} }
Taming Stable Diffusion for Text to 360 Panorama Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Cheng and Wu, Qianyi and Gambardella, Camilo Cruz and Huang, Xiaoshui and Phung, Dinh and Ouyang, Wanli and Cai, Jianfei}, title = {Taming Stable Diffusion for Text to 360 Panorama Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6347-6357} }
Modular Blind Video Quality Assessment-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wen_2024_CVPR, author = {Wen, Wen and Li, Mu and Zhang, Yabin and Liao, Yiting and Li, Junlin and Zhang, Li and Ma, Kede}, title = {Modular Blind Video Quality Assessment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2763-2772} }
RELI11D: A Comprehensive Multimodal Human Motion Dataset and Method-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2024_CVPR, author = {Yan, Ming and Zhang, Yan and Cai, Shuqiang and Fan, Shuqi and Lin, Xincheng and Dai, Yudi and Shen, Siqi and Wen, Chenglu and Xu, Lan and Ma, Yuexin and Wang, Cheng}, title = {RELI11D: A Comprehensive Multimodal Human Motion Dataset and Method}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2250-2262} }
One-Class Face Anti-spoofing via Spoof Cue Map-Guided Feature Learning-
[pdf]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Pei-Kai and Chiang, Cheng-Hsuan and Chen, Tzu-Hsien and Chong, Jun-Xiong and Liu, Tyng-Luh and Hsu, Chiou-Ting}, title = {One-Class Face Anti-spoofing via Spoof Cue Map-Guided Feature Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {277-286} }
InteractDiffusion: Interaction Control in Text-to-Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hoe_2024_CVPR, author = {Hoe, Jiun Tian and Jiang, Xudong and Chan, Chee Seng and Tan, Yap-Peng and Hu, Weipeng}, title = {InteractDiffusion: Interaction Control in Text-to-Image Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6180-6189} }
Emergent Open-Vocabulary Semantic Segmentation from Off-the-shelf Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2024_CVPR, author = {Luo, Jiayun and Khandelwal, Siddhesh and Sigal, Leonid and Li, Boyang}, title = {Emergent Open-Vocabulary Semantic Segmentation from Off-the-shelf Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4029-4040} }
SelfPose3d: Self-Supervised Multi-Person Multi-View 3d Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Srivastav_2024_CVPR, author = {Srivastav, Vinkle and Chen, Keqi and Padoy, Nicolas}, title = {SelfPose3d: Self-Supervised Multi-Person Multi-View 3d Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2502-2512} }
Joint2Human: High-Quality 3D Human Generation via Compact Spherical Embedding of 3D Joints-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Muxin and Feng, Qiao and Su, Zhuo and Wen, Chao and Xue, Zhou and Li, Kun}, title = {Joint2Human: High-Quality 3D Human Generation via Compact Spherical Embedding of 3D Joints}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1429-1438} }
Prompt-Free Diffusion: Taking "Text" out of Text-to-Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Xingqian and Guo, Jiayi and Wang, Zhangyang and Huang, Gao and Essa, Irfan and Shi, Humphrey}, title = {Prompt-Free Diffusion: Taking ``Text'' out of Text-to-Image Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8682-8692} }
Multi-agent Long-term 3D Human Pose Forecasting via Interaction-aware Trajectory Conditioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2024_CVPR, author = {Jeong, Jaewoo and Park, Daehee and Yoon, Kuk-Jin}, title = {Multi-agent Long-term 3D Human Pose Forecasting via Interaction-aware Trajectory Conditioning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1617-1628} }
CLOAF: CoLlisiOn-Aware Human Flow-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Davydov_2024_CVPR, author = {Davydov, Andrey and Engilberge, Martin and Salzmann, Mathieu and Fua, Pascal}, title = {CLOAF: CoLlisiOn-Aware Human Flow}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1176-1185} }
Hybrid Functional Maps for Crease-Aware Non-Isometric Shape Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bastian_2024_CVPR, author = {Bastian, Lennart and Xie, Yizheng and Navab, Nassir and L{\"a}hner, Zorah}, title = {Hybrid Functional Maps for Crease-Aware Non-Isometric Shape Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3313-3323} }
Density-Guided Semi-Supervised 3D Semantic Segmentation with Dual-Space Hardness Sampling-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Jianan and Dong, Qiulei}, title = {Density-Guided Semi-Supervised 3D Semantic Segmentation with Dual-Space Hardness Sampling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3260-3269} }
ElasticDiffusion: Training-free Arbitrary Size Image Generation through Global-Local Content Separation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Haji-Ali_2024_CVPR, author = {Haji-Ali, Moayed and Balakrishnan, Guha and Ordonez, Vicente}, title = {ElasticDiffusion: Training-free Arbitrary Size Image Generation through Global-Local Content Separation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6603-6612} }
Locally Adaptive Neural 3D Morphable Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tarasiou_2024_CVPR,
  author    = {Tarasiou, Michail and Potamias, Rolandos Alexandros and O'Sullivan, Eimear and Ploumpis, Stylianos and Zafeiriou, Stefanos},
  title     = {Locally Adaptive Neural {3D} Morphable Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1867--1876},
}
ICON: Incremental CONfidence for Joint Pose and Radiance Field Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Weiyao and Gleize, Pierre and Tang, Hao and Chen, Xingyu and Liang, Kevin J and Feiszli, Matt},
  title     = {{ICON}: Incremental {CONfidence} for Joint Pose and Radiance Field Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5406--5417},
}
Learned Scanpaths Aid Blind Panoramic Video Quality Assessment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2024_CVPR,
  author    = {Fan, Kanglong and Wen, Wen and Li, Mu and Peng, Yifan and Ma, Kede},
  title     = {Learned Scanpaths Aid Blind Panoramic Video Quality Assessment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2599--2608},
}
TI2V-Zero: Zero-Shot Image Conditioning for Text-to-Video Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Ni_2024_CVPR,
  author    = {Ni, Haomiao and Egger, Bernhard and Lohit, Suhas and Cherian, Anoop and Wang, Ye and Koike-Akino, Toshiaki and Huang, Sharon X. and Marks, Tim K.},
  title     = {{TI2V-Zero}: Zero-Shot Image Conditioning for Text-to-Video Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9015--9025},
}
iToF-flow-based High Frame Rate Depth Imaging-
[pdf]
[bibtex]@InProceedings{Meng_2024_CVPR,
  author    = {Meng, Yu and Xue, Zhou and Chang, Xu and Hu, Xuemei and Yue, Tao},
  title     = {{iToF}-flow-based High Frame Rate Depth Imaging},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4929--4938},
}
Relightful Harmonization: Lighting-aware Portrait Background Replacement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2024_CVPR,
  author    = {Ren, Mengwei and Xiong, Wei and Yoon, Jae Shin and Shu, Zhixin and Zhang, Jianming and Jung, HyunJoon and Gerig, Guido and Zhang, He},
  title     = {Relightful Harmonization: Lighting-aware Portrait Background Replacement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6452--6462},
}
Mitigating Motion Blur in Neural Radiance Fields with Events and Frames-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cannici_2024_CVPR,
  author    = {Cannici, Marco and Scaramuzza, Davide},
  title     = {Mitigating Motion Blur in Neural Radiance Fields with Events and Frames},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9286--9296},
}
TokenHMR: Advancing Human Mesh Recovery with a Tokenized Pose Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dwivedi_2024_CVPR,
  author    = {Dwivedi, Sai Kumar and Sun, Yu and Patel, Priyanka and Feng, Yao and Black, Michael J.},
  title     = {{TokenHMR}: Advancing Human Mesh Recovery with a Tokenized Pose Representation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1323--1333},
}
FaceCom: Towards High-fidelity 3D Facial Shape Completion via Optimization and Inpainting Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Yinglong and Wu, Hongyu and Wang, Xiaogang and Qin, Qingzhao and Zhao, Yijiao and Wang, Yong and Hao, Aimin},
  title     = {{FaceCom}: Towards High-fidelity {3D} Facial Shape Completion via Optimization and Inpainting Guidance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2177--2186},
}
LightOctree: Lightweight 3D Spatially-Coherent Indoor Lighting Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Xuecan and Xiao, Shibang and Liang, Xiaohui},
  title     = {{LightOctree}: Lightweight {3D} Spatially-Coherent Indoor Lighting Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4536--4545},
}
FaceLift: Semi-supervised 3D Facial Landmark Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ferman_2024_CVPR,
  author    = {Ferman, David and Garrido, Pablo and Bharaj, Gaurav},
  title     = {{FaceLift}: Semi-supervised {3D} Facial Landmark Localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1781--1791},
}
PSDPM: Prototype-based Secondary Discriminative Pixels Mining for Weakly Supervised Semantic Segmentation-
[pdf]
[bibtex]@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Xinqiao and Yang, Ziqian and Dai, Tianhong and Zhang, Bingfeng and Xiao, Jimin},
  title     = {{PSDPM}: Prototype-based Secondary Discriminative Pixels Mining for Weakly Supervised Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3437--3446},
}
Frozen CLIP: A Strong Backbone for Weakly Supervised Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Bingfeng and Yu, Siyue and Wei, Yunchao and Zhao, Yao and Xiao, Jimin},
  title     = {Frozen {CLIP}: A Strong Backbone for Weakly Supervised Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3796--3806},
}
LAFS: Landmark-based Facial Self-supervised Learning for Face Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2024_CVPR,
  author    = {Sun, Zhonglin and Feng, Chen and Patras, Ioannis and Tzimiropoulos, Georgios},
  title     = {{LAFS}: Landmark-based Facial Self-supervised Learning for Face Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1639--1649},
}
SED: A Simple Encoder-Decoder for Open-Vocabulary Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2024_CVPR,
  author    = {Xie, Bin and Cao, Jiale and Xie, Jin and Khan, Fahad Shahbaz and Pang, Yanwei},
  title     = {{SED}: A Simple Encoder-Decoder for Open-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3426--3436},
}
GPLD3D: Latent Diffusion of 3D Shape Generative Models by Enforcing Geometric and Physical Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Dong_2024_CVPR,
  author    = {Dong, Yuan and Zuo, Qi and Gu, Xiaodong and Yuan, Weihao and Zhao, Zhengyi and Dong, Zilong and Bo, Liefeng and Huang, Qixing},
  title     = {{GPLD3D}: Latent Diffusion of {3D} Shape Generative Models by Enforcing Geometric and Physical Priors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {56--66},
}
Self-correcting LLM-controlled Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Tsung-Han and Lian, Long and Gonzalez, Joseph E. and Li, Boyi and Darrell, Trevor},
  title     = {Self-correcting {LLM}-controlled Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6327--6336},
}
PACER+: On-Demand Pedestrian Animation Controller in Driving Scenarios-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Jingbo and Luo, Zhengyi and Yuan, Ye and Li, Yixuan and Dai, Bo},
  title     = {{PACER+}: On-Demand Pedestrian Animation Controller in Driving Scenarios},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {718--728},
}
LTM: Lightweight Textured Mesh Extraction and Refinement of Large Unbounded Scenes for Efficient Storage and Real-time Rendering-
[pdf]
[supp]
[bibtex]@InProceedings{Choi_2024_CVPR,
  author    = {Choi, Jaehoon and Shah, Rajvi and Li, Qinbo and Wang, Yipeng and Saraf, Ayush and Kim, Changil and Huang, Jia-Bin and Manocha, Dinesh and Alsisan, Suhib and Kopf, Johannes},
  title     = {{LTM}: Lightweight Textured Mesh Extraction and Refinement of Large Unbounded Scenes for Efficient Storage and Real-time Rendering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5053--5063},
}
Don't Drop Your Samples! Coherence-Aware Training Benefits Conditional Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Dufour_2024_CVPR,
  author    = {Dufour, Nicolas and Besnier, Victor and Kalogeiton, Vicky and Picard, David},
  title     = {Don't Drop Your Samples! {Coherence}-Aware Training Benefits Conditional Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6264--6273},
}
What Do You See in Vehicle? Comprehensive Vision Solution for In-Vehicle Gaze Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2024_CVPR,
  author    = {Cheng, Yihua and Zhu, Yaning and Wang, Zongji and Hao, Hongquan and Liu, Yongwei and Cheng, Shiqing and Wang, Xi and Chang, Hyung Jin},
  title     = {What Do You See in Vehicle? {Comprehensive} Vision Solution for In-Vehicle Gaze Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1556--1565},
}
UFORecon: Generalizable Sparse-View Surface Reconstruction from Arbitrary and Unfavorable Sets-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Na_2024_CVPR,
  author    = {Na, Youngju and Kim, Woo Jae and Han, Kyu Beom and Ha, Suhyeon and Yoon, Sung-Eui},
  title     = {{UFORecon}: Generalizable Sparse-View Surface Reconstruction from Arbitrary and Unfavorable Sets},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5094--5104},
}
Breathing Life Into Sketches Using Text-to-Video Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gal_2024_CVPR,
  author    = {Gal, Rinon and Vinker, Yael and Alaluf, Yuval and Bermano, Amit and Cohen-Or, Daniel and Shamir, Ariel and Chechik, Gal},
  title     = {Breathing Life Into Sketches Using Text-to-Video Priors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4325--4336},
}
Learning Diffusion Texture Priors for Image Restoration-
[pdf]
[bibtex]@InProceedings{Ye_2024_CVPR,
  author    = {Ye, Tian and Chen, Sixiang and Chai, Wenhao and Xing, Zhaohu and Qin, Jing and Lin, Ge and Zhu, Lei},
  title     = {Learning Diffusion Texture Priors for Image Restoration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2524--2534},
}
Entangled View-Epipolar Information Aggregation for Generalizable Neural Radiance Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Min_2024_CVPR,
  author    = {Min, Zhiyuan and Luo, Yawei and Yang, Wei and Wang, Yuesong and Yang, Yi},
  title     = {Entangled View-Epipolar Information Aggregation for Generalizable Neural Radiance Fields},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4906--4916},
}
YolOOD: Utilizing Object Detection Concepts for Multi-Label Out-of-Distribution Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zolfi_2024_CVPR,
  author    = {Zolfi, Alon and Amit, Guy and Baras, Amit and Koda, Satoru and Morikawa, Ikuya and Elovici, Yuval and Shabtai, Asaf},
  title     = {{YolOOD}: Utilizing Object Detection Concepts for Multi-Label Out-of-Distribution Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5788--5797},
}
Collaborating Foundation Models for Domain Generalized Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Benigmim_2024_CVPR,
  author    = {Benigmim, Yasser and Roy, Subhankar and Essid, Slim and Kalogeiton, Vicky and Lathuili{\`e}re, St{\'e}phane},
  title     = {Collaborating Foundation Models for Domain Generalized Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3108--3119},
}
Towards Variable and Coordinated Holistic Co-Speech Motion Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Yifei and Cao, Qiong and Wen, Yandong and Jiang, Huaiguang and Ding, Changxing},
  title     = {Towards Variable and Coordinated Holistic Co-Speech Motion Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1566--1576},
}
AllSpark: Reborn Labeled Features from Unlabeled in Transformer for Semi-Supervised Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Haonan and Zhang, Qixiang and Li, Yi and Li, Xiaomeng},
  title     = {{AllSpark}: Reborn Labeled Features from Unlabeled in Transformer for Semi-Supervised Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3627--3636},
}
SIGNeRF: Scene Integrated Generation for Neural Radiance Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dihlmann_2024_CVPR,
  author    = {Dihlmann, Jan-Niklas and Engelhardt, Andreas and Lensch, Hendrik},
  title     = {{SIGNeRF}: Scene Integrated Generation for Neural Radiance Fields},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6679--6688},
}
Generating Illustrated Instructions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Menon_2024_CVPR,
  author    = {Menon, Sachit and Misra, Ishan and Girdhar, Rohit},
  title     = {Generating Illustrated Instructions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6274--6284},
}
Robust Image Denoising through Adversarial Frequency Mixup-
[pdf]
[supp]
[bibtex]@InProceedings{Ryou_2024_CVPR,
  author    = {Ryou, Donghun and Ha, Inju and Yoo, Hyewon and Kim, Dongwan and Han, Bohyung},
  title     = {Robust Image Denoising through Adversarial Frequency Mixup},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2723--2732},
}
AnyScene: Customized Image Synthesis with Composited Foreground-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Ruidong and Wang, Lanjun and Nie, Weizhi and Zhang, Yongdong and Liu, An-An},
  title     = {{AnyScene}: Customized Image Synthesis with Composited Foreground},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8724--8733},
}
Training Generative Image Super-Resolution Models by Wavelet-Domain Losses Enables Better Control of Artifacts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Korkmaz_2024_CVPR,
  author    = {Korkmaz, Cansu and Tekalp, A. Murat and Dogan, Zafer},
  title     = {Training Generative Image Super-Resolution Models by Wavelet-Domain Losses Enables Better Control of Artifacts},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5926--5936},
}
Monocular Identity-Conditioned Facial Reflectance Reconstruction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ren_2024_CVPR,
  author    = {Ren, Xingyu and Deng, Jiankang and Cheng, Yuhao and Guo, Jia and Ma, Chao and Yan, Yichao and Zhu, Wenhan and Yang, Xiaokang},
  title     = {Monocular Identity-Conditioned Facial Reflectance Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {885--895},
}
C3: High-Performance and Low-Complexity Neural Compression from a Single Image or Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR,
  author    = {Kim, Hyunjik and Bauer, Matthias and Theis, Lucas and Schwarz, Jonathan Richard and Dupont, Emilien},
  title     = {C3: High-Performance and Low-Complexity Neural Compression from a Single Image or Video},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9347--9358},
}
Revisiting Non-Autoregressive Transformers for Efficient Image Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ni_2024_CVPR,
  author    = {Ni, Zanlin and Wang, Yulin and Zhou, Renping and Guo, Jiayi and Hu, Jinyi and Liu, Zhiyuan and Song, Shiji and Yao, Yuan and Huang, Gao},
  title     = {Revisiting Non-Autoregressive Transformers for Efficient Image Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7007--7016},
}
ANIM: Accurate Neural Implicit Model for Human Reconstruction from a single RGB-D Image-
[pdf]
[supp]
[bibtex]@InProceedings{Pesavento_2024_CVPR,
  author    = {Pesavento, Marco and Xu, Yuanlu and Sarafianos, Nikolaos and Maier, Robert and Wang, Ziyan and Yao, Chun-Han and Volino, Marco and Boyer, Edmond and Hilton, Adrian and Tung, Tony},
  title     = {{ANIM}: Accurate Neural Implicit Model for Human Reconstruction from a single {RGB-D} Image},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5448--5458},
}
Real-Time Simulated Avatar from Head-Mounted Sensors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2024_CVPR,
  author    = {Luo, Zhengyi and Cao, Jinkun and Khirodkar, Rawal and Winkler, Alexander and Kitani, Kris and Xu, Weipeng},
  title     = {Real-Time Simulated Avatar from Head-Mounted Sensors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {571--581},
}
Seamless Human Motion Composition with Blended Positional Encodings-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Barquero_2024_CVPR,
  author    = {Barquero, German and Escalera, Sergio and Palmero, Cristina},
  title     = {Seamless Human Motion Composition with Blended Positional Encodings},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {457--469},
}
FedUV: Uniformity and Variance for Heterogeneous Federated Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Son_2024_CVPR,
  author    = {Son, Ha Min and Kim, Moon-Hyun and Chung, Tai-Myoung and Huang, Chao and Liu, Xin},
  title     = {{FedUV}: Uniformity and Variance for Heterogeneous Federated Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5863--5872},
}
GAvatar: Animatable 3D Gaussian Avatars with Implicit Mesh Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2024_CVPR,
  author    = {Yuan, Ye and Li, Xueting and Huang, Yangyi and De Mello, Shalini and Nagano, Koki and Kautz, Jan and Iqbal, Umar},
  title     = {{GAvatar}: Animatable {3D} {Gaussian} Avatars with Implicit Mesh Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {896--905},
}
Grounding Everything: Emerging Localization Properties in Vision-Language Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bousselham_2024_CVPR,
  author    = {Bousselham, Walid and Petersen, Felix and Ferrari, Vittorio and Kuehne, Hilde},
  title     = {Grounding Everything: Emerging Localization Properties in Vision-Language Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3828--3837},
}
Mean-Shift Feature Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{Kobayashi_2024_CVPR,
  author    = {Kobayashi, Takumi},
  title     = {Mean-Shift Feature Transformer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6047--6056},
}
Domain Separation Graph Neural Networks for Saliency Object Ranking-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Zijian and Lu, Jun and Han, Jing and Bai, Lianfa and Zhang, Yi and Zhao, Zhuang and Song, Siyang},
  title     = {Domain Separation Graph Neural Networks for Saliency Object Ranking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3964--3974},
}
RAM-Avatar: Real-time Photo-Realistic Avatar from Monocular Videos with Full-body Control-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2024_CVPR,
  author    = {Deng, Xiang and Zheng, Zerong and Zhang, Yuxiang and Sun, Jingxiang and Xu, Chao and Yang, Xiaodong and Wang, Lizhen and Liu, Yebin},
  title     = {{RAM-Avatar}: Real-time Photo-Realistic Avatar from Monocular Videos with Full-body Control},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1996--2007},
}
Video Prediction by Modeling Videos as Continuous Multi-Dimensional Processes-
[pdf]
[supp]
[bibtex]@InProceedings{Shrivastava_2024_CVPR,
  author    = {Shrivastava, Gaurav and Shrivastava, Abhinav},
  title     = {Video Prediction by Modeling Videos as Continuous Multi-Dimensional Processes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7236--7245},
}
PICTURE: PhotorealistIC virtual Try-on from UnconstRained dEsigns-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ning_2024_CVPR,
  author    = {Ning, Shuliang and Wang, Duomin and Qin, Yipeng and Jin, Zirong and Wang, Baoyuan and Han, Xiaoguang},
  title     = {{PICTURE}: {PhotorealistIC} virtual Try-on from {UnconstRained} {dEsigns}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6976--6985},
}
Towards Robust 3D Pose Transfer with Adversarial Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Haoyu and Tang, Hao and Adeli, Ehsan and Zhao, Guoying},
  title     = {Towards Robust {3D} Pose Transfer with Adversarial Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2295--2304},
}
EAGLE: Eigen Aggregation Learning for Object-Centric Unsupervised Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR,
  author    = {Kim, Chanyoung and Han, Woojung and Ju, Dayun and Hwang, Seong Jae},
  title     = {{EAGLE}: Eigen Aggregation Learning for Object-Centric Unsupervised Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3523--3533},
}
AVID: Any-Length Video Inpainting with Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Zhixing and Wu, Bichen and Wang, Xiaoyan and Luo, Yaqiao and Zhang, Luxin and Zhao, Yinan and Vajda, Peter and Metaxas, Dimitris and Yu, Licheng},
  title     = {{AVID}: Any-Length Video Inpainting with Diffusion Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7162--7172},
}
NoiseCollage: A Layout-Aware Text-to-Image Diffusion Model Based on Noise Cropping and Merging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shirakawa_2024_CVPR,
  author    = {Shirakawa, Takahiro and Uchida, Seiichi},
  title     = {{NoiseCollage}: A Layout-Aware Text-to-Image Diffusion Model Based on Noise Cropping and Merging},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8921--8930},
}
Arbitrary Motion Style Transfer with Multi-condition Motion Latent Diffusion Model-
[pdf]
[supp]
[bibtex]@InProceedings{Song_2024_CVPR,
  author    = {Song, Wenfeng and Jin, Xingliang and Li, Shuai and Chen, Chenglizhao and Hao, Aimin and Hou, Xia and Li, Ning and Qin, Hong},
  title     = {Arbitrary Motion Style Transfer with Multi-condition Motion Latent Diffusion Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {821--830},
}
ViT-CoMer: Vision Transformer with Convolutional Multi-scale Feature Interaction for Dense Predictions-
[pdf]
[bibtex]@InProceedings{Xia_2024_CVPR,
  author    = {Xia, Chunlong and Wang, Xinliang and Lv, Feng and Hao, Xin and Shi, Yifeng},
  title     = {{ViT-CoMer}: Vision Transformer with Convolutional Multi-scale Feature Interaction for Dense Predictions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5493--5502},
}
PromptCoT: Align Prompt Distribution via Adapted Chain-of-Thought-
[pdf]
[supp]
[bibtex]@InProceedings{Yao_2024_CVPR,
  author    = {Yao, Junyi and Liu, Yijiang and Dong, Zhen and Guo, Mingfei and Hu, Helan and Keutzer, Kurt and Du, Li and Zhou, Daquan and Zhang, Shanghang},
  title     = {{PromptCoT}: Align Prompt Distribution via Adapted Chain-of-Thought},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7027--7037},
}
Anomaly Score: Evaluating Generative Models and Individual Generated Images based on Complexity and Vulnerability-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hwang_2024_CVPR,
  author    = {Hwang, Jaehui and Lee, Junghyuk and Lee, Jong-Seok},
  title     = {Anomaly Score: Evaluating Generative Models and Individual Generated Images based on Complexity and Vulnerability},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8754--8763},
}
GeneAvatar: Generic Expression-Aware Volumetric Head Avatar Editing from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bao_2024_CVPR,
  author    = {Bao, Chong and Zhang, Yinda and Li, Yuan and Zhang, Xiyu and Yang, Bangbang and Bao, Hujun and Pollefeys, Marc and Zhang, Guofeng and Cui, Zhaopeng},
  title     = {{GeneAvatar}: Generic Expression-Aware Volumetric Head Avatar Editing from a Single Image},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8952--8963},
}
Learn to Rectify the Bias of CLIP for Unsupervised Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Jingyun and Kang, Guoliang},
  title     = {Learn to Rectify the Bias of {CLIP} for Unsupervised Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4102--4112},
}
Unlocking Pre-trained Image Backbones for Semantic Image Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Ifriqi_2024_CVPR,
  author    = {Ifriqi, Tariq Berrada and Verbeek, Jakob and Couprie, Camille and Alahari, Karteek},
  title     = {Unlocking Pre-trained Image Backbones for Semantic Image Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7840--7849},
}
TexTile: A Differentiable Metric for Texture Tileability-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rodriguez-Pardo_2024_CVPR,
  author    = {Rodriguez-Pardo, Carlos and Casas, Dan and Garces, Elena and Lopez-Moreno, Jorge},
  title     = {{TexTile}: A Differentiable Metric for Texture Tileability},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4439--4449},
}
Improving Image Restoration through Removing Degradations in Textual Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2024_CVPR,
  author    = {Lin, Jingbo and Zhang, Zhilu and Wei, Yuxiang and Ren, Dongwei and Jiang, Dongsheng and Tian, Qi and Zuo, Wangmeng},
  title     = {Improving Image Restoration through Removing Degradations in Textual Representations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2866--2878},
}
ZONE: Zero-Shot Instruction-Guided Local Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Shanglin and Zeng, Bohan and Feng, Yutang and Gao, Sicheng and Liu, Xiuhui and Liu, Jiaming and Li, Lin and Tang, Xu and Hu, Yao and Liu, Jianzhuang and Zhang, Baochang},
  title     = {{ZONE}: Zero-Shot Instruction-Guided Local Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6254--6263},
}
U-VAP: User-specified Visual Appearance Personalization via Decoupled Self Augmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2024_CVPR,
  author    = {Wu, You and Liu, Kean and Mi, Xiaoyue and Tang, Fan and Cao, Juan and Li, Jintao},
  title     = {{U-VAP}: User-specified Visual Appearance Personalization via Decoupled Self Augmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9482--9491},
}
HHMR: Holistic Hand Mesh Recovery by Enhancing the Multimodal Controllability of Graph Diffusion Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Mengcheng and Zhang, Hongwen and Zhang, Yuxiang and Shao, Ruizhi and Yu, Tao and Liu, Yebin},
  title     = {{HHMR}: Holistic Hand Mesh Recovery by Enhancing the Multimodal Controllability of Graph Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {645--654},
}
Robust Self-calibration of Focal Lengths from the Fundamental Matrix-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kocur_2024_CVPR,
  author    = {Kocur, Viktor and Kyselica, Daniel and Kukelova, Zuzana},
  title     = {Robust Self-calibration of Focal Lengths from the Fundamental Matrix},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5220--5229},
}
PartDistill: 3D Shape Part Segmentation by Vision-Language Model Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Umam_2024_CVPR,
  author    = {Umam, Ardian and Yang, Cheng-Kun and Chen, Min-Hung and Chuang, Jen-Hui and Lin, Yen-Yu},
  title     = {{PartDistill}: {3D} Shape Part Segmentation by Vision-Language Model Distillation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3470--3479},
}
DragDiffusion: Harnessing Diffusion Models for Interactive Point-based Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2024_CVPR,
  author    = {Shi, Yujun and Xue, Chuhui and Liew, Jun Hao and Pan, Jiachun and Yan, Hanshu and Zhang, Wenqing and Tan, Vincent Y. F. and Bai, Song},
  title     = {{DragDiffusion}: Harnessing Diffusion Models for Interactive Point-based Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8839--8849},
}
Addressing Background Context Bias in Few-Shot Segmentation through Iterative Modulation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2024_CVPR,
  author    = {Zhu, Lanyun and Chen, Tianrun and Yin, Jianxiong and See, Simon and Liu, Jun},
  title     = {Addressing Background Context Bias in Few-Shot Segmentation through Iterative Modulation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3370--3379}
}
TiNO-Edit: Timestep and Noise Optimization for Robust Diffusion-Based Image Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Sherry X and Vaxman, Yaron and Ben Baruch, Elad and Asulin, David and Moreshet, Aviad and Lien, Kuo-Chin and Sra, Misha and Sen, Pradeep},
  title     = {{TiNO-Edit}: Timestep and Noise Optimization for Robust Diffusion-Based Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6337--6346}
}
AdaShift: Learning Discriminative Self-Gated Neural Feature Activation With an Adaptive Shift Factor-
[pdf]
[supp]
[bibtex]@InProceedings{Cai_2024_CVPR,
  author    = {Cai, Sudong},
  title     = {{AdaShift}: Learning Discriminative Self-Gated Neural Feature Activation With an Adaptive Shift Factor},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5947--5956}
}
SCEdit: Efficient and Controllable Image Diffusion Generation via Skip Connection Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2024_CVPR,
  author    = {Jiang, Zeyinzi and Mao, Chaojie and Pan, Yulin and Han, Zhen and Zhang, Jingfeng},
  title     = {{SCEdit}: Efficient and Controllable Image Diffusion Generation via Skip Connection Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8995--9004}
}
BA-SAM: Scalable Bias-Mode Attention Mask for Segment Anything Model-
[pdf]
[supp]
[bibtex]@InProceedings{Song_2024_CVPR,
  author    = {Song, Yiran and Zhou, Qianyu and Li, Xiangtai and Fan, Deng-Ping and Lu, Xuequan and Ma, Lizhuang},
  title     = {{BA-SAM}: Scalable Bias-Mode Attention Mask for {Segment Anything Model}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3162--3173}
}
Deciphering 'What' and 'Where' Visual Pathways from Spectral Clustering of Layer-Distributed Neural Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Xiao and Yunis, David and Maire, Michael},
  title     = {Deciphering `What' and `Where' Visual Pathways from Spectral Clustering of Layer-Distributed Neural Representations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4165--4175}
}
Real-Time Exposure Correction via Collaborative Transformations and Adaptive Sampling-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Ziwen and Zhang, Feng and Cao, Meng and Zhang, Jinpu and Shao, Yuanjie and Wang, Yuehuan and Sang, Nong},
  title     = {Real-Time Exposure Correction via Collaborative Transformations and Adaptive Sampling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2984--2994}
}
Lodge: A Coarse to Fine Diffusion Network for Long Dance Generation Guided by the Characteristic Dance Primitives-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Ronghui and Zhang, YuXiang and Zhang, Yachao and Zhang, Hongwen and Guo, Jie and Zhang, Yan and Liu, Yebin and Li, Xiu},
  title     = {Lodge: A Coarse to Fine Diffusion Network for Long Dance Generation Guided by the Characteristic Dance Primitives},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1524--1534}
}
Transcending Forgery Specificity with Latent Space Augmentation for Generalizable Deepfake Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2024_CVPR,
  author    = {Yan, Zhiyuan and Luo, Yuhao and Lyu, Siwei and Liu, Qingshan and Wu, Baoyuan},
  title     = {Transcending Forgery Specificity with Latent Space Augmentation for Generalizable Deepfake Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8984--8994}
}
Scaling Laws of Synthetic Images for Model Training ... for Now-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2024_CVPR,
  author    = {Fan, Lijie and Chen, Kaifeng and Krishnan, Dilip and Katabi, Dina and Isola, Phillip and Tian, Yonglong},
  title     = {Scaling Laws of Synthetic Images for Model Training ... for Now},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7382--7392}
}
State Space Models for Event Cameras-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zubic_2024_CVPR,
  author    = {Zubic, Nikola and Gehrig, Mathias and Scaramuzza, Davide},
  title     = {State Space Models for Event Cameras},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5819--5828}
}
TeTriRF: Temporal Tri-Plane Radiance Fields for Efficient Free-Viewpoint Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Minye and Wang, Zehao and Kouros, Georgios and Tuytelaars, Tinne},
  title     = {{TeTriRF}: Temporal Tri-Plane Radiance Fields for Efficient Free-Viewpoint Video},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6487--6496}
}
Event-assisted Low-Light Video Object Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Hebei and Wang, Jin and Yuan, Jiahui and Li, Yue and Weng, Wenming and Peng, Yansong and Zhang, Yueyi and Xiong, Zhiwei and Sun, Xiaoyan},
  title     = {Event-assisted Low-Light Video Object Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3250--3259}
}
VidToMe: Video Token Merging for Zero-Shot Video Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Xirui and Ma, Chao and Yang, Xiaokang and Yang, Ming-Hsuan},
  title     = {{VidToMe}: Video Token Merging for Zero-Shot Video Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7486--7495}
}
FaceChain-SuDe: Building Derived Class to Inherit Category Attributes for One-shot Subject-Driven Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Qiao_2024_CVPR,
  author    = {Qiao, Pengchong and Shang, Lei and Liu, Chang and Sun, Baigui and Ji, Xiangyang and Chen, Jie},
  title     = {{FaceChain-SuDe}: Building Derived Class to Inherit Category Attributes for One-shot Subject-Driven Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7215--7224}
}
StableVITON: Learning Semantic Correspondence with Latent Diffusion Model for Virtual Try-On-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR,
  author    = {Kim, Jeongho and Gu, Guojung and Park, Minho and Park, Sunghyun and Choo, Jaegul},
  title     = {{StableVITON}: Learning Semantic Correspondence with Latent Diffusion Model for Virtual Try-On},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8176--8185}
}
Make-Your-Anchor: A Diffusion-based 2D Avatar Generation Framework-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Ziyao and Tang, Fan and Zhang, Yong and Cun, Xiaodong and Cao, Juan and Li, Jintao and Lee, Tong-Yee},
  title     = {{Make-Your-Anchor}: A Diffusion-based {2D} Avatar Generation Framework},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6997--7006}
}
Learning Dynamic Tetrahedra for High-Quality Talking Head Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Zicheng and Zheng, Ruobing and Li, Bonan and Han, Congying and Li, Tianqi and Wang, Meng and Guo, Tiande and Chen, Jingdong and Liu, Ziwen and Yang, Ming},
  title     = {Learning Dynamic Tetrahedra for High-Quality Talking Head Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5209--5219}
}
3D Geometry-Aware Deformable Gaussian Splatting for Dynamic View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2024_CVPR,
  author    = {Lu, Zhicheng and Guo, Xiang and Hui, Le and Chen, Tianrui and Yang, Min and Tang, Xiao and Zhu, Feng and Dai, Yuchao},
  title     = {{3D} Geometry-Aware Deformable {Gaussian} Splatting for Dynamic View Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8900--8910}
}
Person-in-WiFi 3D: End-to-End Multi-Person 3D Pose Estimation with Wi-Fi-
[pdf]
[bibtex]@InProceedings{Yan_2024_CVPR,
  author    = {Yan, Kangwei and Wang, Fei and Qian, Bo and Ding, Han and Han, Jinsong and Wei, Xing},
  title     = {{Person-in-WiFi} {3D}: End-to-End Multi-Person {3D} Pose Estimation with {Wi-Fi}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {969--978}
}
Fairy: Fast Parallelized Instruction-Guided Video-to-Video Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Bichen and Chuang, Ching-Yao and Wang, Xiaoyan and Jia, Yichen and Krishnakumar, Kapil and Xiao, Tong and Liang, Feng and Yu, Licheng and Vajda, Peter},
  title     = {Fairy: Fast Parallelized Instruction-Guided Video-to-Video Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8261--8270}
}
SmartEdit: Exploring Complex Instruction-based Image Editing with Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Yuzhou and Xie, Liangbin and Wang, Xintao and Yuan, Ziyang and Cun, Xiaodong and Ge, Yixiao and Zhou, Jiantao and Dong, Chao and Huang, Rui and Zhang, Ruimao and Shan, Ying},
  title     = {{SmartEdit}: Exploring Complex Instruction-based Image Editing with Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8362--8371}
}
It's All About Your Sketch: Democratising Sketch Control in Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Koley_2024_CVPR,
  author    = {Koley, Subhadeep and Bhunia, Ayan Kumar and Sekhri, Deeptanshu and Sain, Aneeshan and Chowdhury, Pinaki Nath and Xiang, Tao and Song, Yi-Zhe},
  title     = {It's All About Your Sketch: Democratising Sketch Control in Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7204--7214}
}
When StyleGAN Meets Stable Diffusion: a W+ Adapter for Personalized Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Xiaoming and Hou, Xinyu and Loy, Chen Change},
  title     = {When {StyleGAN} Meets {Stable Diffusion}: a {W+} Adapter for Personalized Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2187--2196}
}
CAM Back Again: Large Kernel CNNs from a Weakly Supervised Object Localization Perspective-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yasuki_2024_CVPR,
  author    = {Yasuki, Shunsuke and Taki, Masato},
  title     = {{CAM} Back Again: Large Kernel {CNNs} from a Weakly Supervised Object Localization Perspective},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {341--351}
}
Putting the Object Back into Video Object Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2024_CVPR,
  author    = {Cheng, Ho Kei and Oh, Seoung Wug and Price, Brian and Lee, Joon-Young and Schwing, Alexander},
  title     = {Putting the Object Back into Video Object Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3151--3161}
}
Concept Weaver: Enabling Multi-Concept Fusion in Text-to-Image Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kwon_2024_CVPR,
  author    = {Kwon, Gihyun and Jenni, Simon and Li, Dingzeyu and Lee, Joon-Young and Ye, Jong Chul and Heilbron, Fabian Caba},
  title     = {{Concept Weaver}: Enabling Multi-Concept Fusion in Text-to-Image Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8880--8889}
}
Cross-Domain Few-Shot Segmentation via Iterative Support-Query Correspondence Mining-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nie_2024_CVPR,
  author    = {Nie, Jiahao and Xing, Yun and Zhang, Gongjie and Yan, Pei and Xiao, Aoran and Tan, Yap-Peng and Kot, Alex C. and Lu, Shijian},
  title     = {Cross-Domain Few-Shot Segmentation via Iterative Support-Query Correspondence Mining},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3380--3390}
}
DiffSHEG: A Diffusion-Based Approach for Real-Time Speech-driven Holistic 3D Expression and Gesture Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Junming and Liu, Yunfei and Wang, Jianan and Zeng, Ailing and Li, Yu and Chen, Qifeng},
  title     = {{DiffSHEG}: A Diffusion-Based Approach for Real-Time Speech-driven Holistic {3D} Expression and Gesture Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7352--7361}
}
Animating General Image with Large Visual Motion Model-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Dengsheng and Wei, Xiaoming and Wei, Xiaolin},
  title     = {Animating General Image with Large Visual Motion Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7131--7140}
}
DIRECT-3D: Learning Direct Text-to-3D Generation on Massive Noisy 3D Data-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Qihao and Zhang, Yi and Bai, Song and Kortylewski, Adam and Yuille, Alan},
  title     = {{DIRECT-3D}: Learning Direct {Text-to-3D} Generation on Massive Noisy {3D} Data},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6881--6891}
}
OHTA: One-shot Hand Avatar via Data-driven Implicit Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2024_CVPR,
  author    = {Zheng, Xiaozheng and Wen, Chao and Su, Zhuo and Xu, Zeran and Li, Zhaohu and Zhao, Yang and Xue, Zhou},
  title     = {{OHTA}: One-shot Hand Avatar via Data-driven Implicit Priors},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {799--810}
}
Human Motion Prediction Under Unexpected Perturbation-
[pdf]
[supp]
[bibtex]@InProceedings{Yue_2024_CVPR,
  author    = {Yue, Jiangbei and Li, Baiyi and Pettr{\'e}, Julien and Seyfried, Armin and Wang, He},
  title     = {Human Motion Prediction Under Unexpected Perturbation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1501--1511}
}
Text-to-3D Generation with Bidirectional Diffusion using both 2D and 3D priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ding_2024_CVPR,
  author    = {Ding, Lihe and Dong, Shaocong and Huang, Zhanpeng and Wang, Zibin and Zhang, Yiyuan and Gong, Kaixiong and Xu, Dan and Xue, Tianfan},
  title     = {{Text-to-3D} Generation with Bidirectional Diffusion using both {2D} and {3D} priors},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5115--5124}
}
Make-It-Vivid: Dressing Your Animatable Biped Cartoon Characters from Text-
[pdf]
[bibtex]@InProceedings{Tang_2024_CVPR,
  author    = {Tang, Junshu and Zeng, Yanhong and Fan, Ke and Wang, Xuheng and Dai, Bo and Chen, Kai and Ma, Lizhuang},
  title     = {{Make-It-Vivid}: Dressing Your Animatable Biped Cartoon Characters from Text},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6243--6253}
}
Neural Sign Actors: A Diffusion Model for 3D Sign Language Production from Text-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Baltatzis_2024_CVPR,
  author    = {Baltatzis, Vasileios and Potamias, Rolandos Alexandros and Ververas, Evangelos and Sun, Guanxiong and Deng, Jiankang and Zafeiriou, Stefanos},
  title     = {{Neural Sign Actors}: A Diffusion Model for {3D} Sign Language Production from Text},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1985--1995}
}
On the Diversity and Realism of Distilled Dataset: An Efficient Dataset Distillation Paradigm-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2024_CVPR,
  author    = {Sun, Peng and Shi, Bei and Yu, Daiwei and Lin, Tao},
  title     = {On the Diversity and Realism of Distilled Dataset: An Efficient Dataset Distillation Paradigm},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9390--9399}
}
Semantics-aware Motion Retargeting with Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Haodong and Chen, Zhike and Xu, Haocheng and Hao, Lei and Wu, Xiaofei and Xu, Songcen and Zhang, Zhensong and Wang, Yue and Xiong, Rong},
  title     = {Semantics-aware Motion Retargeting with Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2155--2164}
}
Unsupervised Semantic Segmentation Through Depth-Guided Feature Correlation and Sampling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sick_2024_CVPR,
  author    = {Sick, Leon and Engel, Dominik and Hermosilla, Pedro and Ropinski, Timo},
  title     = {Unsupervised Semantic Segmentation Through Depth-Guided Feature Correlation and Sampling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3637--3646}
}
RAVE: Randomized Noise Shuffling for Fast and Consistent Video Editing with Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kara_2024_CVPR,
  author    = {Kara, Ozgur and Kurtkaya, Bariscan and Yesiltepe, Hidir and Rehg, James M. and Yanardag, Pinar},
  title     = {{RAVE}: Randomized Noise Shuffling for Fast and Consistent Video Editing with Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6507--6516}
}
Video-Based Human Pose Regression via Decoupled Space-Time Aggregation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2024_CVPR,
  author    = {He, Jijie and Yang, Wenwu},
  title     = {Video-Based Human Pose Regression via Decoupled Space-Time Aggregation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1022--1031}
}
L-MAGIC: Language Model Assisted Generation of Images with Coherence-
[pdf]
[supp]
[bibtex]@InProceedings{Cai_2024_CVPR,
  author    = {Cai, Zhipeng and Mueller, Matthias and Birkl, Reiner and Wofk, Diana and Tseng, Shao-Yen and Cheng, Junda and Stan, Gabriela Ben-Melech and Lai, Vasudev and Paulitsch, Michael},
  title     = {{L-MAGIC}: Language Model Assisted Generation of Images with Coherence},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7049--7058}
}
3D Face Tracking from 2D Video through Iterative Dense UV to Image Flow-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Taubner_2024_CVPR,
  author    = {Taubner, Felix and Raina, Prashant and Tuli, Mathieu and Teh, Eu Wern and Lee, Chul and Huang, Jinmiao},
  title     = {{3D} Face Tracking from {2D} Video through Iterative Dense {UV} to Image Flow},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1227--1237}
}
Carve3D: Improving Multi-view Reconstruction Consistency for Diffusion Models with RL Finetuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2024_CVPR,
  author    = {Xie, Desai and Li, Jiahao and Tan, Hao and Sun, Xin and Shu, Zhixin and Zhou, Yi and Bi, Sai and Pirk, S{\"o}ren and Kaufman, Arie E.},
  title     = {{Carve3D}: Improving Multi-view Reconstruction Consistency for Diffusion Models with {RL} Finetuning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6369--6379}
}
Shadow Generation for Composite Image Using Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Qingyang and You, Junqi and Wang, Jianting and Tao, Xinhao and Zhang, Bo and Niu, Li},
  title     = {Shadow Generation for Composite Image Using Diffusion Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8121--8130}
}
DisCo: Disentangled Control for Realistic Human Dance Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Tan and Li, Linjie and Lin, Kevin and Zhai, Yuanhao and Lin, Chung-Ching and Yang, Zhengyuan and Zhang, Hanwang and Liu, Zicheng and Wang, Lijuan},
  title     = {{DisCo}: Disentangled Control for Realistic Human Dance Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9326--9336}
}
GaussianShader: 3D Gaussian Splatting with Shading Functions for Reflective Surfaces-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2024_CVPR,
  author    = {Jiang, Yingwenqi and Tu, Jiadong and Liu, Yuan and Gao, Xifeng and Long, Xiaoxiao and Wang, Wenping and Ma, Yuexin},
  title     = {{GaussianShader}: {3D} {Gaussian} Splatting with Shading Functions for Reflective Surfaces},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5322--5332}
}
pix2gestalt: Amodal Segmentation by Synthesizing Wholes-
[pdf]
[bibtex]@InProceedings{Ozguroglu_2024_CVPR,
  author    = {Ozguroglu, Ege and Liu, Ruoshi and Sur{\'\i}s, D{\'\i}dac and Chen, Dian and Dave, Achal and Tokmakov, Pavel and Vondrick, Carl},
  title     = {{pix2gestalt}: Amodal Segmentation by Synthesizing Wholes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3931--3940}
}
Weakly Supervised Point Cloud Semantic Segmentation via Artificial Oracle-
[pdf]
[supp]
[bibtex]@InProceedings{Kweon_2024_CVPR,
  author    = {Kweon, Hyeokjun and Kim, Jihun and Yoon, Kuk-Jin},
  title     = {Weakly Supervised Point Cloud Semantic Segmentation via Artificial Oracle},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3721--3731}
}
Forecasting of 3D Whole-body Human Poses with Grasping Objects-
[pdf]
[bibtex]@InProceedings{Yan_2024_CVPR,
  author    = {Yan, Haitao and Cui, Qiongjie and Xie, Jiexin and Guo, Shijie},
  title     = {Forecasting of {3D} Whole-body Human Poses with Grasping Objects},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1726--1736}
}
Accelerating Diffusion Sampling with Optimized Time Steps-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xue_2024_CVPR,
  author    = {Xue, Shuchen and Liu, Zhaoqiang and Chen, Fei and Zhang, Shifeng and Hu, Tianyang and Xie, Enze and Li, Zhenguo},
  title     = {Accelerating Diffusion Sampling with Optimized Time Steps},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8292--8301}
}
Unsupervised Template-assisted Point Cloud Shape Correspondence Network-
[pdf]
[arXiv]
[bibtex]@InProceedings{Deng_2024_CVPR,
  author    = {Deng, Jiacheng and Lu, Jiahao and Zhang, Tianzhu},
  title     = {Unsupervised Template-assisted Point Cloud Shape Correspondence Network},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5250--5259}
}
Finsler-Laplace-Beltrami Operators with Application to Shape Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Weber_2024_CVPR,
  author    = {Weber, Simon and Dag{\`e}s, Thomas and Gao, Maolin and Cremers, Daniel},
  title     = {{Finsler-Laplace-Beltrami} Operators with Application to Shape Analysis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3131--3140}
}
Minimal Perspective Autocalibration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cin_2024_CVPR,
  author    = {Cin, Andrea Porfiri Dal and Duff, Timothy and Magri, Luca and Pajdla, Tomas},
  title     = {Minimal Perspective Autocalibration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5064--5073}
}
Time- Memory- and Parameter-Efficient Visual Adaptation-
[pdf]
[supp]
[bibtex]@InProceedings{Mercea_2024_CVPR,
  author    = {Mercea, Otniel-Bogdan and Gritsenko, Alexey and Schmid, Cordelia and Arnab, Anurag},
  title     = {Time- Memory- and Parameter-Efficient Visual Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5536--5545}
}
Suppress and Rebalance: Towards Generalized Multi-Modal Face Anti-Spoofing-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lin_2024_CVPR,
  author    = {Lin, Xun and Wang, Shuai and Cai, Rizhao and Liu, Yizhong and Fu, Ying and Tang, Wenzhong and Yu, Zitong and Kot, Alex},
  title     = {Suppress and Rebalance: Towards Generalized Multi-Modal Face Anti-Spoofing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {211--221}
}
Universal Segmentation at Arbitrary Granularity with Language Instruction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Yong and Zhang, Cairong and Wang, Yitong and Wang, Jiahao and Yang, Yujiu and Tang, Yansong},
  title     = {Universal Segmentation at Arbitrary Granularity with Language Instruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3459--3469}
}
Layout-Agnostic Scene Text Image Synthesis with Diffusion Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhangli_2024_CVPR,
  author    = {Zhangli, Qilong and Jiang, Jindong and Liu, Di and Yu, Licheng and Dai, Xiaoliang and Ramchandani, Ankit and Pang, Guan and Metaxas, Dimitris N. and Krishnan, Praveen},
  title     = {Layout-Agnostic Scene Text Image Synthesis with Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7496--7506}
}
SmartMask: Context Aware High-Fidelity Mask Generation for Fine-grained Object Insertion and Layout Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Singh_2024_CVPR,
  author    = {Singh, Jaskirat and Zhang, Jianming and Liu, Qing and Smith, Cameron and Lin, Zhe and Zheng, Liang},
  title     = {{SmartMask}: Context Aware High-Fidelity Mask Generation for Fine-grained Object Insertion and Layout Control},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6497--6506}
}
Customization Assistant for Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Yufan and Zhang, Ruiyi and Gu, Jiuxiang and Sun, Tong},
  title     = {Customization Assistant for Text-to-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9182--9191}
}
GenHowTo: Learning to Generate Actions and State Transformations from Instructional Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Soucek_2024_CVPR,
  author    = {Sou{\v{c}}ek, Tom{\'a}{\v{s}} and Damen, Dima and Wray, Michael and Laptev, Ivan and Sivic, Josef},
  title     = {{GenHowTo}: Learning to Generate Actions and State Transformations from Instructional Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6561--6571}
}
Paint-it: Text-to-Texture Synthesis via Deep Convolutional Texture Map Optimization and Physically-Based Rendering-
[pdf]
[supp]
[bibtex]@InProceedings{Youwang_2024_CVPR,
  author    = {Youwang, Kim and Oh, Tae-Hyun and Pons-Moll, Gerard},
  title     = {Paint-it: Text-to-Texture Synthesis via Deep Convolutional Texture Map Optimization and Physically-Based Rendering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4347--4356}
}
Physics-Aware Hand-Object Interaction Denoising-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2024_CVPR,
  author    = {Luo, Haowen and Liu, Yunze and Yi, Li},
  title     = {Physics-Aware Hand-Object Interaction Denoising},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2341--2350}
}
VastGaussian: Vast 3D Gaussians for Large Scene Reconstruction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lin_2024_CVPR,
  author    = {Lin, Jiaqi and Li, Zhihao and Tang, Xiao and Liu, Jianzhuang and Liu, Shiyong and Liu, Jiayue and Lu, Yangdi and Wu, Xiaofei and Xu, Songcen and Yan, Youliang and Yang, Wenming},
  title     = {{VastGaussian}: Vast {3D} {Gaussians} for Large Scene Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5166--5175}
}
Edit One for All: Interactive Batch Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2024_CVPR,
  author    = {Nguyen, Thao and Ojha, Utkarsh and Li, Yuheng and Liu, Haotian and Lee, Yong Jae},
  title     = {Edit One for All: Interactive Batch Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8271--8280}
}
Deformable One-shot Face Stylization via DINO Semantic Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Yang and Chen, Zichong and Huang, Hui},
  title     = {Deformable One-shot Face Stylization via {DINO} Semantic Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7787--7796}
}
Coarse-to-Fine Latent Diffusion for Pose-Guided Person Image Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2024_CVPR,
  author    = {Lu, Yanzuo and Zhang, Manlin and Ma, Andy J and Xie, Xiaohua and Lai, Jianhuang},
  title     = {Coarse-to-Fine Latent Diffusion for Pose-Guided Person Image Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6420--6429},
}
OMG: Towards Open-vocabulary Motion Generation via Mixture of Controllers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2024_CVPR,
  author    = {Liang, Han and Bao, Jiacheng and Zhang, Ruichi and Ren, Sihan and Xu, Yuecheng and Yang, Sibei and Chen, Xin and Yu, Jingyi and Xu, Lan},
  title     = {{OMG}: Towards Open-vocabulary Motion Generation via Mixture of Controllers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {482--493},
}
Align Your Gaussians: Text-to-4D with Dynamic 3D Gaussians and Composed Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ling_2024_CVPR,
  author    = {Ling, Huan and Kim, Seung Wook and Torralba, Antonio and Fidler, Sanja and Kreis, Karsten},
  title     = {Align Your {Gaussians}: Text-to-{4D} with Dynamic {3D} {Gaussians} and Composed Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8576--8588},
}
PDF: A Probability-Driven Framework for Open World 3D Point Cloud Semantic Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Jinfeng and Yang, Siyuan and Li, Xianzhi and Tang, Yuan and Hao, Yixue and Hu, Long and Chen, Min},
  title     = {{PDF}: A Probability-Driven Framework for Open World {3D} Point Cloud Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5977--5986},
}
Test-Time Domain Generalization for Face Anti-Spoofing-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Qianyu and Zhang, Ke-Yue and Yao, Taiping and Lu, Xuequan and Ding, Shouhong and Ma, Lizhuang},
  title     = {Test-Time Domain Generalization for Face Anti-Spoofing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {175--187},
}
Real-time 3D-aware Portrait Video Relighting-
[pdf]
[supp]
[bibtex]@InProceedings{Cai_2024_CVPR,
  author    = {Cai, Ziqi and Jiang, Kaiwen and Chen, Shu-Yu and Lai, Yu-Kun and Fu, Hongbo and Shi, Boxin and Gao, Lin},
  title     = {Real-time {3D}-aware Portrait Video Relighting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6221--6231},
}
3DGS-Avatar: Animatable Avatars via Deformable 3D Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Qian_2024_CVPR,
  author    = {Qian, Zhiyin and Wang, Shaofei and Mihajlovic, Marko and Geiger, Andreas and Tang, Siyu},
  title     = {{3DGS-Avatar}: Animatable Avatars via Deformable {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5020--5030},
}
Style Aligned Image Generation via Shared Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hertz_2024_CVPR,
  author    = {Hertz, Amir and Voynov, Andrey and Fruchter, Shlomi and Cohen-Or, Daniel},
  title     = {Style Aligned Image Generation via Shared Attention},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4775--4785},
}
Back to 3D: Few-Shot 3D Keypoint Detection with Back-Projected 2D Features-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wimmer_2024_CVPR,
  author    = {Wimmer, Thomas and Wonka, Peter and Ovsjanikov, Maks},
  title     = {Back to {3D}: Few-Shot {3D} Keypoint Detection with Back-Projected {2D} Features},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4154--4164},
}
Neural Markov Random Field for Stereo Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guan_2024_CVPR,
  author    = {Guan, Tongfan and Wang, Chen and Liu, Yun-Hui},
  title     = {Neural {Markov} Random Field for Stereo Matching},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5459--5469},
}
PoseIRM: Enhance 3D Human Pose Estimation on Unseen Camera Settings via Invariant Risk Minimization-
[pdf]
[supp]
[bibtex]@InProceedings{Cai_2024_CVPR,
  author    = {Cai, Yanlu and Zhang, Weizhong and Wu, Yuan and Jin, Cheng},
  title     = {{PoseIRM}: Enhance {3D} Human Pose Estimation on Unseen Camera Settings via Invariant Risk Minimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2124--2133},
}
CCEdit: Creative and Controllable Video Editing via Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2024_CVPR,
  author    = {Feng, Ruoyu and Weng, Wenming and Wang, Yanhui and Yuan, Yuhui and Bao, Jianmin and Luo, Chong and Chen, Zhibo and Guo, Baining},
  title     = {{CCEdit}: Creative and Controllable Video Editing via Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6712--6722},
}
HAVE-FUN: Human Avatar Reconstruction from Few-Shot Unconstrained Images-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Xihe and Chen, Xingyu and Gao, Daiheng and Wang, Shaohui and Han, Xiaoguang and Wang, Baoyuan},
  title     = {{HAVE-FUN}: Human Avatar Reconstruction from Few-Shot Unconstrained Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {742--752},
}
DiffMorpher: Unleashing the Capability of Diffusion Models for Image Morphing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Kaiwen and Zhou, Yifan and Xu, Xudong and Dai, Bo and Pan, Xingang},
  title     = {{DiffMorpher}: Unleashing the Capability of Diffusion Models for Image Morphing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7912--7921},
}
Towards Real-World HDR Video Reconstruction: A Large-Scale Benchmark Dataset and A Two-Stage Alignment Network-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shu_2024_CVPR,
  author    = {Shu, Yong and Shen, Liquan and Hu, Xiangyu and Li, Mengyao and Zhou, Zihao},
  title     = {Towards Real-World {HDR} Video Reconstruction: A Large-Scale Benchmark Dataset and A Two-Stage Alignment Network},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2879--2888},
}
Efficient 3D Implicit Head Avatar with Mesh-anchored Hash Table Blendshapes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bai_2024_CVPR,
  author    = {Bai, Ziqian and Tan, Feitong and Fanello, Sean and Pandey, Rohit and Dou, Mingsong and Liu, Shichen and Tan, Ping and Zhang, Yinda},
  title     = {Efficient {3D} Implicit Head Avatar with Mesh-anchored Hash Table Blendshapes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1975--1984},
}
No Time to Train: Empowering Non-Parametric Networks for Few-shot 3D Scene Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2024_CVPR,
  author    = {Zhu, Xiangyang and Zhang, Renrui and He, Bowei and Guo, Ziyu and Liu, Jiaming and Xiao, Han and Fu, Chaoyou and Dong, Hao and Gao, Peng},
  title     = {No Time to Train: Empowering Non-Parametric Networks for Few-shot {3D} Scene Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3838--3847},
}
PhysGaussian: Physics-Integrated 3D Gaussians for Generative Dynamics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2024_CVPR,
  author    = {Xie, Tianyi and Zong, Zeshun and Qiu, Yuxing and Li, Xuan and Feng, Yutao and Yang, Yin and Jiang, Chenfanfu},
  title     = {{PhysGaussian}: Physics-Integrated {3D} {Gaussians} for Generative Dynamics},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4389--4398},
}
Spatio-Temporal Turbulence Mitigation: A Translational Perspective-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Xingguang and Chimitt, Nicholas and Chi, Yiheng and Mao, Zhiyuan and Chan, Stanley H.},
  title     = {Spatio-Temporal Turbulence Mitigation: A Translational Perspective},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2889--2899},
}
Grounded Text-to-Image Synthesis with Attention Refocusing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Phung_2024_CVPR,
  author    = {Phung, Quynh and Ge, Songwei and Huang, Jia-Bin},
  title     = {Grounded Text-to-Image Synthesis with Attention Refocusing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7932--7942},
}
IReNe: Instant Recoloring of Neural Radiance Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mazzucchelli_2024_CVPR,
  author    = {Mazzucchelli, Alessio and Garcia-Garcia, Adrian and Garces, Elena and Rivas-Manzaneque, Fernando and Moreno-Noguer, Francesc and Penate-Sanchez, Adrian},
  title     = {{IReNe}: Instant Recoloring of Neural Radiance Fields},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5937--5946},
}
Class Tokens Infusion for Weakly Supervised Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Yoon_2024_CVPR,
  author    = {Yoon, Sung-Hoon and Kwon, Hoyong and Kim, Hyeonseong and Yoon, Kuk-Jin},
  title     = {Class Tokens Infusion for Weakly Supervised Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3595--3605},
}
FedHCA2: Towards Hetero-Client Federated Multi-Task Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2024_CVPR,
  author    = {Lu, Yuxiang and Huang, Suizhi and Yang, Yuwen and Sirejiding, Shalayiding and Ding, Yue and Lu, Hongtao},
  title     = {{FedHCA2}: Towards Hetero-Client Federated Multi-Task Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5599--5609},
}
Motion Diversification Networks-
[pdf]
[bibtex]@InProceedings{Kim_2024_CVPR,
  author    = {Kim, Hee Jae and Ohn-Bar, Eshed},
  title     = {Motion Diversification Networks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1650--1660},
}
Telling Left from Right: Identifying Geometry-Aware Semantic Correspondence-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Junyi and Herrmann, Charles and Hur, Junhwa and Chen, Eric and Jampani, Varun and Sun, Deqing and Yang, Ming-Hsuan},
  title     = {Telling Left from Right: Identifying Geometry-Aware Semantic Correspondence},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3076--3085},
}
PAIR Diffusion: A Comprehensive Multimodal Object-Level Image Editor-
[pdf]
[supp]
[bibtex]@InProceedings{Goel_2024_CVPR,
  author    = {Goel, Vidit and Peruzzo, Elia and Jiang, Yifan and Xu, Dejia and Xu, Xingqian and Sebe, Nicu and Darrell, Trevor and Wang, Zhangyang and Shi, Humphrey},
  title     = {{PAIR} Diffusion: A Comprehensive Multimodal Object-Level Image Editor},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8609--8618},
}
TokenCompose: Text-to-Image Diffusion with Token-level Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Zirui and Sha, Zhizhou and Ding, Zheng and Wang, Yilin and Tu, Zhuowen},
  title     = {{TokenCompose}: Text-to-Image Diffusion with Token-level Supervision},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8553--8564},
}
FINER: Flexible Spectral-bias Tuning in Implicit NEural Representation by Variable-periodic Activation Functions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Zhen and Zhu, Hao and Zhang, Qi and Fu, Jingde and Deng, Weibing and Ma, Zhan and Guo, Yanwen and Cao, Xun},
  title     = {{FINER}: Flexible Spectral-bias Tuning in Implicit {NEural} Representation by Variable-periodic Activation Functions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2713--2722},
}
TextCraftor: Your Text Encoder Can be Image Quality Controller-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Yanyu and Liu, Xian and Kag, Anil and Hu, Ju and Idelbayev, Yerlan and Sagar, Dhritiman and Wang, Yanzhi and Tulyakov, Sergey and Ren, Jian},
  title     = {{TextCraftor}: Your Text Encoder Can be Image Quality Controller},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7985--7995},
}
IMPRINT: Generative Object Compositing by Learning Identity-Preserving Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2024_CVPR,
  author    = {Song, Yizhi and Zhang, Zhifei and Lin, Zhe and Cohen, Scott and Price, Brian and Zhang, Jianming and Kim, Soo Ye and Zhang, He and Xiong, Wei and Aliaga, Daniel},
  title     = {{IMPRINT}: Generative Object Compositing by Learning Identity-Preserving Representation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8048--8058},
}
Portrait4D: Learning One-Shot 4D Head Avatar Synthesis using Synthetic Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2024_CVPR,
  author    = {Deng, Yu and Wang, Duomin and Ren, Xiaohang and Chen, Xingyu and Wang, Baoyuan},
  title     = {{Portrait4D}: Learning One-Shot {4D} Head Avatar Synthesis using Synthetic Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7119--7130},
}
ConvoFusion: Multi-Modal Conversational Diffusion for Co-Speech Gesture Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mughal_2024_CVPR,
  author    = {Mughal, Muhammad Hamza and Dabral, Rishabh and Habibie, Ikhsanul and Donatelli, Lucia and Habermann, Marc and Theobalt, Christian},
  title     = {{ConvoFusion}: Multi-Modal Conversational Diffusion for Co-Speech Gesture Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1388--1398},
}
Boosting Neural Representations for Videos with a Conditional Decoder-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Xinjie and Yang, Ren and He, Dailan and Ge, Xingtong and Xu, Tongda and Wang, Yan and Qin, Hongwei and Zhang, Jun},
  title     = {Boosting Neural Representations for Videos with a Conditional Decoder},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2556--2566},
}
From Audio to Photoreal Embodiment: Synthesizing Humans in Conversations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ng_2024_CVPR,
  author    = {Ng, Evonne and Romero, Javier and Bagautdinov, Timur and Bai, Shaojie and Darrell, Trevor and Kanazawa, Angjoo and Richard, Alexander},
  title     = {From Audio to Photoreal Embodiment: Synthesizing Humans in Conversations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1001--1010},
}
Single-View Scene Point Cloud Human Grasp Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Yan-Kang and Xing, Chengyi and Wei, Yi-Lin and Wu, Xiao-Ming and Zheng, Wei-Shi},
  title     = {Single-View Scene Point Cloud Human Grasp Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {831--841},
}
One-step Diffusion with Distribution Matching Distillation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yin_2024_CVPR,
  author    = {Yin, Tianwei and Gharbi, Micha{\"e}l and Zhang, Richard and Shechtman, Eli and Durand, Fr{\'e}do and Freeman, William T. and Park, Taesung},
  title     = {One-step Diffusion with Distribution Matching Distillation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6613--6623},
}
Rethinking Human Motion Prediction with Symplectic Integral-
[pdf]
[bibtex]@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Haipeng and Lyu, Kedi and Liu, Zhenguang and Yin, Yifang and Yang, Xun and Lyu, Yingda},
  title     = {Rethinking Human Motion Prediction with Symplectic Integral},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2134--2143},
}
CPGA: Coding Priors-Guided Aggregation Network for Compressed Video Quality Enhancement-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhu_2024_CVPR,
  author    = {Zhu, Qiang and Hao, Jinhua and Ding, Yukang and Liu, Yu and Mo, Qiao and Sun, Ming and Zhou, Chao and Zhu, Shuyuan},
  title     = {{CPGA}: Coding Priors-Guided Aggregation Network for Compressed Video Quality Enhancement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2964--2974},
}
MicroCinema: A Divide-and-Conquer Approach for Text-to-Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Yanhui and Bao, Jianmin and Weng, Wenming and Feng, Ruoyu and Yin, Dacheng and Yang, Tao and Zhang, Jingxu and Dai, Qi and Zhao, Zhiyuan and Wang, Chunyu and Qiu, Kai and Yuan, Yuhui and Sun, Xiaoyan and Luo, Chong and Guo, Baining},
  title     = {{MicroCinema}: A Divide-and-Conquer Approach for Text-to-Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8414--8424},
}
Structure Matters: Tackling the Semantic Discrepancy in Diffusion Models for Image Inpainting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Haipeng and Wang, Yang and Qian, Biao and Wang, Meng and Rui, Yong},
  title     = {Structure Matters: Tackling the Semantic Discrepancy in Diffusion Models for Image Inpainting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8038--8047},
}
Makeup Prior Models for 3D Facial Makeup Estimation and Applications-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Xingchao and Taketomi, Takafumi and Endo, Yuki and Kanamori, Yoshihiro},
  title     = {Makeup Prior Models for {3D} Facial Makeup Estimation and Applications},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2165--2176},
}
I'M HOI: Inertia-aware Monocular Capture of 3D Human-Object Interactions-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Chengfeng and Zhang, Juze and Du, Jiashen and Shan, Ziwei and Wang, Junye and Yu, Jingyi and Wang, Jingya and Xu, Lan},
  title     = {{I'M HOI}: Inertia-aware Monocular Capture of {3D} Human-Object Interactions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {729--741},
}
Dynamic Policy-Driven Adaptive Multi-Instance Learning for Whole Slide Image Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2024_CVPR,
  author    = {Zheng, Tingting and Jiang, Kui and Yao, Hongxun},
  title     = {Dynamic Policy-Driven Adaptive Multi-Instance Learning for Whole Slide Image Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8028--8037},
}
LiDAR4D: Dynamic Neural Fields for Novel Space-time View LiDAR Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2024_CVPR,
  author    = {Zheng, Zehan and Lu, Fan and Xue, Weiyi and Chen, Guang and Jiang, Changjun},
  title     = {{LiDAR4D}: Dynamic Neural Fields for Novel Space-time View {LiDAR} Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5145--5154},
}
Exploiting Diffusion Prior for Generalizable Dense Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2024_CVPR,
  author    = {Lee, Hsin-Ying and Tseng, Hung-Yu and Lee, Hsin-Ying and Yang, Ming-Hsuan},
  title     = {Exploiting Diffusion Prior for Generalizable Dense Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7861--7871},
}
Orthogonal Adaptation for Modular Customization of Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Po_2024_CVPR,
  author    = {Po, Ryan and Yang, Guandao and Aberman, Kfir and Wetzstein, Gordon},
  title     = {Orthogonal Adaptation for Modular Customization of Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7964--7973},
}
Optimizing Diffusion Noise Can Serve As Universal Motion Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Karunratanakul_2024_CVPR,
  author    = {Karunratanakul, Korrawe and Preechakul, Konpat and Aksan, Emre and Beeler, Thabo and Suwajanakorn, Supasorn and Tang, Siyu},
  title     = {Optimizing Diffusion Noise Can Serve As Universal Motion Priors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1334--1345},
}
OVFoodSeg: Elevating Open-Vocabulary Food Image Segmentation via Image-Informed Textual Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Xiongwei and Yu, Sicheng and Lim, Ee-Peng and Ngo, Chong-Wah},
  title     = {{OVFoodSeg}: Elevating Open-Vocabulary Food Image Segmentation via Image-Informed Textual Representation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4144--4153},
}
XFeat: Accelerated Features for Lightweight Image Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Potje_2024_CVPR,
  author    = {Potje, Guilherme and Cadar, Felipe and Araujo, Andr{\'e} and Martins, Renato and Nascimento, Erickson R.},
  title     = {{XFeat}: Accelerated Features for Lightweight Image Matching},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2682--2691},
}
VideoRF: Rendering Dynamic Radiance Fields as 2D Feature Video Streams-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Liao and Yao, Kaixin and Guo, Chengcheng and Zhang, Zhirui and Hu, Qiang and Yu, Jingyi and Xu, Lan and Wu, Minye},
  title     = {{VideoRF}: Rendering Dynamic Radiance Fields as {2D} Feature Video Streams},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {470--481},
}
DPHMs: Diffusion Parametric Head Models for Depth-based Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2024_CVPR,
  author    = {Tang, Jiapeng and Dai, Angela and Nie, Yinyu and Markhasin, Lev and Thies, Justus and Nie{\ss}ner, Matthias},
  title     = {{DPHMs}: Diffusion Parametric Head Models for Depth-based Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1111--1122},
}
DetDiffusion: Synergizing Generative and Perceptive Models for Enhanced Data Generation and Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Yibo and Gao, Ruiyuan and Chen, Kai and Zhou, Kaiqiang and Cai, Yingjie and Hong, Lanqing and Li, Zhenguo and Jiang, Lihui and Yeung, Dit-Yan and Xu, Qiang and Zhang, Kai},
  title     = {{DetDiffusion}: Synergizing Generative and Perceptive Models for Enhanced Data Generation and Perception},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7246--7255},
}
Perception-Oriented Video Frame Interpolation via Asymmetric Blending-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Guangyang and Tao, Xin and Li, Changlin and Wang, Wenyi and Liu, Xiaohong and Zheng, Qingqing},
  title     = {Perception-Oriented Video Frame Interpolation via Asymmetric Blending},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2753--2762},
}
DUDF: Differentiable Unsigned Distance Fields with Hyperbolic Scaling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fainstein_2024_CVPR,
  author    = {Fainstein, Miguel and Siless, Viviana and Iarussi, Emmanuel},
  title     = {{DUDF}: Differentiable Unsigned Distance Fields with Hyperbolic Scaling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4484--4493},
}
2S-UDF: A Novel Two-stage UDF Learning Method for Robust Non-watertight Model Reconstruction from Multi-view Images-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2024_CVPR,
  author    = {Deng, Junkai and Hou, Fei and Chen, Xuhui and Wang, Wencheng and He, Ying},
  title     = {{2S-UDF}: A Novel Two-stage {UDF} Learning Method for Robust Non-watertight Model Reconstruction from Multi-view Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5084--5093},
}
UniVS: Unified and Universal Video Segmentation with Prompts as Queries-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Minghan and Li, Shuai and Zhang, Xindong and Zhang, Lei},
  title     = {{UniVS}: Unified and Universal Video Segmentation with Prompts as Queries},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3227--3238},
}
Efficiently Assemble Normalization Layers and Regularization for Federated Domain Generalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Le_2024_CVPR,
  author    = {Le, Khiem and Ho, Long and Do, Cuong and Le-Phuoc, Danh and Wong, Kok-Seng},
  title     = {Efficiently Assemble Normalization Layers and Regularization for Federated Domain Generalization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6027--6036},
}
Depth Information Assisted Collaborative Mutual Promotion Network for Single Image Dehazing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Yafei and Zhou, Shen and Li, Huafeng},
  title     = {Depth Information Assisted Collaborative Mutual Promotion Network for Single Image Dehazing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2846--2855},
}
Unlocking the Potential of Pre-trained Vision Transformers for Few-Shot Semantic Segmentation through Relationship Descriptors-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Ziqin and Xu, Hai-Ming and Shu, Yangyang and Liu, Lingqiao},
  title     = {Unlocking the Potential of Pre-trained Vision Transformers for Few-Shot Semantic Segmentation through Relationship Descriptors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3817--3827},
}
CustomListener: Text-guided Responsive Interaction for User-friendly Listening Head Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Xi and Guo, Ying and Zhen, Cheng and Li, Tong and Ao, Yingying and Yan, Pengfei},
  title     = {{CustomListener}: Text-guided Responsive Interaction for User-friendly Listening Head Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2415--2424},
}
Fun with Flags: Robust Principal Directions via Flag Manifolds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mankovich_2024_CVPR,
  author    = {Mankovich, Nathan and Camps-Valls, Gustau and Birdal, Tolga},
  title     = {Fun with Flags: Robust Principal Directions via Flag Manifolds},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {330--340},
}
Generating Non-Stationary Textures using Self-Rectification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Yang and Xiao, Rongjun and Lischinski, Dani and Cohen-Or, Daniel and Huang, Hui},
  title     = {Generating Non-Stationary Textures using Self-Rectification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7767--7776},
}
SPU-PMD: Self-Supervised Point Cloud Upsampling via Progressive Mesh Deformation-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Yanzhe and Chen, Rong and Li, Yushi and Li, Yixi and Tan, Xuehou},
  title     = {{SPU-PMD}: Self-Supervised Point Cloud Upsampling via Progressive Mesh Deformation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5188--5197},
}
Snap Video: Scaled Spatiotemporal Transformers for Text-to-Video Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Menapace_2024_CVPR,
  author    = {Menapace, Willi and Siarohin, Aliaksandr and Skorokhodov, Ivan and Deyneka, Ekaterina and Chen, Tsai-Shien and Kag, Anil and Fang, Yuwei and Stoliar, Aleksei and Ricci, Elisa and Ren, Jian and Tulyakov, Sergey},
  title     = {{Snap Video}: Scaled Spatiotemporal Transformers for Text-to-Video Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7038--7048},
}
JointSQ: Joint Sparsification-Quantization for Distributed Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2024_CVPR,
  author    = {Xie, Weiying and Li, Haowei and Ma, Jitao and Li, Yunsong and Lei, Jie and Liu, Donglai and Fang, Leyuan},
  title     = {{JointSQ}: Joint Sparsification-Quantization for Distributed Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5778--5787},
}
A Unified Framework for Human-centric Point Cloud Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Yiteng and Ye, Kecheng and Han, Xiao and Ren, Yiming and Zhu, Xinge and Ma, Yuexin},
  title     = {A Unified Framework for Human-centric Point Cloud Video Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1155--1164},
}
Shadow-Enlightened Image Outpainting-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2024_CVPR, author = {Yu, Hang and Li, Ruilin and Xie, Shaorong and Qiu, Jiayan}, title = {Shadow-Enlightened Image Outpainting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7850-7860} }
BOTH2Hands: Inferring 3D Hands from Both Text Prompts and Body Dynamics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Wenqian and Huang, Molin and Zhou, Yuxuan and Zhang, Juze and Yu, Jingyi and Wang, Jingya and Xu, Lan}, title = {BOTH2Hands: Inferring 3D Hands from Both Text Prompts and Body Dynamics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2393-2404} }
DreamAvatar: Text-and-Shape Guided 3D Human Avatar Generation via Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2024_CVPR, author = {Cao, Yukang and Cao, Yan-Pei and Han, Kai and Shan, Ying and Wong, Kwan-Yee K.}, title = {DreamAvatar: Text-and-Shape Guided 3D Human Avatar Generation via Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {958-968} }
Bidirectional Autoregessive Diffusion Model for Dance Generation-
[pdf]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Canyu and Tang, Youbao and Zhang, Ning and Lin, Ruei-Sung and Han, Mei and Xiao, Jing and Wang, Song}, title = {Bidirectional Autoregessive Diffusion Model for Dance Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {687-696} }
FRESCO: Spatial-Temporal Correspondence for Zero-Shot Video Translation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Shuai and Zhou, Yifan and Liu, Ziwei and Loy, Chen Change}, title = {FRESCO: Spatial-Temporal Correspondence for Zero-Shot Video Translation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8703-8712} }
SplattingAvatar: Realistic Real-Time Human Avatars with Mesh-Embedded Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shao_2024_CVPR, author = {Shao, Zhijing and Wang, Zhaolong and Li, Zhuang and Wang, Duotun and Lin, Xiangru and Zhang, Yu and Fan, Mingming and Wang, Zeyu}, title = {SplattingAvatar: Realistic Real-Time Human Avatars with Mesh-Embedded Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1606-1616} }
MoSAR: Monocular Semi-Supervised Model for Avatar Reconstruction using Differentiable Shading-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dib_2024_CVPR, author = {Dib, Abdallah and Hafemann, Luiz Gustavo and Got, Emeline and Anderson, Trevor and Fadaeinejad, Amin and Cruz, Rafael M. O. and Carbonneau, Marc-Andr\'e}, title = {MoSAR: Monocular Semi-Supervised Model for Avatar Reconstruction using Differentiable Shading}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1770-1780} }
RankED: Addressing Imbalance and Uncertainty in Edge Detection Using Ranking-based Losses-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cetinkaya_2024_CVPR, author = {Cetinkaya, Bedrettin and Kalkan, Sinan and Akbas, Emre}, title = {RankED: Addressing Imbalance and Uncertainty in Edge Detection Using Ranking-based Losses}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3239-3249} }
DiffHuman: Probabilistic Photorealistic 3D Reconstruction of Humans-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sengupta_2024_CVPR, author = {Sengupta, Akash and Alldieck, Thiemo and Kolotouros, Nikos and Corona, Enric and Zanfir, Andrei and Sminchisescu, Cristian}, title = {DiffHuman: Probabilistic Photorealistic 3D Reconstruction of Humans}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1439-1449} }
Permutation Equivariance of Transformers and Its Applications-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Hengyuan and Xiang, Liyao and Ye, Hangyu and Yao, Dixi and Chu, Pengzhi and Li, Baochun}, title = {Permutation Equivariance of Transformers and Its Applications}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5987-5996} }
SVDTree: Semantic Voxel Diffusion for Single Image Tree Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Yuan and Liu, Zhihao and Benes, Bedrich and Zhang, Xiaopeng and Guo, Jianwei}, title = {SVDTree: Semantic Voxel Diffusion for Single Image Tree Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4692-4702} }
Rethinking FID: Towards a Better Evaluation Metric for Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jayasumana_2024_CVPR, author = {Jayasumana, Sadeep and Ramalingam, Srikumar and Veit, Andreas and Glasner, Daniel and Chakrabarti, Ayan and Kumar, Sanjiv}, title = {Rethinking FID: Towards a Better Evaluation Metric for Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9307-9315} }
SuperPrimitive: Scene Reconstruction at a Primitive Level-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mazur_2024_CVPR, author = {Mazur, Kirill and Bae, Gwangbin and Davison, Andrew J.}, title = {SuperPrimitive: Scene Reconstruction at a Primitive Level}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4979-4989} }
TFMQ-DM: Temporal Feature Maintenance Quantization for Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Yushi and Gong, Ruihao and Liu, Jing and Chen, Tianlong and Liu, Xianglong}, title = {TFMQ-DM: Temporal Feature Maintenance Quantization for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7362-7371} }
CONFORM: Contrast is All You Need for High-Fidelity Text-to-Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Meral_2024_CVPR, author = {Meral, Tuna Han Salih and Simsar, Enis and Tombari, Federico and Yanardag, Pinar}, title = {CONFORM: Contrast is All You Need for High-Fidelity Text-to-Image Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9005-9014} }
Self-Supervised Facial Representation Learning with Facial Region Awareness-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2024_CVPR, author = {Gao, Zheng and Patras, Ioannis}, title = {Self-Supervised Facial Representation Learning with Facial Region Awareness}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2081-2092} }
GaussianDreamer: Fast Generation from Text to 3D Gaussians by Bridging 2D and 3D Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yi_2024_CVPR, author = {Yi, Taoran and Fang, Jiemin and Wang, Junjie and Wu, Guanjun and Xie, Lingxi and Zhang, Xiaopeng and Liu, Wenyu and Tian, Qi and Wang, Xinggang}, title = {GaussianDreamer: Fast Generation from Text to 3D Gaussians by Bridging 2D and 3D Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6796-6807} }
Open-Vocabulary Attention Maps with Token Optimization for Semantic Segmentation in Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Marcos-Manchon_2024_CVPR, author = {Marcos-Manch\'on, Pablo and Alcover-Couso, Roberto and SanMiguel, Juan C. and Mart{\'\i}nez, Jos\'e M.}, title = {Open-Vocabulary Attention Maps with Token Optimization for Semantic Segmentation in Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9242-9252} }
DreamComposer: Controllable 3D Object Generation via Multi-View Conditions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Yunhan and Huang, Yukun and Wu, Xiaoyang and Guo, Yuan-Chen and Zhang, Song-Hai and Zhao, Hengshuang and He, Tong and Liu, Xihui}, title = {DreamComposer: Controllable 3D Object Generation via Multi-View Conditions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8111-8120} }
Self-Calibrating Vicinal Risk Minimisation for Model Calibration-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Jiawei and Ye, Changkun and Cui, Ruikai and Barnes, Nick}, title = {Self-Calibrating Vicinal Risk Minimisation for Model Calibration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3335-3345} }
LPSNet: End-to-End Human Pose and Shape Estimation with Lensless Imaging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ge_2024_CVPR, author = {Ge, Haoyang and Feng, Qiao and Jia, Hailong and Li, Xiongzheng and Yin, Xiangjun and Zhou, You and Yang, Jingyu and Li, Kun}, title = {LPSNet: End-to-End Human Pose and Shape Estimation with Lensless Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1471-1480} }
Towards a Simultaneous and Granular Identity-Expression Control in Personalized Face Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Renshuai and Ma, Bowen and Zhang, Wei and Hu, Zhipeng and Fan, Changjie and Lv, Tangjie and Ding, Yu and Cheng, Xuan}, title = {Towards a Simultaneous and Granular Identity-Expression Control in Personalized Face Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2114-2123} }
PEEKABOO: Interactive Video Generation via Masked-Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jain_2024_CVPR, author = {Jain, Yash and Nasery, Anshul and Vineet, Vibhav and Behl, Harkirat}, title = {PEEKABOO: Interactive Video Generation via Masked-Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8079-8088} }
High-fidelity Person-centric Subject-to-Image Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Yibin and Zhang, Weizhong and Zheng, Jianwei and Jin, Cheng}, title = {High-fidelity Person-centric Subject-to-Image Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7675-7684} }
JeDi: Joint-Image Diffusion Models for Finetuning-Free Personalized Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2024_CVPR, author = {Zeng, Yu and Patel, Vishal M. and Wang, Haochen and Huang, Xun and Wang, Ting-Chun and Liu, Ming-Yu and Balaji, Yogesh}, title = {JeDi: Joint-Image Diffusion Models for Finetuning-Free Personalized Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6786-6795} }
HandDiff: 3D Hand Pose Estimation with Diffusion on Image-Point Cloud-
[pdf]
[arXiv]
[bibtex]@InProceedings{Cheng_2024_CVPR, author = {Cheng, Wencan and Tang, Hao and Van Gool, Luc and Ko, Jong Hwan}, title = {HandDiff: 3D Hand Pose Estimation with Diffusion on Image-Point Cloud}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2274-2284} }
VP3D: Unleashing 2D Visual Prompt for Text-to-3D Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Yang and Pan, Yingwei and Yang, Haibo and Yao, Ting and Mei, Tao}, title = {VP3D: Unleashing 2D Visual Prompt for Text-to-3D Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4896-4905} }
Content-Style Decoupling for Unsupervised Makeup Transfer without Generating Pseudo Ground Truth-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2024_CVPR, author = {Sun, Zhaoyang and Xiong, Shengwu and Chen, Yaxiong and Rong, Yi}, title = {Content-Style Decoupling for Unsupervised Makeup Transfer without Generating Pseudo Ground Truth}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7601-7610} }
You Only Need Less Attention at Each Stage in Vision Transformers-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Shuoxi and Liu, Hanpeng and Lin, Stephen and He, Kun}, title = {You Only Need Less Attention at Each Stage in Vision Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6057-6066} }
Generalizable Novel-View Synthesis using a Stereo Camera-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2024_CVPR, author = {Lee, Haechan and Jin, Wonjoon and Baek, Seung-Hwan and Cho, Sunghyun}, title = {Generalizable Novel-View Synthesis using a Stereo Camera}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4939-4948} }
Digital Life Project: Autonomous 3D Characters with Social Intelligence-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2024_CVPR, author = {Cai, Zhongang and Jiang, Jianping and Qing, Zhongfei and Guo, Xinying and Zhang, Mingyuan and Lin, Zhengyu and Mei, Haiyi and Wei, Chen and Wang, Ruisi and Yin, Wanqi and Pan, Liang and Fan, Xiangyu and Du, Han and Gao, Peng and Yang, Zhitao and Gao, Yang and Li, Jiaqi and Ren, Tianxiang and Wei, Yukun and Wang, Xiaogang and Loy, Chen Change and Yang, Lei and Liu, Ziwei}, title = {Digital Life Project: Autonomous 3D Characters with Social Intelligence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {582-592} }
Rethinking Prior Information Generation with CLIP for Few-Shot Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Jin and Zhang, Bingfeng and Pang, Jian and Chen, Honglong and Liu, Weifeng}, title = {Rethinking Prior Information Generation with CLIP for Few-Shot Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3941-3951} }
Generative Rendering: Controllable 4D-Guided Video Generation with 2D Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2024_CVPR, author = {Cai, Shengqu and Ceylan, Duygu and Gadelha, Matheus and Huang, Chun-Hao Paul and Wang, Tuanfeng Yang and Wetzstein, Gordon}, title = {Generative Rendering: Controllable 4D-Guided Video Generation with 2D Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7611-7620} }
Relightable Gaussian Codec Avatars-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Saito_2024_CVPR, author = {Saito, Shunsuke and Schwartz, Gabriel and Simon, Tomas and Li, Junxuan and Nam, Giljoo}, title = {Relightable Gaussian Codec Avatars}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {130-141} }
Single-to-Dual-View Adaptation for Egocentric 3D Hand Pose Estimation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Ruicong and Ohkawa, Takehiko and Zhang, Mingfang and Sato, Yoichi}, title = {Single-to-Dual-View Adaptation for Egocentric 3D Hand Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {677-686} }
Animate Anyone: Consistent and Controllable Image-to-Video Synthesis for Character Animation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2024_CVPR, author = {Hu, Li}, title = {Animate Anyone: Consistent and Controllable Image-to-Video Synthesis for Character Animation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8153-8163} }
FreeCustom: Tuning-Free Customized Image Generation for Multi-Concept Composition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ding_2024_CVPR, author = {Ding, Ganggui and Zhao, Canyu and Wang, Wen and Yang, Zhen and Liu, Zide and Chen, Hao and Shen, Chunhua}, title = {FreeCustom: Tuning-Free Customized Image Generation for Multi-Concept Composition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9089-9098} }
MaskINT: Video Editing via Interpolative Non-autoregressive Masked Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2024_CVPR, author = {Ma, Haoyu and Mahdizadehaghdam, Shahin and Wu, Bichen and Fan, Zhipeng and Gu, Yuchao and Zhao, Wenliang and Shapira, Lior and Xie, Xiaohui}, title = {MaskINT: Video Editing via Interpolative Non-autoregressive Masked Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7403-7412} }
Learning Multi-Dimensional Human Preference for Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Sixian and Wang, Bohan and Wu, Junqiang and Li, Yan and Gao, Tingting and Zhang, Di and Wang, Zhongyuan}, title = {Learning Multi-Dimensional Human Preference for Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8018-8027} }
ViVid-1-to-3: Novel View Synthesis with Video Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Kwak_2024_CVPR, author = {Kwak, Jeong-gi and Dong, Erqun and Jin, Yuhe and Ko, Hanseok and Mahajan, Shweta and Yi, Kwang Moo}, title = {ViVid-1-to-3: Novel View Synthesis with Video Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6775-6785} }
Generating Human Motion in 3D Scenes from Text Descriptions-
[pdf]
[arXiv]
[bibtex]@InProceedings{Cen_2024_CVPR, author = {Cen, Zhi and Pi, Huaijin and Peng, Sida and Shen, Zehong and Yang, Minghui and Zhu, Shuai and Bao, Hujun and Zhou, Xiaowei}, title = {Generating Human Motion in 3D Scenes from Text Descriptions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1855-1866} }
QDFormer: Towards Robust Audiovisual Segmentation in Complex Environments with Quantization-based Semantic Decomposition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Xiang and Wang, Jinglu and Xu, Xiaohao and Peng, Xiulian and Singh, Rita and Lu, Yan and Raj, Bhiksha}, title = {QDFormer: Towards Robust Audiovisual Segmentation in Complex Environments with Quantization-based Semantic Decomposition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3402-3413} }
Fast Adaptation for Human Pose Estimation via Meta-Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2024_CVPR, author = {Hu, Shengxiang and Sun, Huaijiang and Li, Bin and Wei, Dong and Li, Weiqing and Lu, Jianfeng}, title = {Fast Adaptation for Human Pose Estimation via Meta-Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1792-1801} }
WOUAF: Weight Modulation for User Attribution and Fingerprinting in Text-to-Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Changhoon and Min, Kyle and Patel, Maitreya and Cheng, Sheng and Yang, Yezhou}, title = {WOUAF: Weight Modulation for User Attribution and Fingerprinting in Text-to-Image Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8974-8983} }
Text-Conditioned Generative Model of 3D Strand-based Human Hairstyles-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sklyarova_2024_CVPR, author = {Sklyarova, Vanessa and Zakharov, Egor and Hilliges, Otmar and Black, Michael J. and Thies, Justus}, title = {Text-Conditioned Generative Model of 3D Strand-based Human Hairstyles}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4703-4712} }
Skeleton-in-Context: Unified Skeleton Sequence Modeling with In-Context Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Xinshun and Fang, Zhongbin and Li, Xia and Li, Xiangtai and Chen, Chen and Liu, Mengyuan}, title = {Skeleton-in-Context: Unified Skeleton Sequence Modeling with In-Context Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2436-2446} }
DemoFusion: Democratising High-Resolution Image Generation With No $$$-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2024_CVPR, author = {Du, Ruoyi and Chang, Dongliang and Hospedales, Timothy and Song, Yi-Zhe and Ma, Zhanyu}, title = {DemoFusion: Democratising High-Resolution Image Generation With No \$\$\$}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6159-6168} }
Total Selfie: Generating Full-Body Selfies-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Bowei and Curless, Brian and Kemelmacher-Shlizerman, Ira and Seitz, Steven M.}, title = {Total Selfie: Generating Full-Body Selfies}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6701-6711} }
Learning Structure-from-Motion with Graph Attention Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Brynte_2024_CVPR, author = {Brynte, Lucas and Iglesias, Jos\'e Pedro and Olsson, Carl and Kahl, Fredrik}, title = {Learning Structure-from-Motion with Graph Attention Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4808-4817} }
Geometry Transfer for Stylizing Radiance Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jung_2024_CVPR, author = {Jung, Hyunyoung and Nam, Seonghyeon and Sarafianos, Nikolaos and Yoo, Sungjoo and Sorkine-Hornung, Alexander and Ranjan, Rakesh}, title = {Geometry Transfer for Stylizing Radiance Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8565-8575} }
Holoported Characters: Real-time Free-viewpoint Rendering of Humans from Sparse RGB Cameras-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shetty_2024_CVPR, author = {Shetty, Ashwath and Habermann, Marc and Sun, Guoxing and Luvizon, Diogo and Golyanik, Vladislav and Theobalt, Christian}, title = {Holoported Characters: Real-time Free-viewpoint Rendering of Humans from Sparse RGB Cameras}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1206-1215} }
SEAS: ShapE-Aligned Supervision for Person Re-Identification-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2024_CVPR, author = {Zhu, Haidong and Budhwant, Pranav and Zheng, Zhaoheng and Nevatia, Ram}, title = {SEAS: ShapE-Aligned Supervision for Person Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {164-174} }
Making Vision Transformers Truly Shift-Equivariant-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rojas-Gomez_2024_CVPR, author = {Rojas-Gomez, Renan A. and Lim, Teck-Yian and Do, Minh N. and Yeh, Raymond A.}, title = {Making Vision Transformers Truly Shift-Equivariant}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5568-5577} }
SpikeNeRF: Learning Neural Radiance Fields from Continuous Spike Stream-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2024_CVPR, author = {Zhu, Lin and Jia, Kangmin and Zhao, Yifan and Qi, Yunshan and Wang, Lizhi and Huang, Hua}, title = {SpikeNeRF: Learning Neural Radiance Fields from Continuous Spike Stream}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6285-6295} }
A Semi-supervised Nighttime Dehazing Baseline with Spatial-Frequency Aware and Realistic Brightness Constraint-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cong_2024_CVPR, author = {Cong, Xiaofeng and Gui, Jie and Zhang, Jing and Hou, Junming and Shen, Hao}, title = {A Semi-supervised Nighttime Dehazing Baseline with Spatial-Frequency Aware and Realistic Brightness Constraint}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2631-2640} }
Deep Equilibrium Diffusion Restoration with Parallel Sampling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2024_CVPR, author = {Cao, Jiezhang and Shi, Yue and Zhang, Kai and Zhang, Yulun and Timofte, Radu and Van Gool, Luc}, title = {Deep Equilibrium Diffusion Restoration with Parallel Sampling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2824-2834} }
Gaussian Shell Maps for Efficient 3D Human Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Abdal_2024_CVPR, author = {Abdal, Rameen and Yifan, Wang and Shi, Zifan and Xu, Yinghao and Po, Ryan and Kuang, Zhengfei and Chen, Qifeng and Yeung, Dit-Yan and Wetzstein, Gordon}, title = {Gaussian Shell Maps for Efficient 3D Human Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9441-9451} }
MoST: Motion Style Transformer Between Diverse Action Contents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Boeun and Kim, Jungho and Chang, Hyung Jin and Choi, Jin Young}, title = {MoST: Motion Style Transformer Between Diverse Action Contents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1705-1714} }
Prompting Hard or Hardly Prompting: Prompt Inversion for Text-to-Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mahajan_2024_CVPR, author = {Mahajan, Shweta and Rahman, Tanzila and Yi, Kwang Moo and Sigal, Leonid}, title = {Prompting Hard or Hardly Prompting: Prompt Inversion for Text-to-Image Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6808-6817} }
Unmixing Before Fusion: A Generalized Paradigm for Multi-Source-based Hyperspectral Image Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2024_CVPR, author = {Yu, Yang and Pan, Erting and Wang, Xinya and Wu, Yuheng and Mei, Xiaoguang and Ma, Jiayi}, title = {Unmixing Before Fusion: A Generalized Paradigm for Multi-Source-based Hyperspectral Image Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9297-9306} }
CoDi: Conditional Diffusion Distillation for Higher-Fidelity and Faster Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mei_2024_CVPR, author = {Mei, Kangfu and Delbracio, Mauricio and Talebi, Hossein and Tu, Zhengzhong and Patel, Vishal M. and Milanfar, Peyman}, title = {CoDi: Conditional Diffusion Distillation for Higher-Fidelity and Faster Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9048-9058} }
X-Adapter: Adding Universal Compatibility of Plugins for Upgraded Diffusion Model-
[pdf]
[supp]
[bibtex]@InProceedings{Ran_2024_CVPR, author = {Ran, Lingmin and Cun, Xiaodong and Liu, Jia-Wei and Zhao, Rui and Zijie, Song and Wang, Xintao and Keppo, Jussi and Shou, Mike Zheng}, title = {X-Adapter: Adding Universal Compatibility of Plugins for Upgraded Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8775-8784} }
CADTalk: An Algorithm and Benchmark for Semantic Commenting of CAD Programs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2024_CVPR, author = {Yuan, Haocheng and Xu, Jing and Pan, Hao and Bousseau, Adrien and Mitra, Niloy J. and Li, Changjian}, title = {CADTalk: An Algorithm and Benchmark for Semantic Commenting of CAD Programs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3753-3762} }
Inversion-Free Image Editing with Language-Guided Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Sihan and Huang, Yidong and Pan, Jiayi and Ma, Ziqiao and Chai, Joyce}, title = {Inversion-Free Image Editing with Language-Guided Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9452-9461} }
HumMUSS: Human Motion Understanding using State Space Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mondal_2024_CVPR, author = {Mondal, Arnab and Alletto, Stefano and Tome, Denis}, title = {HumMUSS: Human Motion Understanding using State Space Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2318-2330} }
Drag Your Noise: Interactive Point-based Editing via Diffusion Semantic Propagation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Haofeng and Xu, Chenshu and Yang, Yifei and Zeng, Lihua and He, Shengfeng}, title = {Drag Your Noise: Interactive Point-based Editing via Diffusion Semantic Propagation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6743-6752} }
ContextSeg: Sketch Semantic Segmentation by Querying the Context with Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Jiawei and Li, Changjian}, title = {ContextSeg: Sketch Semantic Segmentation by Querying the Context with Attention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3679-3688} }
Taming the Tail in Class-Conditional GANs: Knowledge Sharing via Unconditional Training at Lower Resolutions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Khorram_2024_CVPR, author = {Khorram, Saeed and Jiang, Mingqi and Shahbazi, Mohamad and Danesh, Mohamad H. and Fuxin, Li}, title = {Taming the Tail in Class-Conditional GANs: Knowledge Sharing via Unconditional Training at Lower Resolutions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7580-7590} }
VideoSwap: Customized Video Subject Swapping with Interactive Semantic Point Correspondence-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gu_2024_CVPR, author = {Gu, Yuchao and Zhou, Yipin and Wu, Bichen and Yu, Licheng and Liu, Jia-Wei and Zhao, Rui and Wu, Jay Zhangjie and Zhang, David Junhao and Shou, Mike Zheng and Tang, Kevin}, title = {VideoSwap: Customized Video Subject Swapping with Interactive Semantic Point Correspondence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7621-7630} }
Hierarchical Histogram Threshold Segmentation - Auto-terminating High-detail Oversegmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Chang_2024_CVPR, author = {Chang, Thomas V. and Seibt, Simon and von Rymon Lipinski, Bartosz}, title = {Hierarchical Histogram Threshold Segmentation - Auto-terminating High-detail Oversegmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3195-3204} }
Once for Both: Single Stage of Importance and Sparsity Search for Vision Transformer Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2024_CVPR, author = {Ye, Hancheng and Yu, Chong and Ye, Peng and Xia, Renqiu and Tang, Yansong and Lu, Jiwen and Chen, Tao and Zhang, Bo}, title = {Once for Both: Single Stage of Importance and Sparsity Search for Vision Transformer Compression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5578-5588} }
As-Plausible-As-Possible: Plausibility-Aware Mesh Deformation Using 2D Diffusion Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Yoo_2024_CVPR, author = {Yoo, Seungwoo and Kim, Kunho and Kim, Vladimir G. and Sung, Minhyuk}, title = {As-Plausible-As-Possible: Plausibility-Aware Mesh Deformation Using 2D Diffusion Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4315-4324} }
ECLIPSE: Efficient Continual Learning in Panoptic Segmentation with Visual Prompt Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Beomyoung and Yu, Joonsang and Hwang, Sung Ju}, title = {ECLIPSE: Efficient Continual Learning in Panoptic Segmentation with Visual Prompt Tuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3346-3356} }
MaGGIe: Masked Guided Gradual Human Instance Matting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huynh_2024_CVPR, author = {Huynh, Chuong and Oh, Seoung Wug and Shrivastava, Abhinav and Lee, Joon-Young}, title = {MaGGIe: Masked Guided Gradual Human Instance Matting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3870-3879} }
Towards the Uncharted: Density-Descending Feature Perturbation for Semi-supervised Semantic Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Xiaoyang and Bai, Huihui and Yu, Limin and Zhao, Yao and Xiao, Jimin}, title = {Towards the Uncharted: Density-Descending Feature Perturbation for Semi-supervised Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3303-3312} }
RTMO: Towards High-Performance One-Stage Real-Time Multi-Person Pose Estimation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lu_2024_CVPR, author = {Lu, Peng and Jiang, Tao and Li, Yining and Li, Xiangtai and Chen, Kai and Yang, Wenming}, title = {RTMO: Towards High-Performance One-Stage Real-Time Multi-Person Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1491-1500} }
WaveFace: Authentic Face Restoration with Efficient Frequency Recovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Miao_2024_CVPR, author = {Miao, Yunqi and Deng, Jiankang and Han, Jungong}, title = {WaveFace: Authentic Face Restoration with Efficient Frequency Recovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6583-6592} }
UltrAvatar: A Realistic Animatable 3D Avatar Diffusion Model with Authenticity Guided Textures-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR, author = {Zhou, Mingyuan and Hyder, Rakib and Xuan, Ziwei and Qi, Guojun}, title = {UltrAvatar: A Realistic Animatable 3D Avatar Diffusion Model with Authenticity Guided Textures}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1238-1248} }
Attention-Propagation Network for Egocentric Heatmap to 3D Pose Lifting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2024_CVPR, author = {Kang, Taeho and Lee, Youngki}, title = {Attention-Propagation Network for Egocentric Heatmap to 3D Pose Lifting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {842-851} }
OmniMotionGPT: Animal Motion Generation with Limited Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Zhangsihao and Zhou, Mingyuan and Shan, Mengyi and Wen, Bingbing and Xuan, Ziwei and Hill, Mitch and Bai, Junjie and Qi, Guo-Jun and Wang, Yalin}, title = {OmniMotionGPT: Animal Motion Generation with Limited Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1249-1259} }
InstanceDiffusion: Instance-level Control for Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Xudong and Darrell, Trevor and Rambhatla, Sai Saketh and Girdhar, Rohit and Misra, Ishan}, title = {InstanceDiffusion: Instance-level Control for Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6232-6242} }
Unifying Top-down and Bottom-up Scanpath Prediction Using Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Zhibo and Mondal, Sounak and Ahn, Seoyoung and Xue, Ruoyu and Zelinsky, Gregory and Hoai, Minh and Samaras, Dimitris}, title = {Unifying Top-down and Bottom-up Scanpath Prediction Using Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1683-1693} }
3D-Aware Face Editing via Warping-Guided Latent Direction Learning-
[pdf]
[bibtex]@InProceedings{Cheng_2024_CVPR, author = {Cheng, Yuhao and Chen, Zhuo and Ren, Xingyu and Zhu, Wenhan and Xu, Zhengqin and Xu, Di and Yang, Changpeng and Yan, Yichao}, title = {3D-Aware Face Editing via Warping-Guided Latent Direction Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {916-926} }
CAT-Seg: Cost Aggregation for Open-Vocabulary Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Cho_2024_CVPR, author = {Cho, Seokju and Shin, Heeseong and Hong, Sunghwan and Arnab, Anurag and Seo, Paul Hongsuck and Kim, Seungryong}, title = {CAT-Seg: Cost Aggregation for Open-Vocabulary Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4113-4123} }
Focus on Your Instruction: Fine-grained and Multi-instruction Image Editing by Attention Modulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2024_CVPR, author = {Guo, Qin and Lin, Tianwei}, title = {Focus on Your Instruction: Fine-grained and Multi-instruction Image Editing by Attention Modulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6986-6996} }
AvatarGPT: All-in-One Framework for Motion Understanding, Planning, Generation and Beyond-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR, author = {Zhou, Zixiang and Wan, Yu and Wang, Baoyuan}, title = {AvatarGPT: All-in-One Framework for Motion Understanding, Planning, Generation and Beyond}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1357-1366} }
Co-Speech Gesture Video Generation via Motion-Decoupled Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2024_CVPR, author = {He, Xu and Huang, Qiaochu and Zhang, Zhensong and Lin, Zhiwei and Wu, Zhiyong and Yang, Sicheng and Li, Minglei and Chen, Zhiyi and Xu, Songcen and Wu, Xiaofei}, title = {Co-Speech Gesture Video Generation via Motion-Decoupled Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2263-2273} }
CDFormer: When Degradation Prediction Embraces Diffusion Model for Blind Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Qingguo and Zhuang, Chenyi and Gao, Pan and Qin, Jie}, title = {CDFormer: When Degradation Prediction Embraces Diffusion Model for Blind Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7455-7464} }
HumanRef: Single Image to 3D Human Generation via Reference-Guided Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Jingbo and Li, Xiaoyu and Zhang, Qi and Cao, Yanpei and Shan, Ying and Liao, Jing}, title = {HumanRef: Single Image to 3D Human Generation via Reference-Guided Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1844-1854} }
Rethinking Interactive Image Segmentation with Low Latency, High Quality, and Diverse Prompts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Qin and Cho, Jaemin and Bansal, Mohit and Niethammer, Marc}, title = {Rethinking Interactive Image Segmentation with Low Latency, High Quality, and Diverse Prompts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3773-3782} }
DITTO: Dual and Integrated Latent Topologies for Implicit 3D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shim_2024_CVPR, author = {Shim, Jaehyeok and Joo, Kyungdon}, title = {DITTO: Dual and Integrated Latent Topologies for Implicit 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5396-5405} }
HIT: Estimating Internal Human Implicit Tissues from the Body Surface-
[pdf]
[supp]
[bibtex]@InProceedings{Keller_2024_CVPR, author = {Keller, Marilyn and Arora, Vaibhav and Dakri, Abdelmouttaleb and Chandhok, Shivam and Machann, J\"urgen and Fritsche, Andreas and Black, Michael J. and Pujades, Sergi}, title = {HIT: Estimating Internal Human Implicit Tissues from the Body Surface}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3480-3490} }
DanceCamera3D: 3D Camera Movement Synthesis with Music and Dance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Zixuan and Jia, Jia and Sun, Shikun and Wu, Haozhe and Han, Rong and Li, Zhenyu and Tang, Di and Zhou, Jiaqing and Luo, Jiebo}, title = {DanceCamera3D: 3D Camera Movement Synthesis with Music and Dance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7892-7901} }
Cross Initialization for Face Personalization of Text-to-Image Models-
[pdf]
[supp]
[bibtex]@InProceedings{Pang_2024_CVPR, author = {Pang, Lianyu and Yin, Jian and Xie, Haoran and Wang, Qiping and Li, Qing and Mao, Xudong}, title = {Cross Initialization for Face Personalization of Text-to-Image Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8393-8403} }
LEDITS++: Limitless Image Editing using Text-to-Image Models-
[pdf]
[supp]
[bibtex]@InProceedings{Brack_2024_CVPR, author = {Brack, Manuel and Friedrich, Felix and Kornmeier, Katharina and Tsaban, Linoy and Schramowski, Patrick and Kersting, Kristian and Passos, Apolinario}, title = {LEDITS++: Limitless Image Editing using Text-to-Image Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8861-8870} }
Video Interpolation with Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jain_2024_CVPR, author = {Jain, Siddhant and Watson, Daniel and Tabellion, Eric and Ho{\l}y{\'n}ski, Aleksander and Poole, Ben and Kontkanen, Janne}, title = {Video Interpolation with Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7341-7351} }
Learning Adaptive Spatial Coherent Correlations for Speech-Preserving Facial Expression Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Tianshui and Lin, Jianman and Yang, Zhijing and Qing, Chunmei and Lin, Liang}, title = {Learning Adaptive Spatial Coherent Correlations for Speech-Preserving Facial Expression Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7267-7276} }
WHAM: Reconstructing World-grounded Humans with Accurate 3D Motion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shin_2024_CVPR, author = {Shin, Soyong and Kim, Juyong and Halilaj, Eni and Black, Michael J.}, title = {WHAM: Reconstructing World-grounded Humans with Accurate 3D Motion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2070-2080} }
DiffPerformer: Iterative Learning of Consistent Latent Guidance for Diffusion-based Human Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Chenyang and Zheng, Zerong and Yu, Tao and Lv, Xiaoqian and Zhong, Bineng and Zhang, Shengping and Nie, Liqiang}, title = {DiffPerformer: Iterative Learning of Consistent Latent Guidance for Diffusion-based Human Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6169-6179} }
Category-Level Multi-Part Multi-Joint 3D Shape Assembly-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Yichen and Mo, Kaichun and Duan, Yueqi and Wang, He and Zhang, Jiequan and Shao, Lin}, title = {Category-Level Multi-Part Multi-Joint 3D Shape Assembly}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3281-3291} }
One-Shot Open Affordance Learning with Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Gen and Sun, Deqing and Sevilla-Lara, Laura and Jampani, Varun}, title = {One-Shot Open Affordance Learning with Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3086-3096} }
Don't Look into the Dark: Latent Codes for Pluralistic Image Inpainting-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Haiwei and Zhao, Yajie}, title = {Don't Look into the Dark: Latent Codes for Pluralistic Image Inpainting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7591-7600} }
DiffEditor: Boosting Accuracy and Flexibility on Diffusion-based Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mou_2024_CVPR, author = {Mou, Chong and Wang, Xintao and Song, Jiechong and Shan, Ying and Zhang, Jian}, title = {DiffEditor: Boosting Accuracy and Flexibility on Diffusion-based Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8488-8497} }
InstructVideo: Instructing Video Diffusion Models with Human Feedback-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2024_CVPR, author = {Yuan, Hangjie and Zhang, Shiwei and Wang, Xiang and Wei, Yujie and Feng, Tao and Pan, Yining and Zhang, Yingya and Liu, Ziwei and Albanie, Samuel and Ni, Dong}, title = {InstructVideo: Instructing Video Diffusion Models with Human Feedback}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6463-6474} }
On the Content Bias in Frechet Video Distance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ge_2024_CVPR, author = {Ge, Songwei and Mahapatra, Aniruddha and Parmar, Gaurav and Zhu, Jun-Yan and Huang, Jia-Bin}, title = {On the Content Bias in Frechet Video Distance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7277-7288} }
Image Neural Field Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Yinbo and Wang, Oliver and Zhang, Richard and Shechtman, Eli and Wang, Xiaolong and Gharbi, Michael}, title = {Image Neural Field Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8007-8017} }
Discriminative Probing and Tuning for Text-to-Image Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Qu_2024_CVPR, author = {Qu, Leigang and Wang, Wenjie and Li, Yongqi and Zhang, Hanwang and Nie, Liqiang and Chua, Tat-Seng}, title = {Discriminative Probing and Tuning for Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7434-7444} }
Towards More Accurate Diffusion Model Acceleration with A Timestep Tuner-
[pdf]
[supp]
[bibtex]@InProceedings{Xia_2024_CVPR, author = {Xia, Mengfei and Shen, Yujun and Lei, Changsong and Zhou, Yu and Zhao, Deli and Yi, Ran and Wang, Wenping and Liu, Yong-Jin}, title = {Towards More Accurate Diffusion Model Acceleration with A Timestep Tuner}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5736-5745} }
Rethinking Generalizable Face Anti-spoofing via Hierarchical Prototype-guided Distribution Refinement in Hyperbolic Space-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2024_CVPR, author = {Hu, Chengyang and Zhang, Ke-Yue and Yao, Taiping and Ding, Shouhong and Ma, Lizhuang}, title = {Rethinking Generalizable Face Anti-spoofing via Hierarchical Prototype-guided Distribution Refinement in Hyperbolic Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1032-1041} }
GenesisTex: Adapting Image Denoising Diffusion to Texture Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2024_CVPR, author = {Gao, Chenjian and Jiang, Boyan and Li, Xinghui and Zhang, Yingpeng and Yu, Qian}, title = {GenesisTex: Adapting Image Denoising Diffusion to Texture Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4620-4629} }
Image-to-Image Matching via Foundation Models: A New Perspective for Open-Vocabulary Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Yuan and Sun, Rui and Luo, Naisong and Pan, Yuwen and Zhang, Tianzhu}, title = {Image-to-Image Matching via Foundation Models: A New Perspective for Open-Vocabulary Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3952-3963} }
BigGait: Learning Gait Representation You Want by Large Vision Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2024_CVPR, author = {Ye, Dingqiang and Fan, Chao and Ma, Jingzhe and Liu, Xiaoming and Yu, Shiqi}, title = {BigGait: Learning Gait Representation You Want by Large Vision Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {200-210} }
HOIST-Former: Hand-held Objects Identification, Segmentation, and Tracking in the Wild-
[pdf]
[bibtex]@InProceedings{Narasimhaswamy_2024_CVPR, author = {Narasimhaswamy, Supreeth and Nguyen, Huy Anh and Huang, Lihan and Hoai, Minh}, title = {HOIST-Former: Hand-held Objects Identification, Segmentation, and Tracking in the Wild}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2351-2361} }
Contextrast: Contextual Contrastive Learning for Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sung_2024_CVPR, author = {Sung, Changki and Kim, Wanhee and An, Jungho and Lee, Wooju and Lim, Hyungtae and Myung, Hyun}, title = {Contextrast: Contextual Contrastive Learning for Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3732-3742} }
AUEditNet: Dual-Branch Facial Action Unit Intensity Manipulation with Implicit Disentanglement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2024_CVPR, author = {Jin, Shiwei and Wang, Zhen and Wang, Lei and Liu, Peng and Bi, Ning and Nguyen, Truong}, title = {AUEditNet: Dual-Branch Facial Action Unit Intensity Manipulation with Implicit Disentanglement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2104-2113} }
BodyMAP - Jointly Predicting Body Mesh and 3D Applied Pressure Map for People in Bed-
[pdf]
[supp]
[bibtex]@InProceedings{Tandon_2024_CVPR, author = {Tandon, Abhishek and Goyal, Anujraaj and Clever, Henry M. and Erickson, Zackory}, title = {BodyMAP - Jointly Predicting Body Mesh and 3D Applied Pressure Map for People in Bed}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2480-2489} }
KPConvX: Modernizing Kernel Point Convolution with Kernel Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Thomas_2024_CVPR, author = {Thomas, Hugues and Tsai, Yao-Hung Hubert and Barfoot, Timothy D. and Zhang, Jian}, title = {KPConvX: Modernizing Kernel Point Convolution with Kernel Attention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5525-5535} }
Clockwork Diffusion: Efficient Generation With Model-Step Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Habibian_2024_CVPR, author = {Habibian, Amirhossein and Ghodrati, Amir and Fathima, Noor and Sautiere, Guillaume and Garrepalli, Risheek and Porikli, Fatih and Petersen, Jens}, title = {Clockwork Diffusion: Efficient Generation With Model-Step Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8352-8361} }
Pick-or-Mix: Dynamic Channel Sampling for ConvNets-
[pdf]
[supp]
[bibtex]@InProceedings{Kumar_2024_CVPR, author = {Kumar, Ashish and Kim, Daneul and Park, Jaesik and Behera, Laxmidhar}, title = {Pick-or-Mix: Dynamic Channel Sampling for ConvNets}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5873-5882} }
DyBluRF: Dynamic Neural Radiance Fields from Blurry Monocular Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2024_CVPR, author = {Sun, Huiqiang and Li, Xingyi and Shen, Liao and Ye, Xinyi and Xian, Ke and Cao, Zhiguo}, title = {DyBluRF: Dynamic Neural Radiance Fields from Blurry Monocular Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7517-7527} }
AAMDM: Accelerated Auto-regressive Motion Diffusion Model-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Tianyu and Qiao, Calvin and Ren, Guanqiao and Yin, KangKang and Ha, Sehoon}, title = {AAMDM: Accelerated Auto-regressive Motion Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1813-1823} }
Towards Understanding Cross and Self-Attention in Stable Diffusion for Text-Guided Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Bingyan and Wang, Chengyu and Cao, Tingfeng and Jia, Kui and Huang, Jun}, title = {Towards Understanding Cross and Self-Attention in Stable Diffusion for Text-Guided Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7817-7826} }
DiverGen: Improving Instance Segmentation by Learning Wider Data Distribution with More Diverse Generative Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2024_CVPR, author = {Fan, Chengxiang and Zhu, Muzhi and Chen, Hao and Liu, Yang and Wu, Weijia and Zhang, Huaqi and Shen, Chunhua}, title = {DiverGen: Improving Instance Segmentation by Learning Wider Data Distribution with More Diverse Generative Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3986-3995} }
Learning Disentangled Identifiers for Action-Customized Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Siteng and Gong, Biao and Feng, Yutong and Chen, Xi and Fu, Yuqian and Liu, Yu and Wang, Donglin}, title = {Learning Disentangled Identifiers for Action-Customized Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7797-7806} }
Automatic Controllable Colorization via Imagination-
[pdf]
[arXiv]
[bibtex]@InProceedings{Cong_2024_CVPR, author = {Cong, Xiaoyan and Wu, Yue and Chen, Qifeng and Lei, Chenyang}, title = {Automatic Controllable Colorization via Imagination}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2609-2619} }
EMOPortraits: Emotion-enhanced Multimodal One-shot Head Avatars-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Drobyshev_2024_CVPR, author = {Drobyshev, Nikita and Casademunt, Antoni Bigata and Vougioukas, Konstantinos and Landgraf, Zoe and Petridis, Stavros and Pantic, Maja}, title = {EMOPortraits: Emotion-enhanced Multimodal One-shot Head Avatars}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8498-8507} }
Open3DIS: Open-Vocabulary 3D Instance Segmentation with 2D Mask Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2024_CVPR, author = {Nguyen, Phuc and Ngo, Tuan Duc and Kalogerakis, Evangelos and Gan, Chuang and Tran, Anh and Pham, Cuong and Nguyen, Khoi}, title = {Open3DIS: Open-Vocabulary 3D Instance Segmentation with 2D Mask Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4018-4028} }
ControlRoom3D: Room Generation using Semantic Proxy Rooms-
[pdf]
[supp]
[bibtex]@InProceedings{Schult_2024_CVPR, author = {Schult, Jonas and Tsai, Sam and H\"ollein, Lukas and Wu, Bichen and Wang, Jialiang and Ma, Chih-Yao and Li, Kunpeng and Wang, Xiaofang and Wimbauer, Felix and He, Zijian and Zhang, Peizhao and Leibe, Bastian and Vajda, Peter and Hou, Ji}, title = {ControlRoom3D: Room Generation using Semantic Proxy Rooms}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6201-6210} }
UniPTS: A Unified Framework for Proficient Post-Training Sparsity-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xie_2024_CVPR, author = {Xie, Jingjing and Zhang, Yuxin and Lin, Mingbao and Cao, Liujuan and Ji, Rongrong}, title = {UniPTS: A Unified Framework for Proficient Post-Training Sparsity}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5746-5755} }
HumanNorm: Learning Normal Diffusion Model for High-quality and Realistic 3D Human Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Xin and Shao, Ruizhi and Zhang, Qi and Zhang, Hongwen and Feng, Ying and Liu, Yebin and Wang, Qing}, title = {HumanNorm: Learning Normal Diffusion Model for High-quality and Realistic 3D Human Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4568-4577} }
Cross-view and Cross-pose Completion for 3D Human Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Armando_2024_CVPR, author = {Armando, Matthieu and Galaaoui, Salma and Baradel, Fabien and Lucas, Thomas and Leroy, Vincent and Br\'egier, Romain and Weinzaepfel, Philippe and Rogez, Gr\'egory}, title = {Cross-view and Cross-pose Completion for 3D Human Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1512-1523} }
Efficient Scene Recovery Using Luminous Flux Prior-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Zhongyu and Zhang, Lei}, title = {Efficient Scene Recovery Using Luminous Flux Prior}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2743-2752} }
Customize your NeRF: Adaptive Source Driven 3D Scene Editing via Local-Global Iterative Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2024_CVPR, author = {He, Runze and Huang, Shaofei and Nie, Xuecheng and Hui, Tianrui and Liu, Luoqi and Dai, Jiao and Han, Jizhong and Li, Guanbin and Liu, Si}, title = {Customize your NeRF: Adaptive Source Driven 3D Scene Editing via Local-Global Iterative Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6966-6975} }
Spherical Mask: Coarse-to-Fine 3D Point Cloud Instance Segmentation with Spherical Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shin_2024_CVPR, author = {Shin, Sangyun and Zhou, Kaichen and Vankadari, Madhu and Markham, Andrew and Trigoni, Niki}, title = {Spherical Mask: Coarse-to-Fine {3D} Point Cloud Instance Segmentation with Spherical Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {4060--4069} }
FSRT: Facial Scene Representation Transformer for Face Reenactment from Factorized Appearance Head-pose and Facial Expression Features-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rochow_2024_CVPR, author = {Rochow, Andre and Schwarz, Max and Behnke, Sven}, title = {{FSRT}: Facial Scene Representation Transformer for Face Reenactment from Factorized Appearance Head-pose and Facial Expression Features}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {7716--7726} }
TetraSphere: A Neural Descriptor for O(3)-Invariant Point Cloud Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Melnyk_2024_CVPR, author = {Melnyk, Pavlo and Robinson, Andreas and Felsberg, Michael and Wadenb{\"a}ck, M{\r{a}}rten}, title = {{TetraSphere}: A Neural Descriptor for {O(3)}-Invariant Point Cloud Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {5620--5630} }
WANDR: Intention-guided Human Motion Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Diomataris_2024_CVPR, author = {Diomataris, Markos and Athanasiou, Nikos and Taheri, Omid and Wang, Xi and Hilliges, Otmar and Black, Michael J.}, title = {{WANDR}: Intention-guided Human Motion Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {927--936} }
GroupContrast: Semantic-aware Self-supervised Representation Learning for 3D Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Chengyao and Jiang, Li and Wu, Xiaoyang and Tian, Zhuotao and Peng, Bohao and Zhao, Hengshuang and Jia, Jiaya}, title = {{GroupContrast}: Semantic-aware Self-supervised Representation Learning for {3D} Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {4917--4928} }
Privacy-Preserving Face Recognition Using Trainable Feature Subtraction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mi_2024_CVPR, author = {Mi, Yuxi and Zhong, Zhizhou and Huang, Yuge and Ji, Jiazhen and Xu, Jianqing and Wang, Jun and Wang, Shaoming and Ding, Shouhong and Zhou, Shuigeng}, title = {Privacy-Preserving Face Recognition Using Trainable Feature Subtraction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {297--307} }
Learning Visual Prompt for Gait Recognition-
[pdf]
[bibtex]@InProceedings{Ma_2024_CVPR, author = {Ma, Kang and Fu, Ying and Cao, Chunshui and Hou, Saihui and Huang, Yongzhen and Zheng, Dezhi}, title = {Learning Visual Prompt for Gait Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {593--603} }
SC-GS: Sparse-Controlled Gaussian Splatting for Editable Dynamic Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Yi-Hua and Sun, Yang-Tian and Yang, Ziyi and Lyu, Xiaoyang and Cao, Yan-Pei and Qi, Xiaojuan}, title = {{SC-GS}: Sparse-Controlled {Gaussian} Splatting for Editable Dynamic Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {4220--4230} }
Tri-Modal Motion Retrieval by Learning a Joint Embedding Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2024_CVPR, author = {Yin, Kangning and Zou, Shihao and Ge, Yuxuan and Tian, Zheng}, title = {Tri-Modal Motion Retrieval by Learning a Joint Embedding Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {1596--1605} }
Geometry-aware Reconstruction and Fusion-refined Rendering for Generalizable Neural Radiance Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Tianqi and Ye, Xinyi and Shi, Min and Huang, Zihao and Pan, Zhiyu and Peng, Zhan and Cao, Zhiguo}, title = {Geometry-aware Reconstruction and Fusion-refined Rendering for Generalizable Neural Radiance Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {7654--7663} }
VideoBooth: Diffusion-based Video Generation with Image Prompts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2024_CVPR, author = {Jiang, Yuming and Wu, Tianxing and Yang, Shuai and Si, Chenyang and Lin, Dahua and Qiao, Yu and Loy, Chen Change and Liu, Ziwei}, title = {{VideoBooth}: Diffusion-based Video Generation with Image Prompts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {6689--6700} }
SCULPT: Shape-Conditioned Unpaired Learning of Pose-dependent Clothed and Textured Human Meshes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sanyal_2024_CVPR, author = {Sanyal, Soubhik and Ghosh, Partha and Yang, Jinlong and Black, Michael J. and Thies, Justus and Bolkart, Timo}, title = {{SCULPT}: Shape-Conditioned Unpaired Learning of Pose-dependent Clothed and Textured Human Meshes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {2362--2371} }
EasyDrag: Efficient Point-based Manipulation on Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Hou_2024_CVPR, author = {Hou, Xingzhong and Liu, Boxiao and Zhang, Yi and Liu, Jihao and Liu, Yu and You, Haihang}, title = {{EasyDrag}: Efficient Point-based Manipulation on Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {8404--8413} }
InterHandGen: Two-Hand Interaction Generation via Cascaded Reverse Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2024_CVPR, author = {Lee, Jihyun and Saito, Shunsuke and Nam, Giljoo and Sung, Minhyuk and Kim, Tae-Kyun}, title = {{InterHandGen}: Two-Hand Interaction Generation via Cascaded Reverse Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {527--537} }
Video2Game: Real-time Interactive Realistic and Browser-Compatible Environment from a Single Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2024_CVPR, author = {Xia, Hongchi and Lin, Zhi-Hao and Ma, Wei-Chiu and Wang, Shenlong}, title = {{Video2Game}: Real-time Interactive Realistic and Browser-Compatible Environment from a Single Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {4578--4588} }
Tackling the Singularities at the Endpoints of Time Intervals in Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Pengze and Yin, Hubery and Li, Chen and Xie, Xiaohua}, title = {Tackling the Singularities at the Endpoints of Time Intervals in Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {6945--6954} }
CHAIN: Enhancing Generalization in Data-Efficient GANs via lipsCHitz continuity constrAIned Normalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ni_2024_CVPR, author = {Ni, Yao and Koniusz, Piotr}, title = {{CHAIN}: Enhancing Generalization in Data-Efficient {GANs} via {lipsCHitz} continuity {constrAIned} Normalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {6763--6774} }
High-Quality Facial Geometry and Appearance Capture at Home-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2024_CVPR, author = {Han, Yuxuan and Lyu, Junfeng and Xu, Feng}, title = {High-Quality Facial Geometry and Appearance Capture at Home}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {697--707} }
Your Image is My Video: Reshaping the Receptive Field via Image-To-Video Differentiable AutoAugmentation and Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Casarin_2024_CVPR, author = {Casarin, Sofia and Ugwu, Cynthia I. and Escalera, Sergio and Lanz, Oswald}, title = {Your Image is My Video: Reshaping the Receptive Field via Image-To-Video Differentiable {AutoAugmentation} and Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {5829--5839} }
SpikingResformer: Bridging ResNet and Vision Transformer in Spiking Neural Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2024_CVPR, author = {Shi, Xinyu and Hao, Zecheng and Yu, Zhaofei}, title = {{SpikingResformer}: Bridging {ResNet} and Vision Transformer in Spiking Neural Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {5610--5619} }
Self-Supervised Dual Contouring-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sundararaman_2024_CVPR, author = {Sundararaman, Ramana and Klokov, Roman and Ovsjanikov, Maks}, title = {Self-Supervised Dual Contouring}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {4681--4691} }
GSVA: Generalized Segmentation via Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2024_CVPR, author = {Xia, Zhuofan and Han, Dongchen and Han, Yizeng and Pan, Xuran and Song, Shiji and Huang, Gao}, title = {{GSVA}: Generalized Segmentation via Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {3858--3869} }
AdaBM: On-the-Fly Adaptive Bit Mapping for Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hong_2024_CVPR, author = {Hong, Cheeun and Lee, Kyoung Mu}, title = {{AdaBM}: On-the-Fly Adaptive Bit Mapping for Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {2641--2650} }
SVGDreamer: Text Guided SVG Generation with Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xing_2024_CVPR, author = {Xing, Ximing and Zhou, Haitao and Wang, Chuang and Zhang, Jing and Xu, Dong and Yu, Qian}, title = {{SVGDreamer}: Text Guided {SVG} Generation with Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {4546--4555} }
BlockGCN: Redefine Topology Awareness for Skeleton-Based Action Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2024_CVPR, author = {Zhou, Yuxuan and Yan, Xudong and Cheng, Zhi-Qi and Yan, Yan and Dai, Qi and Hua, Xian-Sheng}, title = {{BlockGCN}: Redefine Topology Awareness for Skeleton-Based Action Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {2049--2058} }
Structure-Guided Adversarial Training of Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Ling and Qian, Haotian and Zhang, Zhilong and Liu, Jingwei and Cui, Bin}, title = {Structure-Guided Adversarial Training of Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {7256--7266} }
NIFTY: Neural Object Interaction Fields for Guided Human Motion Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kulkarni_2024_CVPR, author = {Kulkarni, Nilesh and Rempe, Davis and Genova, Kyle and Kundu, Abhijit and Johnson, Justin and Fouhey, David and Guibas, Leonidas}, title = {{NIFTY}: Neural Object Interaction Fields for Guided Human Motion Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {947--957} }
Can Language Beat Numerical Regression? Language-Based Multimodal Trajectory Prediction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Bae_2024_CVPR, author = {Bae, Inhwan and Lee, Junoh and Jeon, Hae-Gon}, title = {Can Language Beat Numerical Regression? {Language}-Based Multimodal Trajectory Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {753--766} }
Building Optimal Neural Architectures using Interpretable Knowledge-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mills_2024_CVPR, author = {Mills, Keith G. and Han, Fred X. and Salameh, Mohammad and Lu, Shengyao and Zhou, Chunhua and He, Jiao and Sun, Fengyu and Niu, Di}, title = {Building Optimal Neural Architectures using Interpretable Knowledge}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {5726--5735} }
Holo-Relighting: Controllable Volumetric Portrait Relighting from a Single Image-
[pdf]
[supp]
[bibtex]@InProceedings{Mei_2024_CVPR, author = {Mei, Yiqun and Zeng, Yu and Zhang, He and Shu, Zhixin and Zhang, Xuaner and Bi, Sai and Zhang, Jianming and Jung, HyunJoon and Patel, Vishal M.}, title = {{Holo-Relighting}: Controllable Volumetric Portrait Relighting from a Single Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {4263--4273} }
Noisy One-point Homographies are Surprisingly Good-
[pdf]
[supp]
[bibtex]@InProceedings{Ding_2024_CVPR, author = {Ding, Yaqing and Astermark, Jonathan and Oskarsson, Magnus and Larsson, Viktor}, title = {Noisy One-point Homographies are Surprisingly Good}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {5125--5134} }
Panacea: Panoramic and Controllable Video Generation for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wen_2024_CVPR, author = {Wen, Yuqing and Zhao, Yucheng and Liu, Yingfei and Jia, Fan and Wang, Yanhui and Luo, Chong and Zhang, Chi and Wang, Tiancai and Sun, Xiaoyan and Zhang, Xiangyu}, title = {Panacea: Panoramic and Controllable Video Generation for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {6902--6912} }
DreamMatcher: Appearance Matching Self-Attention for Semantically-Consistent Text-to-Image Personalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nam_2024_CVPR, author = {Nam, Jisu and Kim, Heesu and Lee, DongJae and Jin, Siyoon and Kim, Seungryong and Chang, Seunggyu}, title = {{DreamMatcher}: Appearance Matching Self-Attention for Semantically-Consistent Text-to-Image Personalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {8100--8110} }
PolarMatte: Fully Computational Ground-Truth-Quality Alpha Matte Extraction for Images and Video using Polarized Screen Matting-
[pdf]
[supp]
[bibtex]@InProceedings{Enomoto_2024_CVPR, author = {Enomoto, Kenji and Rhodes, TJ and Price, Brian and Miller, Gavin}, title = {{PolarMatte}: Fully Computational Ground-Truth-Quality Alpha Matte Extraction for Images and Video using Polarized Screen Matting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {3901--3909} }
HOIDiffusion: Generating Realistic 3D Hand-Object Interaction Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Mengqi and Fu, Yang and Ding, Zheng and Liu, Sifei and Tu, Zhuowen and Wang, Xiaolong}, title = {{HOIDiffusion}: Generating Realistic {3D} Hand-Object Interaction Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {8521--8531} }
VecFusion: Vector Font Generation with Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Thamizharasan_2024_CVPR, author = {Thamizharasan, Vikas and Liu, Difan and Agarwal, Shantanu and Fisher, Matthew and Gharbi, Michael and Wang, Oliver and Jacobson, Alec and Kalogerakis, Evangelos}, title = {{VecFusion}: Vector Font Generation with Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {7943--7952} }
Towards Text-guided 3D Scene Composition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Qihang and Wang, Chaoyang and Siarohin, Aliaksandr and Zhuang, Peiye and Xu, Yinghao and Yang, Ceyuan and Lin, Dahua and Zhou, Bolei and Tulyakov, Sergey and Lee, Hsin-Ying}, title = {Towards Text-guided {3D} Scene Composition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {6829--6838} }
EMAGE: Towards Unified Holistic Co-Speech Gesture Generation via Expressive Masked Audio Gesture Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Haiyang and Zhu, Zihao and Becherini, Giorgio and Peng, Yichen and Su, Mingyang and Zhou, You and Zhe, Xuefei and Iwamoto, Naoya and Zheng, Bo and Black, Michael J.}, title = {{EMAGE}: Towards Unified Holistic Co-Speech Gesture Generation via Expressive Masked Audio Gesture Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {1144--1154} }
Adversarial Text to Continuous Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Haydarov_2024_CVPR, author = {Haydarov, Kilichbek and Muhamed, Aashiq and Shen, Xiaoqian and Lazarevic, Jovana and Skorokhodov, Ivan and Galappaththige, Chamuditha Jayanga and Elhoseiny, Mohamed}, title = {Adversarial Text to Continuous Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {6316--6326} }
HumanNeRF-SE: A Simple yet Effective Approach to Animate HumanNeRF with Diverse Poses-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2024_CVPR, author = {Ma, Caoyuan and Liu, Yu-Lun and Wang, Zhixiang and Liu, Wu and Liu, Xinchen and Wang, Zheng}, title = {{HumanNeRF-SE}: A Simple yet Effective Approach to Animate {HumanNeRF} with Diverse Poses}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {1460--1470} }
HOLD: Category-agnostic 3D Reconstruction of Interacting Hands and Objects from Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2024_CVPR, author = {Fan, Zicong and Parelli, Maria and Kadoglou, Maria Eleni and Chen, Xu and Kocabas, Muhammed and Black, Michael J. and Hilliges, Otmar}, title = {{HOLD}: Category-agnostic {3D} Reconstruction of Interacting Hands and Objects from Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {494--504} }
Continual Segmentation with Disentangled Objectness Learning and Class Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gong_2024_CVPR, author = {Gong, Yizheng and Yu, Siyue and Wang, Xiaoyang and Xiao, Jimin}, title = {Continual Segmentation with Disentangled Objectness Learning and Class Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {3848--3857} }
ASAM: Boosting Segment Anything Model with Adversarial Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Bo and Xiao, Haoke and Tang, Lv}, title = {{ASAM}: Boosting {Segment Anything Model} with Adversarial Tuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {3699--3710} }
Dynamic Support Information Mining for Category-Agnostic Pose Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Ren_2024_CVPR, author = {Ren, Pengfei and Gao, Yuanyuan and Sun, Haifeng and Qi, Qi and Wang, Jingyu and Liao, Jianxin}, title = {Dynamic Support Information Mining for Category-Agnostic Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {1921--1930} }
Taming Mode Collapse in Score Distillation for Text-to-3D Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Peihao and Xu, Dejia and Fan, Zhiwen and Wang, Dilin and Mohan, Sreyas and Iandola, Forrest and Ranjan, Rakesh and Li, Yilei and Liu, Qiang and Wang, Zhangyang and Chandra, Vikas}, title = {Taming Mode Collapse in Score Distillation for Text-to-{3D} Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {9037--9047} }
MagicAnimate: Temporally Consistent Human Image Animation using Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Zhongcong and Zhang, Jianfeng and Liew, Jun Hao and Yan, Hanshu and Liu, Jia-Wei and Zhang, Chenxu and Feng, Jiashi and Shou, Mike Zheng}, title = {{MagicAnimate}: Temporally Consistent Human Image Animation using Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {1481--1490} }
From Correspondences to Pose: Non-minimal Certifiably Optimal Relative Pose without Disambiguation-
[pdf]
[supp]
[bibtex]@InProceedings{Tirado-Garin_2024_CVPR, author = {Tirado-Gar{\'\i}n, Javier and Civera, Javier}, title = {From Correspondences to Pose: Non-minimal Certifiably Optimal Relative Pose without Disambiguation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {403--412} }
Loose Inertial Poser: Motion Capture with IMU-attached Loose-Wear Jacket-
[pdf]
[supp]
[bibtex]@InProceedings{Zuo_2024_CVPR, author = {Zuo, Chengxu and Wang, Yiming and Zhan, Lishuang and Guo, Shihui and Yi, Xinyu and Xu, Feng and Qin, Yipeng}, title = {Loose Inertial Poser: Motion Capture with {IMU}-attached Loose-Wear Jacket}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {2209--2219} }
Training-Free Pretrained Model Merging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Zhengqi and Yuan, Ke and Wang, Huiqiong and Wang, Yong and Song, Mingli and Song, Jie}, title = {Training-Free Pretrained Model Merging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {5915--5925} }
NC-SDF: Enhancing Indoor Scene Reconstruction Using Neural SDFs with View-Dependent Normal Compensation-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Ziyi and Wu, Xiaolong and Zhang, Yu}, title = {{NC-SDF}: Enhancing Indoor Scene Reconstruction Using Neural {SDFs} with View-Dependent Normal Compensation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {5155--5165} }
Person in Place: Generating Associative Skeleton-Guidance Maps for Human-Object Interaction Image Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, ChangHee and Kang, ChanHee and Kong, Kyeongbo and Oh, Hanni and Kang, Suk-Ju}, title = {Person in Place: Generating Associative Skeleton-Guidance Maps for Human-Object Interaction Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {8164--8175} }
ChatPose: Chatting about 3D Human Pose-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2024_CVPR, author = {Feng, Yao and Lin, Jing and Dwivedi, Sai Kumar and Sun, Yu and Patel, Priyanka and Black, Michael J.}, title = {{ChatPose}: Chatting about {3D} Human Pose}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {2093--2103} }
Distilling ODE Solvers of Diffusion Models into Smaller Steps-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Sanghwan and Tang, Hao and Yu, Fisher}, title = {Distilling {ODE} Solvers of Diffusion Models into Smaller Steps}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {9410--9419} }
LightIt: Illumination Modeling and Control for Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kocsis_2024_CVPR, author = {Kocsis, Peter and Philip, Julien and Sunkavalli, Kalyan and Nie{\ss}ner, Matthias and Hold-Geoffroy, Yannick}, title = {{LightIt}: Illumination Modeling and Control for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {9359--9369} }
Neural Lineage-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2024_CVPR, author = {Yu, Runpeng and Wang, Xinchao}, title = {Neural Lineage}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {4797--4807} }
Visual Layout Composer: Image-Vector Dual Diffusion Model for Design Layout Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Shabani_2024_CVPR, author = {Shabani, Mohammad Amin and Wang, Zhaowen and Liu, Difan and Zhao, Nanxuan and Yang, Jimei and Furukawa, Yasutaka}, title = {Visual Layout Composer: Image-Vector Dual Diffusion Model for Design Layout Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {9222--9231} }
3D Multi-frame Fusion for Video Stabilization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2024_CVPR, author = {Peng, Zhan and Ye, Xinyi and Zhao, Weiyue and Liu, Tianqi and Sun, Huiqiang and Li, Baopu and Cao, Zhiguo}, title = {{3D} Multi-frame Fusion for Video Stabilization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {7507--7516} }
Local-consistent Transformation Learning for Rotation-invariant Point Cloud Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Yiyang and Duan, Lunhao and Zhao, Shanshan and Ding, Changxing and Tao, Dacheng}, title = {Local-consistent Transformation Learning for Rotation-invariant Point Cloud Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {5418--5427} }
Tailored Visions: Enhancing Text-to-Image Generation with Personalized Prompt Rewriting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Zijie and Zhang, Lichao and Weng, Fangsheng and Pan, Lili and Lan, Zhenzhong}, title = {Tailored Visions: Enhancing Text-to-Image Generation with Personalized Prompt Rewriting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {7727--7736} }
Efficient Deformable ConvNets: Rethinking Dynamic and Sparse Operator for Vision Applications-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiong_2024_CVPR, author = {Xiong, Yuwen and Li, Zhiqi and Chen, Yuntao and Wang, Feng and Zhu, Xizhou and Luo, Jiapeng and Wang, Wenhai and Lu, Tong and Li, Hongsheng and Qiao, Yu and Lu, Lewei and Zhou, Jie and Dai, Jifeng}, title = {Efficient Deformable {ConvNets}: Rethinking Dynamic and Sparse Operator for Vision Applications}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {5652--5661} }
CoDe: An Explicit Content Decoupling Framework for Image Restoration-
[pdf]
[supp]
[bibtex]@InProceedings{Gu_2024_CVPR, author = {Gu, Enxuan and Ge, Hongwei and Guo, Yong}, title = {{CoDe}: An Explicit Content Decoupling Framework for Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {2920--2930} }
DreamVideo: Composing Your Dream Videos with Customized Subject and Motion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2024_CVPR, author = {Wei, Yujie and Zhang, Shiwei and Qing, Zhiwu and Yuan, Hangjie and Liu, Zhiheng and Liu, Yu and Zhang, Yingya and Zhou, Jingren and Shan, Hongming}, title = {{DreamVideo}: Composing Your Dream Videos with Customized Subject and Motion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {6537--6549} }
Using Human Feedback to Fine-tune Diffusion Models without Any Reward Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Kai and Tao, Jian and Lyu, Jiafei and Ge, Chunjiang and Chen, Jiaxin and Shen, Weihan and Zhu, Xiaolong and Li, Xiu}, title = {Using Human Feedback to Fine-tune Diffusion Models without Any Reward Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {8941--8951} }
SynSP: Synergy of Smoothness and Precision in Pose Sequences Refinement-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Tao and Jin, Lei and Wang, Zheng and Li, Jianshu and Li, Liang and Zhao, Fang and Cheng, Yu and Yuan, Li and Zhou, Li and Xing, Junliang and Zhao, Jian}, title = {{SynSP}: Synergy of Smoothness and Precision in Pose Sequences Refinement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {1824--1833} }
Learned Representation-Guided Diffusion Models for Large-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Graikos_2024_CVPR, author = {Graikos, Alexandros and Yellapragada, Srikar and Le, Minh-Quan and Kapse, Saarthak and Prasanna, Prateek and Saltz, Joel and Samaras, Dimitris}, title = {Learned Representation-Guided Diffusion Models for Large-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {8532--8542} }
Ranni: Taming Text-to-Image Diffusion for Accurate Instruction Following-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2024_CVPR, author = {Feng, Yutong and Gong, Biao and Chen, Di and Shen, Yujun and Liu, Yu and Zhou, Jingren}, title = {Ranni: Taming Text-to-Image Diffusion for Accurate Instruction Following}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {4744--4753} }
Direct2.5: Diverse Text-to-3D Generation via Multi-view 2.5D Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2024_CVPR, author = {Lu, Yuanxun and Zhang, Jingyang and Li, Shiwei and Fang, Tian and McKinnon, David and Tsin, Yanghai and Quan, Long and Cao, Xun and Yao, Yao}, title = {Direct2.5: Diverse Text-to-{3D} Generation via Multi-view {2.5D} Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {8744--8753} }
MatFuse: Controllable Material Generation with Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vecchio_2024_CVPR, author = {Vecchio, Giuseppe and Sortino, Renato and Palazzo, Simone and Spampinato, Concetto}, title = {{MatFuse}: Controllable Material Generation with Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {4429--4438} }
Training Vision Transformers for Semi-Supervised Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2024_CVPR, author = {Hu, Xinting and Jiang, Li and Schiele, Bernt}, title = {Training Vision Transformers for Semi-Supervised Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {4007--4017} }
Quantifying Task Priority for Multi-Task Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2024_CVPR, author = {Jeong, Wooseong and Yoon, Kuk-Jin}, title = {Quantifying Task Priority for Multi-Task Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {363--372} }
On the Scalability of Diffusion-based Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Hao and Zou, Yang and Wang, Ying and Majumder, Orchid and Xie, Yusheng and Manmatha, R. and Swaminathan, Ashwin and Tu, Zhuowen and Ermon, Stefano and Soatto, Stefano}, title = {On the Scalability of Diffusion-based Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {9400--9409} }
AnySkill: Learning Open-Vocabulary Physical Skill for Interactive Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cui_2024_CVPR, author = {Cui, Jieming and Liu, Tengyu and Liu, Nian and Yang, Yaodong and Zhu, Yixin and Huang, Siyuan}, title = {{AnySkill}: Learning Open-Vocabulary Physical Skill for Interactive Agents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {852--862} }
Generative Unlearning for Any Identity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Seo_2024_CVPR, author = {Seo, Juwon and Lee, Sung-Hoon and Lee, Tae-Young and Moon, Seungjun and Park, Gyeong-Moon}, title = {Generative Unlearning for Any Identity}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {9151--9161} }
FlowVid: Taming Imperfect Optical Flows for Consistent Video-to-Video Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2024_CVPR, author = {Liang, Feng and Wu, Bichen and Wang, Jialiang and Yu, Licheng and Li, Kunpeng and Zhao, Yinan and Misra, Ishan and Huang, Jia-Bin and Zhang, Peizhao and Vajda, Peter and Marculescu, Diana}, title = {{FlowVid}: Taming Imperfect Optical Flows for Consistent Video-to-Video Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {8207--8216} }
StyleCineGAN: Landscape Cinemagraph Generation using a Pre-trained StyleGAN-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2024_CVPR, author = {Choi, Jongwoo and Seo, Kwanggyoon and Ashtari, Amirsaman and Noh, Junyong}, title = {{StyleCineGAN}: Landscape Cinemagraph Generation using a Pre-trained {StyleGAN}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {7872--7881} }
Laplacian-guided Entropy Model in Neural Codec with Blur-dissipated Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Khoshkhahtinat_2024_CVPR, author = {Khoshkhahtinat, Atefeh and Zafari, Ali and Mehta, Piyush M. and Nasrabadi, Nasser M.}, title = {Laplacian-guided Entropy Model in Neural Codec with Blur-dissipated Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {3045--3054} }
RMT: Retentive Networks Meet Vision Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2024_CVPR, author = {Fan, Qihang and Huang, Huaibo and Chen, Mingrui and Liu, Hongmin and He, Ran}, title = {{RMT}: Retentive Networks Meet Vision Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {5641--5651} }
Multimodal Pathway: Improve Transformers with Irrelevant Data from Other Modalities-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Yiyuan and Ding, Xiaohan and Gong, Kaixiong and Ge, Yixiao and Shan, Ying and Yue, Xiangyu}, title = {Multimodal Pathway: Improve Transformers with Irrelevant Data from Other Modalities}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {6108--6117} }
FaceChain-ImagineID: Freely Crafting High-Fidelity Diverse Talking Faces from Disentangled Audio-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Chao and Liu, Yang and Xing, Jiazheng and Wang, Weida and Sun, Mingze and Dan, Jun and Huang, Tianxin and Li, Siyuan and Cheng, Zhi-Qi and Tai, Ying and Sun, Baigui}, title = {{FaceChain-ImagineID}: Freely Crafting High-Fidelity Diverse Talking Faces from Disentangled Audio}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {1292--1302} }
SSR-Encoder: Encoding Selective Subject Representation for Subject-Driven Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Yuxuan and Song, Yiren and Liu, Jiaming and Wang, Rui and Yu, Jinpeng and Tang, Hao and Li, Huaxia and Tang, Xu and Hu, Yao and Pan, Han and Jing, Zhongliang}, title = {{SSR-Encoder}: Encoding Selective Subject Representation for Subject-Driven Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {8069--8078} }
MVIP-NeRF: Multi-view 3D Inpainting on NeRF Scenes via Diffusion Prior-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Honghua and Loy, Chen Change and Pan, Xingang}, title = {{MVIP-NeRF}: Multi-view {3D} Inpainting on {NeRF} Scenes via Diffusion Prior}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {5344--5353} }
StegoGAN: Leveraging Steganography for Non-Bijective Image-to-Image Translation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Sidi and Chen, Yizi and Mermet, Samuel and Hurni, Lorenz and Schindler, Konrad and Gonthier, Nicolas and Landrieu, Loic}, title = {{StegoGAN}: Leveraging Steganography for Non-Bijective Image-to-Image Translation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {7922--7931} }
M&M VTO: Multi-Garment Virtual Try-On and Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2024_CVPR, author = {Zhu, Luyang and Li, Yingwei and Liu, Nan and Peng, Hao and Yang, Dawei and Kemelmacher-Shlizerman, Ira}, title = {{M\&M VTO}: Multi-Garment Virtual Try-On and Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {1346--1356} }
Dynamic Inertial Poser (DynaIP): Part-Based Motion Dynamics Learning for Enhanced Human Pose Estimation with Sparse Inertial Sensors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Yu and Xia, Songpengcheng and Chu, Lei and Yang, Jiarui and Wu, Qi and Pei, Ling}, title = {Dynamic Inertial Poser ({DynaIP}): Part-Based Motion Dynamics Learning for Enhanced Human Pose Estimation with Sparse Inertial Sensors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {1889--1899} }
GraCo: Granularity-Controllable Interactive Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Yian and Li, Kehan and Cheng, Zesen and Qiao, Pengchong and Zheng, Xiawu and Ji, Rongrong and Liu, Chang and Yuan, Li and Chen, Jie}, title = {{GraCo}: Granularity-Controllable Interactive Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {3501--3510} }
G-HOP: Generative Hand-Object Prior for Interaction Reconstruction and Grasp Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Ye_2024_CVPR, author = {Ye, Yufei and Gupta, Abhinav and Kitani, Kris and Tulsiani, Shubham}, title = {{G-HOP}: Generative Hand-Object Prior for Interaction Reconstruction and Grasp Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {1911--1920} }
Contrastive Denoising Score for Text-guided Latent Diffusion Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nam_2024_CVPR, author = {Nam, Hyelin and Kwon, Gihyun and Park, Geon Yeong and Ye, Jong Chul}, title = {Contrastive Denoising Score for Text-guided Latent Diffusion Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {9192--9201} }
Neural Point Cloud Diffusion for Disentangled 3D Shape and Appearance Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Schroppel_2024_CVPR, author = {Schr{\"o}ppel, Philipp and Wewer, Christopher and Lenssen, Jan Eric and Ilg, Eddy and Brox, Thomas}, title = {Neural Point Cloud Diffusion for Disentangled {3D} Shape and Appearance Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {8785--8794} }
VAREN: Very Accurate and Realistic Equine Network-
[pdf]
[supp]
[bibtex]@InProceedings{Zuffi_2024_CVPR, author = {Zuffi, Silvia and Mellbin, Ylva and Li, Ci and Hoeschle, Markus and Kjellstr{\"o}m, Hedvig and Polikovsky, Senya and Hernlund, Elin and Black, Michael J.}, title = {{VAREN}: Very Accurate and Realistic Equine Network}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {5374--5383} }
SD-DiT: Unleashing the Power of Self-supervised Discrimination in Diffusion Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2024_CVPR, author = {Zhu, Rui and Pan, Yingwei and Li, Yehao and Yao, Ting and Sun, Zhenglong and Mei, Tao and Chen, Chang Wen}, title = {{SD-DiT}: Unleashing the Power of Self-supervised Discrimination in Diffusion Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {8435--8445} }
MedBN: Robust Test-Time Adaptation against Malicious Test Samples-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2024_CVPR, author = {Park, Hyejin and Hwang, Jeongyeon and Mun, Sunung and Park, Sangdon and Ok, Jungseul}, title = {{MedBN}: Robust Test-Time Adaptation against Malicious Test Samples}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {5997--6007} }
Unsupervised Gaze Representation Learning from Multi-view Face Images-
[pdf]
[bibtex]@InProceedings{Bao_2024_CVPR, author = {Bao, Yiwei and Lu, Feng}, title = {Unsupervised Gaze Representation Learning from Multi-view Face Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {1419--1428} }
AEROBLADE: Training-Free Detection of Latent Diffusion Images Using Autoencoder Reconstruction Error-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ricker_2024_CVPR, author = {Ricker, Jonas and Lukovnikov, Denis and Fischer, Asja}, title = {{AEROBLADE}: Training-Free Detection of Latent Diffusion Images Using Autoencoder Reconstruction Error}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {9130--9140} }
Point2CAD: Reverse Engineering CAD Models from 3D Point Clouds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Yujia and Obukhov, Anton and Wegner, Jan Dirk and Schindler, Konrad}, title = {{Point2CAD}: Reverse Engineering {CAD} Models from {3D} Point Clouds}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {3763--3772} }
LocLLM: Exploiting Generalizable Human Keypoint Localization via Large Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Dongkai and Xuan, Shiyu and Zhang, Shiliang}, title = {{LocLLM}: Exploiting Generalizable Human Keypoint Localization via Large Language Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {614--623} }
MMA-Diffusion: MultiModal Attack on Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Yijun and Gao, Ruiyuan and Wang, Xiaosen and Ho, Tsung-Yi and Xu, Nan and Xu, Qiang}, title = {{MMA-Diffusion}: {MultiModal} Attack on Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {7737--7746} }
HanDiffuser: Text-to-Image Generation With Realistic Hand Appearances-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Narasimhaswamy_2024_CVPR, author = {Narasimhaswamy, Supreeth and Bhattacharya, Uttaran and Chen, Xiang and Dasgupta, Ishita and Mitra, Saayan and Hoai, Minh}, title = {{HanDiffuser}: Text-to-Image Generation With Realistic Hand Appearances}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {2468--2479} }
Hierarchical Patch Diffusion Models for High-Resolution Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Skorokhodov_2024_CVPR, author = {Skorokhodov, Ivan and Menapace, Willi and Siarohin, Aliaksandr and Tulyakov, Sergey}, title = {Hierarchical Patch Diffusion Models for High-Resolution Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {7569--7579} }
Neural Implicit Morphing of Face Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Schardong_2024_CVPR, author = {Schardong, Guilherme and Novello, Tiago and Paz, Hallison and Medvedev, Iurii and da Silva, Vin{\'\i}cius and Velho, Luiz and Gon{\c{c}}alves, Nuno}, title = {Neural Implicit Morphing of Face Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {7321--7330} }
UniGS: Unified Representation for Image Generation and Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qi_2024_CVPR, author = {Qi, Lu and Yang, Lehan and Guo, Weidong and Xu, Yu and Du, Bo and Jampani, Varun and Yang, Ming-Hsuan}, title = {{UniGS}: Unified Representation for Image Generation and Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {6305--6315} }
Training-Free Open-Vocabulary Segmentation with Offline Diffusion-Augmented Prototype Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Barsellotti_2024_CVPR, author = {Barsellotti, Luca and Amoroso, Roberto and Cornia, Marcella and Baraldi, Lorenzo and Cucchiara, Rita}, title = {Training-Free Open-Vocabulary Segmentation with Offline Diffusion-Augmented Prototype Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {3689--3698} }
HUGS: Human Gaussian Splats-
[pdf]
[arXiv]
[bibtex]@InProceedings{Kocabas_2024_CVPR, author = {Kocabas, Muhammed and Chang, Jen-Hao Rick and Gabriel, James and Tuzel, Oncel and Ranjan, Anurag}, title = {{HUGS}: Human {Gaussian} Splats}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {505--515} }
PhysPT: Physics-aware Pretrained Transformer for Estimating Human Dynamics from Monocular Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Yufei and Kephart, Jeffrey O. and Cui, Zijun and Ji, Qiang}, title = {{PhysPT}: Physics-aware Pretrained Transformer for Estimating Human Dynamics from Monocular Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {2305--2317} }
EfficientDreamer: High-Fidelity and Robust 3D Creation via Orthogonal-view Diffusion Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2024_CVPR, author = {Hu, Zhipeng and Zhao, Minda and Zhao, Chaoyi and Liang, Xinyue and Li, Lincheng and Zhao, Zeng and Fan, Changjie and Zhou, Xiaowei and Yu, Xin}, title = {{EfficientDreamer}: High-Fidelity and Robust {3D} Creation via Orthogonal-view Diffusion Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {4949--4958} }
HOIAnimator: Generating Text-prompt Human-object Animations using Novel Perceptive Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Song_2024_CVPR, author = {Song, Wenfeng and Zhang, Xinyu and Li, Shuai and Gao, Yang and Hao, Aimin and Hou, Xia and Chen, Chenglizhao and Li, Ning and Qin, Hong}, title = {{HOIAnimator}: Generating Text-prompt Human-object Animations using Novel Perceptive Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {811--820} }
SyncTalk: The Devil is in the Synchronization for Talking Head Synthesis-
[pdf]
[arXiv]
[bibtex]@InProceedings{Peng_2024_CVPR, author = {Peng, Ziqiao and Hu, Wentao and Shi, Yue and Zhu, Xiangyu and Zhang, Xiaomei and Zhao, Hao and He, Jun and Liu, Hongyan and Fan, Zhaoxin}, title = {{SyncTalk}: The Devil is in the Synchronization for Talking Head Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {666--676} }
DreamSalon: A Staged Diffusion Framework for Preserving Identity-Context in Editable Face Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2024_CVPR, author = {Lin, Haonan}, title = {{DreamSalon}: A Staged Diffusion Framework for Preserving Identity-Context in Editable Face Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {8589--8598} }
Neural Super-Resolution for Real-time Rendering with Radiance Demodulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Jia and Chen, Ziling and Wu, Xiaolong and Wang, Lu and Wang, Beibei and Zhang, Lei}, title = {Neural Super-Resolution for Real-time Rendering with Radiance Demodulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {4357--4367} }
MMM: Generative Masked Motion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pinyoanuntapong_2024_CVPR, author = {Pinyoanuntapong, Ekkasit and Wang, Pu and Lee, Minwoo and Chen, Chen}, title = {{MMM}: Generative Masked Motion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {1546--1555} }
PEGASUS: Personalized Generative 3D Avatars with Composable Attributes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cha_2024_CVPR, author = {Cha, Hyunsoo and Kim, Byungjun and Joo, Hanbyul}, title = {{PEGASUS}: Personalized Generative {3D} Avatars with Composable Attributes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {1072--1081} }
Diff-Plugin: Revitalizing Details for Diffusion-based Low-level Tasks-
[pdf]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Yuhao and Ke, Zhanghan and Liu, Fang and Zhao, Nanxuan and Lau, Rynson W. H.}, title = {{Diff-Plugin}: Revitalizing Details for Diffusion-based Low-level Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {4197--4208} }
Intelligent Grimm - Open-ended Visual Storytelling via Latent Diffusion Models-
[pdf]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Chang and Wu, Haoning and Zhong, Yujie and Zhang, Xiaoyun and Wang, Yanfeng and Xie, Weidi}, title = {Intelligent {Grimm} - Open-ended Visual Storytelling via Latent Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {6190--6200} }
GenTron: Diffusion Transformers for Image and Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Shoufa and Xu, Mengmeng and Ren, Jiawei and Cong, Yuren and He, Sen and Xie, Yanping and Sinha, Animesh and Luo, Ping and Xiang, Tao and Perez-Rua, Juan-Manuel}, title = {{GenTron}: Diffusion Transformers for Image and Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {6441--6451} }
TRIP: Temporal Residual Learning with Image Noise Prior for Image-to-Video Diffusion Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Zhongwei and Long, Fuchen and Pan, Yingwei and Qiu, Zhaofan and Yao, Ting and Cao, Yang and Mei, Tao}, title = {{TRIP}: Temporal Residual Learning with Image Noise Prior for Image-to-Video Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {8671--8681} }
TexVocab: Texture Vocabulary-conditioned Human Avatars-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Yuxiao and Li, Zhe and Liu, Yebin and Wang, Haoqian}, title = {{TexVocab}: Texture Vocabulary-conditioned Human Avatars}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {1715--1725} }
KITRO: Refining Human Mesh by 2D Clues and Kinematic-tree Rotation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Fengyuan and Gu, Kerui and Yao, Angela}, title = {{KITRO}: Refining Human Mesh by {2D} Clues and Kinematic-tree Rotation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {1052--1061} }
SuGaR: Surface-Aligned Gaussian Splatting for Efficient 3D Mesh Reconstruction and High-Quality Mesh Rendering-
[pdf]
[supp]
[bibtex]@InProceedings{Guedon_2024_CVPR, author = {Gu{\'e}don, Antoine and Lepetit, Vincent}, title = {{SuGaR}: Surface-Aligned {Gaussian} Splatting for Efficient {3D} Mesh Reconstruction and High-Quality Mesh Rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {5354--5363} }
Towards Effective Usage of Human-Centric Priors in Diffusion Models for Text-based Human Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Junyan and Sun, Zhenhong and Tan, Zhiyu and Chen, Xuanbai and Chen, Weihua and Li, Hao and Zhang, Cheng and Song, Yang}, title = {Towards Effective Usage of Human-Centric Priors in Diffusion Models for Text-based Human Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {8446--8455} }
A Video is Worth 256 Bases: Spatial-Temporal Expectation-Maximization Inversion for Zero-Shot Video Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Maomao and Li, Yu and Yang, Tianyu and Liu, Yunfei and Yue, Dongxu and Lin, Zhihui and Xu, Dong}, title = {A Video is Worth 256 Bases: Spatial-Temporal Expectation-Maximization Inversion for Zero-Shot Video Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {7528--7537} }
URHand: Universal Relightable Hands-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Zhaoxi and Moon, Gyeongsik and Guo, Kaiwen and Cao, Chen and Pidhorskyi, Stanislav and Simon, Tomas and Joshi, Rohan and Dong, Yuan and Xu, Yichen and Pires, Bernardo and Wen, He and Evans, Lucas and Peng, Bo and Buffalini, Julia and Trimble, Autumn and McPhail, Kevyn and Schoeller, Melissa and Yu, Shoou-I and Romero, Javier and Zollhofer, Michael and Sheikh, Yaser and Liu, Ziwei and Saito, Shunsuke}, title = {{URHand}: Universal Relightable Hands}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {119--129} }
Named Entity Driven Zero-Shot Image Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Feng_2024_CVPR, author = {Feng, Zhida and Chen, Li and Tian, Jing and Liu, JiaXiang and Feng, Shikun}, title = {Named Entity Driven Zero-Shot Image Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {9110--9119} }
ESR-NeRF: Emissive Source Reconstruction Using LDR Multi-view Images-
[pdf]
[supp]
[bibtex]@InProceedings{Jeong_2024_CVPR, author = {Jeong, Jinseo and Koo, Junseo and Zhang, Qimeng and Kim, Gunhee}, title = {{ESR-NeRF}: Emissive Source Reconstruction Using {LDR} Multi-view Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {4598--4609} }
Infer from What You Have Seen Before: Temporally-dependent Classifier for Semi-supervised Video Segmentation-
[pdf]
[bibtex]@InProceedings{Zhuang_2024_CVPR, author = {Zhuang, Jiafan and Wang, Zilei and Zhang, Yixin and Fan, Zhun}, title = {Infer from What You Have Seen Before: Temporally-dependent Classifier for Semi-supervised Video Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {3575--3584} }
Video Frame Interpolation via Direct Synthesis with the Event-based Reference-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Yuhan and Deng, Yongjian and Chen, Hao and Yang, Zhen}, title = {Video Frame Interpolation via Direct Synthesis with the Event-based Reference}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {8477--8487} }
DSL-FIQA: Assessing Facial Image Quality via Dual-Set Degradation Learning and Landmark-Guided Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Wei-Ting and Krishnan, Gurunandan and Gao, Qiang and Kuo, Sy-Yen and Ma, Sizhou and Wang, Jian}, title = {{DSL-FIQA}: Assessing Facial Image Quality via Dual-Set Degradation Learning and Landmark-Guided Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {2931--2941} }
FMA-Net: Flow-Guided Dynamic Filtering and Iterative Feature Refinement with Multi-Attention for Joint Video Super-Resolution and Deblurring-
[pdf]
[supp]
[bibtex]@InProceedings{Youk_2024_CVPR, author = {Youk, Geunhyuk and Oh, Jihyong and Kim, Munchurl}, title = {{FMA-Net}: Flow-Guided Dynamic Filtering and Iterative Feature Refinement with Multi-Attention for Joint Video Super-Resolution and Deblurring}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {44--55} }
Hourglass Tokenizer for Efficient Transformer-Based 3D Human Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Wenhao and Liu, Mengyuan and Liu, Hong and Wang, Pichao and Cai, Jialun and Sebe, Nicu}, title = {Hourglass Tokenizer for Efficient Transformer-Based {3D} Human Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {604--613} }
Boosting Diffusion Models with Moving Average Sampling in Frequency Domain-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qian_2024_CVPR, author = {Qian, Yurui and Cai, Qi and Pan, Yingwei and Li, Yehao and Yao, Ting and Sun, Qibin and Mei, Tao}, title = {Boosting Diffusion Models with Moving Average Sampling in Frequency Domain}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {8911--8920} }
Bi-Causal: Group Activity Recognition via Bidirectional Causality-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Youliang and Liu, Wenxuan and Xu, Danni and Zhou, Zhuo and Wang, Zheng}, title = {{Bi-Causal}: Group Activity Recognition via Bidirectional Causality}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {1450--1459} }
Space-Time Diffusion Features for Zero-Shot Text-Driven Motion Transfer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yatim_2024_CVPR, author = {Yatim, Danah and Fridman, Rafail and Bar-Tal, Omer and Kasten, Yoni and Dekel, Tali}, title = {Space-Time Diffusion Features for Zero-Shot Text-Driven Motion Transfer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {8466--8476} }
MIGC: Multi-Instance Generation Controller for Text-to-Image Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR, author = {Zhou, Dewei and Li, You and Ma, Fan and Zhang, Xiaoting and Yang, Yi}, title = {{MIGC}: Multi-Instance Generation Controller for Text-to-Image Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {6818--6828} }
Distilling CLIP with Dual Guidance for Learning Discriminative Human Body Shape Representation-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Feng and Kim, Minchul and Ren, Zhiyuan and Liu, Xiaoming}, title = {Distilling {CLIP} with Dual Guidance for Learning Discriminative Human Body Shape Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {256--266} }
LLaFS: When Large Language Models Meet Few-Shot Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2024_CVPR, author = {Zhu, Lanyun and Chen, Tianrun and Ji, Deyi and Ye, Jieping and Liu, Jun}, title = {LLaFS: When Large Language Models Meet Few-Shot Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3065-3075} }
Kernel Adaptive Convolution for Scene Text Detection via Distance Map Prediction-
[pdf]
[bibtex]@InProceedings{Zheng_2024_CVPR, author = {Zheng, Jinzhi and Fan, Heng and Zhang, Libo}, title = {Kernel Adaptive Convolution for Scene Text Detection via Distance Map Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5957-5966} }
Adaptive Multi-Modal Cross-Entropy Loss for Stereo Matching-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Peng and Xiang, Zhiyu and Qiao, Chengyu and Fu, Jingyun and Pu, Tianyu}, title = {Adaptive Multi-Modal Cross-Entropy Loss for Stereo Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5135-5144} }
Unlocking the Potential of Prompt-Tuning in Bridging Generalized and Personalized Federated Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2024_CVPR, author = {Deng, Wenlong and Thrampoulidis, Christos and Li, Xiaoxiao}, title = {Unlocking the Potential of Prompt-Tuning in Bridging Generalized and Personalized Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6087-6097} }
GALA: Generating Animatable Layered Assets from a Single Scan-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Taeksoo and Kim, Byungjun and Saito, Shunsuke and Joo, Hanbyul}, title = {GALA: Generating Animatable Layered Assets from a Single Scan}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1535-1545} }
LeGO: Leveraging a Surface Deformation Network for Animatable Stylized Face Generation with One Example-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yoon_2024_CVPR, author = {Yoon, Soyeon and Yun, Kwan and Seo, Kwanggyoon and Cha, Sihun and Yoo, Jung Eun and Noh, Junyong}, title = {LeGO: Leveraging a Surface Deformation Network for Animatable Stylized Face Generation with One Example}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4505-4514} }
Frequency-Adaptive Dilated Convolution for Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Linwei and Gu, Lin and Zheng, Dezhi and Fu, Ying}, title = {Frequency-Adaptive Dilated Convolution for Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3414-3425} }
Multiple View Geometry Transformers for 3D Human Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liao_2024_CVPR, author = {Liao, Ziwei and Zhu, Jialiang and Wang, Chunyu and Hu, Han and Waslander, Steven L.}, title = {Multiple View Geometry Transformers for 3D Human Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {708-717} }
SiTH: Single-view Textured Human Reconstruction with Image-Conditioned Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{I_Ho_2024_CVPR, author = {Ho, Hsuan-I and Song, Jie and Hilliges, Otmar}, title = {SiTH: Single-view Textured Human Reconstruction with Image-Conditioned Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {538-549} }
DynVideo-E: Harnessing Dynamic NeRF for Large-Scale Motion- and View-Change Human-Centric Video Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Jia-Wei and Cao, Yan-Pei and Wu, Jay Zhangjie and Mao, Weijia and Gu, Yuchao and Zhao, Rui and Keppo, Jussi and Shan, Ying and Shou, Mike Zheng}, title = {DynVideo-E: Harnessing Dynamic NeRF for Large-Scale Motion- and View-Change Human-Centric Video Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7664-7674} }
Real-Time Neural BRDF with Spherically Distributed Primitives-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dou_2024_CVPR, author = {Dou, Yishun and Zheng, Zhong and Jin, Qiaoqiao and Ni, Bingbing and Chen, Yugang and Ke, Junxiang}, title = {Real-Time Neural BRDF with Spherically Distributed Primitives}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4337-4346} }
VideoCrafter2: Overcoming Data Limitations for High-Quality Video Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Haoxin and Zhang, Yong and Cun, Xiaodong and Xia, Menghan and Wang, Xintao and Weng, Chao and Shan, Ying}, title = {VideoCrafter2: Overcoming Data Limitations for High-Quality Video Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7310-7320} }
Style Injection in Diffusion: A Training-free Approach for Adapting Large-scale Diffusion Models for Style Transfer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chung_2024_CVPR, author = {Chung, Jiwoo and Hyun, Sangeek and Heo, Jae-Pil}, title = {Style Injection in Diffusion: A Training-free Approach for Adapting Large-scale Diffusion Models for Style Transfer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8795-8805} }
OrthCaps: An Orthogonal CapsNet with Sparse Attention Routing and Pruning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Geng_2024_CVPR, author = {Geng, Xinyu and Wang, Jiaming and Gong, Jiawei and Xue, Yuerong and Xu, Jun and Chen, Fanglin and Huang, Xiaolin}, title = {OrthCaps: An Orthogonal CapsNet with Sparse Attention Routing and Pruning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6037-6046} }
Florence-2: Advancing a Unified Representation for a Variety of Vision Tasks-
[pdf]
[supp]
[bibtex]@InProceedings{Xiao_2024_CVPR, author = {Xiao, Bin and Wu, Haiping and Xu, Weijian and Dai, Xiyang and Hu, Houdong and Lu, Yumao and Zeng, Michael and Liu, Ce and Yuan, Lu}, title = {Florence-2: Advancing a Unified Representation for a Variety of Vision Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4818-4829} }
NeRF On-the-go: Exploiting Uncertainty for Distractor-free NeRFs in the Wild-
[pdf]
[supp]
[bibtex]@InProceedings{Ren_2024_CVPR, author = {Ren, Weining and Zhu, Zihan and Sun, Boyang and Chen, Jiaqi and Pollefeys, Marc and Peng, Songyou}, title = {NeRF On-the-go: Exploiting Uncertainty for Distractor-free NeRFs in the Wild}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8931-8940} }
3D Human Pose Perception from Egocentric Stereo Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Akada_2024_CVPR, author = {Akada, Hiroyasu and Wang, Jian and Golyanik, Vladislav and Theobalt, Christian}, title = {3D Human Pose Perception from Egocentric Stereo Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {767-776} }
Grid Diffusion Models for Text-to-Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2024_CVPR, author = {Lee, Taegyeong and Kwon, Soyeong and Kim, Taehwan}, title = {Grid Diffusion Models for Text-to-Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8734-8743} }
LucidDreamer: Towards High-Fidelity Text-to-3D Generation via Interval Score Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2024_CVPR, author = {Liang, Yixun and Yang, Xin and Lin, Jiantao and Li, Haodong and Xu, Xiaogang and Chen, Yingcong}, title = {LucidDreamer: Towards High-Fidelity Text-to-3D Generation via Interval Score Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6517-6526} }
PTM-VQA: Efficient Video Quality Assessment Leveraging Diverse PreTrained Models from the Wild-
[pdf]
[bibtex]@InProceedings{Yuan_2024_CVPR, author = {Yuan, Kun and Liu, Hongbo and Li, Mading and Sun, Muyi and Sun, Ming and Gong, Jiachao and Hao, Jinhua and Zhou, Chao and Tang, Yansong}, title = {PTM-VQA: Efficient Video Quality Assessment Leveraging Diverse PreTrained Models from the Wild}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2835-2845} }
REACTO: Reconstructing Articulated Objects from a Single Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2024_CVPR, author = {Song, Chaoyue and Wei, Jiacheng and Foo, Chuan Sheng and Lin, Guosheng and Liu, Fayao}, title = {REACTO: Reconstructing Articulated Objects from a Single Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5384-5395} }
Egocentric Whole-Body Motion Capture with FisheyeViT and Diffusion-Based Motion Refinement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Jian and Cao, Zhe and Luvizon, Diogo and Liu, Lingjie and Sarkar, Kripasindhu and Tang, Danhang and Beeler, Thabo and Theobalt, Christian}, title = {Egocentric Whole-Body Motion Capture with FisheyeViT and Diffusion-Based Motion Refinement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {777-787} }
Language Embedded 3D Gaussians for Open-Vocabulary Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2024_CVPR, author = {Shi, Jin-Chuan and Wang, Miao and Duan, Hao-Bin and Guan, Shao-Hua}, title = {Language Embedded 3D Gaussians for Open-Vocabulary Scene Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5333-5343} }
Towards Automated Movie Trailer Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Argaw_2024_CVPR, author = {Argaw, Dawit Mureja and Soldan, Mattia and Pardo, Alejandro and Zhao, Chen and Heilbron, Fabian Caba and Chung, Joon Son and Ghanem, Bernard}, title = {Towards Automated Movie Trailer Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7445-7454} }
Sheared Backpropagation for Fine-tuning Foundation Models-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2024_CVPR, author = {Yu, Zhiyuan and Shen, Li and Ding, Liang and Tian, Xinmei and Chen, Yixin and Tao, Dacheng}, title = {Sheared Backpropagation for Fine-tuning Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5883-5892} }
Misalignment-Robust Frequency Distribution Loss for Image Transformation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ni_2024_CVPR, author = {Ni, Zhangkai and Wu, Juncheng and Wang, Zian and Yang, Wenhan and Wang, Hanli and Ma, Lin}, title = {Misalignment-Robust Frequency Distribution Loss for Image Transformation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2910-2919} }
Degrees of Freedom Matter: Inferring Dynamics from Point Trajectories-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Yan and Prokudin, Sergey and Mihajlovic, Marko and Ma, Qianli and Tang, Siyu}, title = {Degrees of Freedom Matter: Inferring Dynamics from Point Trajectories}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2018-2028} }
Low-Latency Neural Stereo Streaming-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hou_2024_CVPR, author = {Hou, Qiqi and Farhadzadeh, Farzad and Said, Amir and Sautiere, Guillaume and Le, Hoang}, title = {Low-Latency Neural Stereo Streaming}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7974-7984} }
Intrinsic Image Diffusion for Indoor Single-view Material Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Kocsis_2024_CVPR, author = {Kocsis, Peter and Sitzmann, Vincent and Nie{\ss}ner, Matthias}, title = {Intrinsic Image Diffusion for Indoor Single-view Material Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5198-5208} }
Material Palette: Extraction of Materials from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lopes_2024_CVPR, author = {Lopes, Ivan and Pizzati, Fabio and de Charette, Raoul}, title = {Material Palette: Extraction of Materials from a Single Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4379-4388} }
RealCustom: Narrowing Real Text Word for Real-Time Open-Domain Text-to-Image Customization-
[pdf]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Mengqi and Mao, Zhendong and Liu, Mingcong and He, Qian and Zhang, Yongdong}, title = {RealCustom: Narrowing Real Text Word for Real-Time Open-Domain Text-to-Image Customization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7476-7485} }
Text2QR: Harmonizing Aesthetic Customization and Scanning Robustness for Text-Guided QR Code Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Guangyang and Liu, Xiaohong and Jia, Jun and Cui, Xuehao and Zhai, Guangtao}, title = {Text2QR: Harmonizing Aesthetic Customization and Scanning Robustness for Text-Guided QR Code Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8456-8465} }
ECLIPSE: A Resource-Efficient Text-to-Image Prior for Image Generations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Patel_2024_CVPR, author = {Patel, Maitreya and Kim, Changhoon and Cheng, Sheng and Baral, Chitta and Yang, Yezhou}, title = {ECLIPSE: A Resource-Efficient Text-to-Image Prior for Image Generations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9069-9078} }
Adaptive Bidirectional Displacement for Semi-Supervised Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chi_2024_CVPR, author = {Chi, Hanyang and Pang, Jian and Zhang, Bingfeng and Liu, Weifeng}, title = {Adaptive Bidirectional Displacement for Semi-Supervised Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4070-4080} }
Accurate Training Data for Occupancy Map Prediction in Automated Driving Using Evidence Theory-
[pdf]
[supp]
[bibtex]@InProceedings{Kalble_2024_CVPR, author = {K{\"a}lble, Jonas and Wirges, Sascha and Tatarchenko, Maxim and Ilg, Eddy}, title = {Accurate Training Data for Occupancy Map Prediction in Automated Driving Using Evidence Theory}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5281-5290} }
DiffusionLight: Light Probes for Free by Painting a Chrome Ball-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Phongthawee_2024_CVPR, author = {Phongthawee, Pakkapon and Chinchuthakun, Worameth and Sinsunthithet, Nontaphat and Jampani, Varun and Raj, Amit and Khungurn, Pramook and Suwajanakorn, Supasorn}, title = {DiffusionLight: Light Probes for Free by Painting a Chrome Ball}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {98-108} }
Rethinking the Spatial Inconsistency in Classifier-Free Diffusion Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shen_2024_CVPR, author = {Shen, Dazhong and Song, Guanglu and Xue, Zeyue and Wang, Fu-Yun and Liu, Yu}, title = {Rethinking the Spatial Inconsistency in Classifier-Free Diffusion Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9370-9379} }
KTPFormer: Kinematics and Trajectory Prior Knowledge-Enhanced Transformer for 3D Human Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2024_CVPR, author = {Peng, Jihua and Zhou, Yanghong and Mok, P. Y.}, title = {KTPFormer: Kinematics and Trajectory Prior Knowledge-Enhanced Transformer for 3D Human Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1123-1132} }
Differentiable Micro-Mesh Construction-
[pdf]
[supp]
[bibtex]@InProceedings{Dou_2024_CVPR, author = {Dou, Yishun and Zheng, Zhong and Jin, Qiaoqiao and Shi, Rui and Li, Yuhan and Ni, Bingbing}, title = {Differentiable Micro-Mesh Construction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4294-4303} }
SNED: Superposition Network Architecture Search for Efficient Video Diffusion Model-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Zhengang and Kang, Yan and Liu, Yuchen and Liu, Difan and Hinz, Tobias and Liu, Feng and Wang, Yanzhi}, title = {SNED: Superposition Network Architecture Search for Efficient Video Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8661-8670} }
LeftRefill: Filling Right Canvas based on Left Reference through Generalized Text-to-Image Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2024_CVPR, author = {Cao, Chenjie and Cai, Yunuo and Dong, Qiaole and Wang, Yikai and Fu, Yanwei}, title = {LeftRefill: Filling Right Canvas based on Left Reference through Generalized Text-to-Image Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7705-7715} }
Personalized Residuals for Concept-Driven Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ham_2024_CVPR, author = {Ham, Cusuh and Fisher, Matthew and Hays, James and Kolkin, Nicholas and Liu, Yuchen and Zhang, Richard and Hinz, Tobias}, title = {Personalized Residuals for Concept-Driven Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8186-8195} }
Condition-Aware Neural Network for Controlled Image Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Cai_2024_CVPR, author = {Cai, Han and Li, Muyang and Zhang, Qinsheng and Liu, Ming-Yu and Han, Song}, title = {Condition-Aware Neural Network for Controlled Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7194-7203} }
Prompt Augmentation for Self-supervised Text-guided Image Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Bodur_2024_CVPR, author = {Bodur, Rumeysa and Bhattarai, Binod and Kim, Tae-Kyun}, title = {Prompt Augmentation for Self-supervised Text-guided Image Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8829-8838} }
Guess The Unseen: Dynamic 3D Scene Reconstruction from Partial 2D Glimpses-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2024_CVPR, author = {Lee, Inhee and Kim, Byungjun and Joo, Hanbyul}, title = {Guess The Unseen: Dynamic 3D Scene Reconstruction from Partial 2D Glimpses}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1062-1071} }
HyperDreamBooth: HyperNetworks for Fast Personalization of Text-to-Image Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ruiz_2024_CVPR, author = {Ruiz, Nataniel and Li, Yuanzhen and Jampani, Varun and Wei, Wei and Hou, Tingbo and Pritch, Yael and Wadhwa, Neal and Rubinstein, Michael and Aberman, Kfir}, title = {HyperDreamBooth: HyperNetworks for Fast Personalization of Text-to-Image Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6527-6536} }
HardMo: A Large-Scale Hardcase Dataset for Motion Capture-
[pdf]
[supp]
[bibtex]@InProceedings{Liao_2024_CVPR, author = {Liao, Jiaqi and Luo, Chuanchen and Du, Yinuo and Wang, Yuxi and Yin, Xucheng and Zhang, Man and Zhang, Zhaoxiang and Peng, Junran}, title = {HardMo: A Large-Scale Hardcase Dataset for Motion Capture}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1629-1638} }
Separate and Conquer: Decoupling Co-occurrence via Decomposition and Representation for Weakly Supervised Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Zhiwei and Fu, Kexue and Duan, Minghong and Qu, Linhao and Wang, Shuo and Song, Zhijian}, title = {Separate and Conquer: Decoupling Co-occurrence via Decomposition and Representation for Weakly Supervised Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3606-3615} }
BiPer: Binary Neural Networks using a Periodic Function-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vargas_2024_CVPR, author = {Vargas, Edwin and Correa, Claudia V. and Hinojosa, Carlos and Arguello, Henry}, title = {BiPer: Binary Neural Networks using a Periodic Function}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5684-5693} }
Segment Any Event Streams via Weighted Adaptation of Pivotal Tokens-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Zhiwen and Zhu, Zhiyu and Zhang, Yifan and Hou, Junhui and Shi, Guangming and Wu, Jinjian}, title = {Segment Any Event Streams via Weighted Adaptation of Pivotal Tokens}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3890-3900} }
AnyDoor: Zero-shot Object-level Image Customization-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Xi and Huang, Lianghua and Liu, Yu and Shen, Yujun and Zhao, Deli and Zhao, Hengshuang}, title = {AnyDoor: Zero-shot Object-level Image Customization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6593-6602} }
Clustering Propagation for Universal Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ding_2024_CVPR, author = {Ding, Yuhang and Li, Liulei and Wang, Wenguan and Yang, Yi}, title = {Clustering Propagation for Universal Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3357-3369} }
Garment Recovery with Shape and Deformation Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Ren and Dumery, Corentin and Guillard, Beno{\^\i}t and Fua, Pascal}, title = {Garment Recovery with Shape and Deformation Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1586-1595} }
Psychometry: An Omnifit Model for Image Reconstruction from Human Brain Activity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Quan_2024_CVPR, author = {Quan, Ruijie and Wang, Wenguan and Tian, Zhibo and Ma, Fan and Yang, Yi}, title = {Psychometry: An Omnifit Model for Image Reconstruction from Human Brain Activity}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {233-243} }
Exploring Regional Clues in CLIP for Zero-Shot Semantic Segmentation-
[pdf]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Yi and Guo, Meng-Hao and Wang, Miao and Hu, Shi-Min}, title = {Exploring Regional Clues in CLIP for Zero-Shot Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3270-3280} }
Move as You Say Interact as You Can: Language-guided Human Motion Generation with Scene Affordance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Zan and Chen, Yixin and Jia, Baoxiong and Li, Puhao and Zhang, Jinlu and Zhang, Jingze and Liu, Tengyu and Zhu, Yixin and Liang, Wei and Huang, Siyuan}, title = {Move as You Say Interact as You Can: Language-guided Human Motion Generation with Scene Affordance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {433-444} }
Generalizable Face Landmarking Guided by Conditional Face Warping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2024_CVPR, author = {Liang, Jiayi and Liu, Haotian and Xu, Hongteng and Luo, Dixin}, title = {Generalizable Face Landmarking Guided by Conditional Face Warping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2425-2435} }
Sat2Scene: 3D Urban Scene Generation from Satellite Images with Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Zuoyue and Li, Zhenqiang and Cui, Zhaopeng and Pollefeys, Marc and Oswald, Martin R.}, title = {Sat2Scene: 3D Urban Scene Generation from Satellite Images with Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7141-7150} }
Control4D: Efficient 4D Portrait Editing with Text-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shao_2024_CVPR, author = {Shao, Ruizhi and Sun, Jingxiang and Peng, Cheng and Zheng, Zerong and Zhou, Boyao and Zhang, Hongwen and Liu, Yebin}, title = {Control4D: Efficient 4D Portrait Editing with Text}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4556-4567} }
CLIPtone: Unsupervised Learning for Text-based Image Tone Adjustment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2024_CVPR, author = {Lee, Hyeongmin and Kang, Kyoungkook and Ok, Jungseul and Cho, Sunghyun}, title = {CLIPtone: Unsupervised Learning for Text-based Image Tone Adjustment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2942-2951} }
Codebook Transfer with Part-of-Speech for Vector-Quantized Image Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Baoquan and Wang, Huaibin and Luo, Chuyao and Li, Xutao and Liang, Guotao and Ye, Yunming and Qi, Xiaochen and He, Yao}, title = {Codebook Transfer with Part-of-Speech for Vector-Quantized Image Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7757-7766} }
InceptionNeXt: When Inception Meets ConvNeXt-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2024_CVPR, author = {Yu, Weihao and Zhou, Pan and Yan, Shuicheng and Wang, Xinchao}, title = {InceptionNeXt: When Inception Meets ConvNeXt}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5672-5683} }
LiveHPS: LiDAR-based Scene-level Human Pose and Shape Estimation in Free Environment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2024_CVPR, author = {Ren, Yiming and Han, Xiao and Zhao, Chengfeng and Wang, Jingya and Xu, Lan and Yu, Jingyi and Ma, Yuexin}, title = {LiveHPS: LiDAR-based Scene-level Human Pose and Shape Estimation in Free Environment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1281-1291} }
Segment Every Out-of-Distribution Object-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Wenjie and Li, Jia and Dong, Xin and Xiang, Yu and Guo, Yunhui}, title = {Segment Every Out-of-Distribution Object}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3910-3920} }
Wavelet-based Fourier Information Interaction with Frequency Diffusion Adjustment for Underwater Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Chen and Cai, Weiling and Dong, Chenyu and Hu, Chengwei}, title = {Wavelet-based Fourier Information Interaction with Frequency Diffusion Adjustment for Underwater Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8281-8291} }
PoNQ: a Neural QEM-based Mesh Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Maruani_2024_CVPR, author = {Maruani, Nissim and Ovsjanikov, Maks and Alliez, Pierre and Desbrun, Mathieu}, title = {PoNQ: a Neural QEM-based Mesh Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3647-3657} }
Boosting Order-Preserving and Transferability for Neural Architecture Search: a Joint Architecture Refined Search and Fine-tuning Approach-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Beichen and Wang, Xiaoxing and Qin, Xiaohan and Yan, Junchi}, title = {Boosting Order-Preserving and Transferability for Neural Architecture Search: a Joint Architecture Refined Search and Fine-tuning Approach}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5662-5671} }
Dr. Bokeh: DiffeRentiable Occlusion-aware Bokeh Rendering-
[pdf]
[supp]
[bibtex]@InProceedings{Sheng_2024_CVPR, author = {Sheng, Yichen and Yu, Zixun and Ling, Lu and Cao, Zhiwen and Zhang, Xuaner and Lu, Xin and Xian, Ke and Lin, Haiting and Benes, Bedrich}, title = {Dr. Bokeh: DiffeRentiable Occlusion-aware Bokeh Rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4515-4525} }
LAENeRF: Local Appearance Editing for Neural Radiance Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Radl_2024_CVPR,
  author    = {Radl, Lukas and Steiner, Michael and Kurz, Andreas and Steinberger, Markus},
  title     = {{LAENeRF}: Local Appearance Editing for Neural Radiance Fields},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4969--4978},
}
Adversarial Score Distillation: When score distillation meets GAN-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2024_CVPR,
  author    = {Wei, Min and Zhou, Jingkai and Sun, Junyao and Zhang, Xuesong},
  title     = {Adversarial Score Distillation: When score distillation meets {GAN}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8131--8141},
}
Vector Graphics Generation via Mutually Impulsed Dual-domain Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Zhongyin and Chen, Ye and Hu, Zhangli and Chen, Xuanhong and Ni, Bingbing},
  title     = {Vector Graphics Generation via Mutually Impulsed Dual-domain Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4420--4428},
}
ScoreHypo: Probabilistic Human Mesh Estimation with Hypothesis Scoring-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Yuan and Ma, Xiaoxuan and Su, Jiajun and Zhu, Wentao and Qiao, Yu and Wang, Yizhou},
  title     = {{ScoreHypo}: Probabilistic Human Mesh Estimation with Hypothesis Scoring},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {979--989},
}
MeshPose: Unifying DensePose and 3D Body Mesh Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Le_2024_CVPR,
  author    = {Le, Eric-Tuan and Kakolyris, Antonis and Koutras, Petros and Tam, Himmy and Skordos, Efstratios and Papandreou, George and G{\"u}ler, Riza Alp and Kokkinos, Iasonas},
  title     = {{MeshPose}: Unifying {DensePose} and {3D} Body Mesh Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2405--2414},
}
Unsupervised Salient Instance Detection-
[pdf]
[bibtex]@InProceedings{Tian_2024_CVPR,
  author    = {Tian, Xin and Xu, Ke and Lau, Rynson},
  title     = {Unsupervised Salient Instance Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2702--2712},
}
Move Anything with Layered Scene Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2024_CVPR,
  author    = {Ren, Jiawei and Xu, Mengmeng and Wu, Jui-Chieh and Liu, Ziwei and Xiang, Tao and Toisoul, Antoine},
  title     = {Move Anything with Layered Scene Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6380--6389},
}
Human Gaussian Splatting: Real-time Rendering of Animatable Avatars-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Moreau_2024_CVPR,
  author    = {Moreau, Arthur and Song, Jifei and Dhamo, Helisa and Shaw, Richard and Zhou, Yiren and P{\'e}rez-Pellitero, Eduardo},
  title     = {Human {Gaussian} Splatting: Real-time Rendering of Animatable Avatars},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {788--798},
}
The Devil is in the Details: StyleFeatureEditor for Detail-Rich StyleGAN Inversion and High Quality Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bobkov_2024_CVPR,
  author    = {Bobkov, Denis and Titov, Vadim and Alanov, Aibek and Vetrov, Dmitry},
  title     = {The Devil is in the Details: {StyleFeatureEditor} for Detail-Rich {StyleGAN} Inversion and High Quality Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9337--9346},
}
Unbiased Estimator for Distorted Conics in Camera Calibration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2024_CVPR,
  author    = {Song, Chaehyeon and Shin, Jaeho and Jeon, Myung-Hwan and Lim, Jongwoo and Kim, Ayoung},
  title     = {Unbiased Estimator for Distorted Conics in Camera Calibration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {373--381},
}
MultiPhys: Multi-Person Physics-aware 3D Motion Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ugrinovic_2024_CVPR,
  author    = {Ugrinovic, Nicolas and Pan, Boxiao and Pavlakos, Georgios and Paschalidou, Despoina and Shen, Bokui and Sanchez-Riera, Jordi and Moreno-Noguer, Francesc and Guibas, Leonidas},
  title     = {{MultiPhys}: Multi-Person Physics-aware {3D} Motion Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2331--2340},
}
NIVeL: Neural Implicit Vector Layers for Text-to-Vector Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Thamizharasan_2024_CVPR,
  author    = {Thamizharasan, Vikas and Liu, Difan and Fisher, Matthew and Zhao, Nanxuan and Kalogerakis, Evangelos and Lukac, Michal},
  title     = {{NIVeL}: Neural Implicit Vector Layers for Text-to-Vector Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4589--4597},
}
OAKINK2: A Dataset of Bimanual Hands-Object Manipulation in Complex Task Completion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhan_2024_CVPR,
  author    = {Zhan, Xinyu and Yang, Lixin and Zhao, Yifei and Mao, Kangrui and Xu, Hanlin and Lin, Zenan and Li, Kailin and Lu, Cewu},
  title     = {{OAKINK2}: A Dataset of Bimanual Hands-Object Manipulation in Complex Task Completion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {445--456},
}
Text-Guided 3D Face Synthesis - From Generation to Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Yunjie and Meng, Yapeng and Hu, Zhipeng and Li, Lincheng and Wu, Haoqian and Zhou, Kun and Xu, Weiwei and Yu, Xin},
  title     = {Text-Guided {3D} Face Synthesis - From Generation to Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1260--1269},
}
Multiplane Prior Guided Few-Shot Aerial Scene Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2024_CVPR,
  author    = {Gao, Zihan and Jiao, Licheng and Li, Lingling and Liu, Xu and Liu, Fang and Chen, Puhua and Guo, Yuwei},
  title     = {Multiplane Prior Guided Few-Shot Aerial Scene Rendering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5009--5019},
}
MAS: Multi-view Ancestral Sampling for 3D Motion Generation Using 2D Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kapon_2024_CVPR,
  author    = {Kapon, Roy and Tevet, Guy and Cohen-Or, Daniel and Bermano, Amit H.},
  title     = {{MAS}: Multi-view Ancestral Sampling for {3D} Motion Generation Using {2D} Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1965--1974},
}
Bilateral Event Mining and Complementary for Event Stream Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Zhilin and Liang, Quanmin and Yu, Yijie and Qin, Chujun and Zheng, Xiawu and Huang, Kai and Zhou, Zikun and Yang, Wenming},
  title     = {Bilateral Event Mining and Complementary for Event Stream Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {34--43},
}
SANeRF-HQ: Segment Anything for NeRF in High Quality-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Yichen and Hu, Benran and Tang, Chi-Keung and Tai, Yu-Wing},
  title     = {{SANeRF-HQ}: Segment Anything for {NeRF} in High Quality},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3216--3226},
}
Transcending the Limit of Local Window: Advanced Super-Resolution Transformer with Adaptive Token Dictionary-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Leheng and Li, Yawei and Zhou, Xingyu and Zhao, Xiaorui and Gu, Shuhang},
  title     = {Transcending the Limit of Local Window: Advanced Super-Resolution Transformer with Adaptive Token Dictionary},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2856--2865},
}
Mixed-Precision Quantization for Federated Learning on Resource-Constrained Heterogeneous Devices-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Huancheng and Vikalo, Haris},
  title     = {Mixed-Precision Quantization for Federated Learning on Resource-Constrained Heterogeneous Devices},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6138--6148},
}
Neural Fields as Distributions: Signal Processing Beyond Euclidean Space-
[pdf]
[supp]
[bibtex]@InProceedings{Rebain_2024_CVPR,
  author    = {Rebain, Daniel and Yazdani, Soroosh and Yi, Kwang Moo and Tagliasacchi, Andrea},
  title     = {Neural Fields as Distributions: Signal Processing Beyond {Euclidean} Space},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4274--4283},
}
Style Blind Domain Generalized Semantic Segmentation via Covariance Alignment and Semantic Consistence Contrastive Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ahn_2024_CVPR,
  author    = {Ahn, Woo-Jin and Yang, Geun-Yeong and Choi, Hyun-Duck and Lim, Myo-Taeg},
  title     = {Style Blind Domain Generalized Semantic Segmentation via Covariance Alignment and Semantic Consistence Contrastive Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3616--3626},
}
X-3D: Explicit 3D Structure Modeling for Point Cloud Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2024_CVPR,
  author    = {Sun, Shuofeng and Rao, Yongming and Lu, Jiwen and Yan, Haibin},
  title     = {{X-3D}: Explicit {3D} Structure Modeling for Point Cloud Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5074--5083},
}
One More Step: A Versatile Plug-and-Play Module for Rectifying Diffusion Schedule Flaws and Enhancing Low-Frequency Controls-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2024_CVPR,
  author    = {Hu, Minghui and Zheng, Jianbin and Zheng, Chuanxia and Wang, Chaoyue and Tao, Dacheng and Cham, Tat-Jen},
  title     = {One More Step: A Versatile Plug-and-Play Module for Rectifying Diffusion Schedule Flaws and Enhancing Low-Frequency Controls},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7331--7340},
}
HIVE: Harnessing Human Feedback for Instructional Visual Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Shu and Yang, Xinyi and Feng, Yihao and Qin, Can and Chen, Chia-Chih and Yu, Ning and Chen, Zeyuan and Wang, Huan and Savarese, Silvio and Ermon, Stefano and Xiong, Caiming and Xu, Ran},
  title     = {{HIVE}: Harnessing Human Feedback for Instructional Visual Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9026--9036},
}
StrokeFaceNeRF: Stroke-based Facial Appearance Editing in Neural Radiance Field-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Xiao-Juan and Zhang, Dingxi and Chen, Shu-Yu and Liu, Feng-Lin},
  title     = {{StrokeFaceNeRF}: Stroke-based Facial Appearance Editing in Neural Radiance Field},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7538--7547},
}
ProxyCap: Real-time Monocular Full-body Capture in World Space via Human-Centric Proxy-to-Motion Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Yuxiang and Zhang, Hongwen and Hu, Liangxiao and Zhang, Jiajun and Yi, Hongwei and Zhang, Shengping and Liu, Yebin},
  title     = {{ProxyCap}: Real-time Monocular Full-body Capture in World Space via Human-Centric Proxy-to-Motion Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1954--1964},
}
On the Robustness of Language Guidance for Low-Level Vision Tasks: Findings from Depth Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chatterjee_2024_CVPR,
  author    = {Chatterjee, Agneet and Gokhale, Tejas and Baral, Chitta and Yang, Yezhou},
  title     = {On the Robustness of Language Guidance for Low-Level Vision Tasks: Findings from Depth Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2794--2803},
}
UFOGen: You Forward Once Large Scale Text-to-Image Generation via Diffusion GANs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Yanwu and Zhao, Yang and Xiao, Zhisheng and Hou, Tingbo},
  title     = {{UFOGen}: You Forward Once Large Scale Text-to-Image Generation via Diffusion {GANs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8196--8206},
}
A Dual-Augmentor Framework for Domain Generalization in 3D Human Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2024_CVPR,
  author    = {Peng, Qucheng and Zheng, Ce and Chen, Chen},
  title     = {A Dual-Augmentor Framework for Domain Generalization in {3D} Human Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2240--2249},
}
ACT-Diffusion: Efficient Adversarial Consistency Training for One-step Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Kong_2024_CVPR,
  author    = {Kong, Fei and Duan, Jinhao and Sun, Lichao and Cheng, Hao and Xu, Renjing and Shen, Hengtao and Zhu, Xiaofeng and Shi, Xiaoshuang and Xu, Kaidi},
  title     = {{ACT-Diffusion}: Efficient Adversarial Consistency Training for One-step Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8890--8899},
}
Spectral Meets Spatial: Harmonising 3D Shape Matching and Interpolation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2024_CVPR,
  author    = {Cao, Dongliang and Eisenberger, Marvin and El Amrani, Nafie and Cremers, Daniel and Bernard, Florian},
  title     = {Spectral Meets Spatial: Harmonising {3D} Shape Matching and Interpolation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3658--3668},
}
Emu Edit: Precise Image Editing via Recognition and Generation Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sheynin_2024_CVPR,
  author    = {Sheynin, Shelly and Polyak, Adam and Singer, Uriel and Kirstain, Yuval and Zohar, Amit and Ashual, Oron and Parikh, Devi and Taigman, Yaniv},
  title     = {{Emu Edit}: Precise Image Editing via Recognition and Generation Tasks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8871--8879},
}
Face2Diffusion for Fast and Editable Face Personalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shiohara_2024_CVPR,
  author    = {Shiohara, Kaede and Yamasaki, Toshihiko},
  title     = {{Face2Diffusion} for Fast and Editable Face Personalization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6850--6859},
}
Dancing with Still Images: Video Distillation via Static-Dynamic Disentanglement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Ziyu and Xu, Yue and Lu, Cewu and Li, Yong-Lu},
  title     = {Dancing with Still Images: Video Distillation via Static-Dynamic Disentanglement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6296--6304},
}
UniRepLKNet: A Universal Perception Large-Kernel ConvNet for Audio Video Point Cloud Time-Series and Image Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ding_2024_CVPR,
  author    = {Ding, Xiaohan and Zhang, Yiyuan and Ge, Yixiao and Zhao, Sijie and Song, Lin and Yue, Xiangyu and Shan, Ying},
  title     = {{UniRepLKNet}: A Universal Perception Large-Kernel {ConvNet} for Audio Video Point Cloud Time-Series and Image Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5513--5524},
}
SwiftBrush: One-Step Text-to-Image Diffusion Model with Variational Score Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2024_CVPR,
  author    = {Nguyen, Thuan Hoang and Tran, Anh},
  title     = {{SwiftBrush}: One-Step Text-to-Image Diffusion Model with Variational Score Distillation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7807--7816},
}
DEADiff: An Efficient Stylization Diffusion Model with Disentangled Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qi_2024_CVPR,
  author    = {Qi, Tianhao and Fang, Shancheng and Wu, Yanze and Xie, Hongtao and Liu, Jiawei and Chen, Lang and He, Qian and Zhang, Yongdong},
  title     = {{DEADiff}: An Efficient Stylization Diffusion Model with Disentangled Representations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8693--8702},
}
Exact Fusion via Feature Distribution Matching for Few-shot Image Generation-
[pdf]
[bibtex]@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Yingbo and Ye, Yutong and Zhang, Pengyu and Wei, Xian and Chen, Mingsong},
  title     = {Exact Fusion via Feature Distribution Matching for Few-shot Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8383--8392},
}
CoDeF: Content Deformation Fields for Temporally Consistent Video Processing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ouyang_2024_CVPR,
  author    = {Ouyang, Hao and Wang, Qiuyu and Xiao, Yuxi and Bai, Qingyan and Zhang, Juntao and Zheng, Kecheng and Zhou, Xiaowei and Chen, Qifeng and Shen, Yujun},
  title     = {{CoDeF}: Content Deformation Fields for Temporally Consistent Video Processing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8089--8099},
}
QUADify: Extracting Meshes with Pixel-level Details and Materials from Images-
[pdf]
[supp]
[bibtex]@InProceedings{Fruhauf_2024_CVPR,
  author    = {Fr{\"u}hauf, Maximilian and Riemenschneider, Hayko and Gross, Markus and Schroers, Christopher},
  title     = {{QUADify}: Extracting Meshes with Pixel-level Details and Materials from Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4661--4670},
}
RecDiffusion: Rectangling for Image Stitching with Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Tianhao and Li, Haipeng and Wang, Ziyi and Luo, Ao and Zhang, Chen-Lin and Li, Jiajun and Zeng, Bing and Liu, Shuaicheng},
  title     = {{RecDiffusion}: Rectangling for Image Stitching with Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2692--2701},
}
Eclipse: Disambiguating Illumination and Materials using Unintended Shadows-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Verbin_2024_CVPR,
  author    = {Verbin, Dor and Mildenhall, Ben and Hedman, Peter and Barron, Jonathan T. and Zickler, Todd and Srinivasan, Pratul P.},
  title     = {Eclipse: Disambiguating Illumination and Materials using Unintended Shadows},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {77--86},
}
Balancing Act: Distribution-Guided Debiasing in Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Parihar_2024_CVPR,
  author    = {Parihar, Rishubh and Bhat, Abhijnya and Basu, Abhipsa and Mallick, Saswat and Kundu, Jogendra Nath and Babu, R. Venkatesh},
  title     = {Balancing Act: Distribution-Guided Debiasing in Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6668--6678},
}
Differentiable Point-based Inverse Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chung_2024_CVPR,
  author    = {Chung, Hoon-Gyu and Choi, Seokjun and Baek, Seung-Hwan},
  title     = {Differentiable Point-based Inverse Rendering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4399--4409},
}
A Unified and Interpretable Emotion Representation and Expression Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Paskaleva_2024_CVPR,
  author    = {Paskaleva, Reni and Holubakha, Mykyta and Ilic, Andela and Motamed, Saman and Van Gool, Luc and Paudel, Danda},
  title     = {A Unified and Interpretable Emotion Representation and Expression Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2447--2456},
}
Upscale-A-Video: Temporal-Consistent Diffusion Model for Real-World Video Super-Resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Shangchen and Yang, Peiqing and Wang, Jianyi and Luo, Yihang and Loy, Chen Change},
  title     = {{Upscale-A-Video}: Temporal-Consistent Diffusion Model for Real-World Video Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2535--2545},
}
4D-DRESS: A 4D Dataset of Real-World Human Clothing With Semantic Annotations-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Wenbo and Ho, Hsuan-I and Guo, Chen and Rong, Boxiang and Grigorev, Artur and Song, Jie and Zarate, Juan Jose and Hilliges, Otmar},
  title     = {{4D-DRESS}: A {4D} Dataset of Real-World Human Clothing With Semantic Annotations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {550--560},
}
Specularity Factorization for Low-Light Enhancement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Saini_2024_CVPR,
  author    = {Saini, Saurabh and Narayanan, P J},
  title     = {Specularity Factorization for Low-Light Enhancement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1--12},
}
Paint3D: Paint Anything 3D with Lighting-Less Texture Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2024_CVPR,
  author    = {Zeng, Xianfang and Chen, Xin and Qi, Zhongqi and Liu, Wen and Zhao, Zibo and Wang, Zhibin and Fu, Bin and Liu, Yong and Yu, Gang},
  title     = {{Paint3D}: Paint Anything {3D} with Lighting-Less Texture Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4252--4262},
}
MS-MANO: Enabling Hand Pose Tracking with Biomechanical Constraints-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2024_CVPR,
  author    = {Xie, Pengfei and Xu, Wenqiang and Tang, Tutian and Yu, Zhenjun and Lu, Cewu},
  title     = {{MS-MANO}: Enabling Hand Pose Tracking with Biomechanical Constraints},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2382--2392},
}
Generate Like Experts: Multi-Stage Font Generation by Incorporating Font Transfer Process into Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Fu_2024_CVPR,
  author    = {Fu, Bin and Yu, Fanghua and Liu, Anran and Wang, Zixuan and Wen, Jie and He, Junjun and Qiao, Yu},
  title     = {Generate Like Experts: Multi-Stage Font Generation by Incorporating Font Transfer Process into Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6892--6901},
}
Diffuse Attend and Segment: Unsupervised Zero-Shot Segmentation using Stable Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tian_2024_CVPR,
  author    = {Tian, Junjiao and Aggarwal, Lavisha and Colaco, Andrea and Kira, Zsolt and Gonzalez-Franco, Mar},
  title     = {Diffuse Attend and Segment: Unsupervised Zero-Shot Segmentation using {Stable Diffusion}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3554--3563},
}
Implicit Discriminative Knowledge Learning for Visible-Infrared Person Re-Identification-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ren_2024_CVPR,
  author    = {Ren, Kaijie and Zhang, Lei},
  title     = {Implicit Discriminative Knowledge Learning for Visible-Infrared Person Re-Identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {393--402},
}
Gradient Alignment for Cross-Domain Face Anti-Spoofing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Le_2024_CVPR,
  author    = {Le, Binh M. and Woo, Simon S.},
  title     = {Gradient Alignment for Cross-Domain Face Anti-Spoofing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {188--199},
}
OpticalDR: A Deep Optical Imaging Model for Privacy-Protective Depression Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2024_CVPR,
  author    = {Pan, Yuchen and Jiang, Junjun and Jiang, Kui and Wu, Zhihao and Yu, Keyuan and Liu, Xianming},
  title     = {{OpticalDR}: A Deep Optical Imaging Model for Privacy-Protective Depression Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1303--1312},
}
Observation-Guided Diffusion Probabilistic Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2024_CVPR,
  author    = {Kang, Junoh and Choi, Jinyoung and Choi, Sungik and Han, Bohyung},
  title     = {Observation-Guided Diffusion Probabilistic Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8323--8331},
}
Spatial-Aware Regression for Keypoint Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Dongkai and Zhang, Shiliang},
  title     = {Spatial-Aware Regression for Keypoint Localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {624--633},
}
EFormer: Enhanced Transformer towards Semantic-Contour Features of Foreground for Portraits Matting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Zitao and Miao, Qiguang and Xi, Yue and Zhao, Peipei},
  title     = {{EFormer}: Enhanced Transformer towards Semantic-Contour Features of Foreground for Portraits Matting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3880--3889},
}
MultiPly: Reconstruction of Multiple People from Monocular Video in the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2024_CVPR,
  author    = {Jiang, Zeren and Guo, Chen and Kaufmann, Manuel and Jiang, Tianjian and Valentin, Julien and Hilliges, Otmar and Song, Jie},
  title     = {{MultiPly}: Reconstruction of Multiple People from Monocular Video in the Wild},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {109--118},
}
ConsistNet: Enforcing 3D Consistency for Multi-view Images Diffusion-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Jiayu and Cheng, Ziang and Duan, Yunfei and Ji, Pan and Li, Hongdong},
  title     = {{ConsistNet}: Enforcing {3D} Consistency for Multi-view Images Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7079--7088},
}
GenN2N: Generative NeRF2NeRF Translation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Xiangyue and Xue, Han and Luo, Kunming and Tan, Ping and Yi, Li},
  title     = {{GenN2N}: Generative {NeRF2NeRF} Translation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5105--5114},
}
Universal Robustness via Median Randomized Smoothing for Real-World Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chaouai_2024_CVPR,
  author    = {Chaouai, Zakariya and Tamaazousti, Mohamed},
  title     = {Universal Robustness via Median Randomized Smoothing for Real-World Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9059--9068},
}
One-dimensional Adapter to Rule Them All: Concepts Diffusion Models and Erasing Applications-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lyu_2024_CVPR,
  author    = {Lyu, Mengyao and Yang, Yuhong and Hong, Haiwen and Chen, Hui and Jin, Xuan and He, Yuan and Xue, Hui and Han, Jungong and Ding, Guiguang},
  title     = {One-dimensional Adapter to Rule Them All: Concepts Diffusion Models and Erasing Applications},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7559--7568},
}
Kandinsky Conformal Prediction: Efficient Calibration of Image Segmentation Algorithms-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Brunekreef_2024_CVPR,
  author    = {Brunekreef, Joren and Marcus, Eric and Sheombarsing, Ray and Sonke, Jan-Jakob and Teuwen, Jonas},
  title     = {Kandinsky Conformal Prediction: Efficient Calibration of Image Segmentation Algorithms},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4135--4143},
}
Diversity-aware Channel Pruning for StyleGAN Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chung_2024_CVPR,
  author    = {Chung, Jiwoo and Hyun, Sangeek and Shim, Sang-Heon and Heo, Jae-Pil},
  title     = {Diversity-aware Channel Pruning for {StyleGAN} Compression},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7902--7911},
}
Neural Clustering based Visual Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Guikun and Li, Xia and Yang, Yi and Wang, Wenguan},
  title     = {Neural Clustering based Visual Representation Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5714--5725},
}
Sparse Semi-DETR: Sparse Learnable Queries for Semi-Supervised Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Shehzadi_2024_CVPR,
  author    = {Shehzadi, Tahira and Hashmi, Khurram Azeem and Stricker, Didier and Afzal, Muhammad Zeshan},
  title     = {Sparse {Semi-DETR}: Sparse Learnable Queries for Semi-Supervised Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5840--5850},
}
Uncertainty-Aware Source-Free Adaptive Image Super-Resolution with Wavelet Augmentation Transformer-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ai_2024_CVPR,
  author    = {Ai, Yuang and Zhou, Xiaoqiang and Huang, Huaibo and Zhang, Lei and He, Ran},
  title     = {Uncertainty-Aware Source-Free Adaptive Image Super-Resolution with Wavelet Augmentation Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8142--8152},
}
Spacetime Gaussian Feature Splatting for Real-Time Dynamic View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Zhan and Chen, Zhang and Li, Zhong and Xu, Yi},
  title     = {Spacetime {Gaussian} Feature Splatting for Real-Time Dynamic View Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8508--8520},
}
Instruct-Imagen: Image Generation with Multi-modal Instruction-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2024_CVPR,
  author    = {Hu, Hexiang and Chan, Kelvin C. K. and Su, Yu-Chuan and Chen, Wenhu and Li, Yandong and Sohn, Kihyuk and Zhao, Yang and Ben, Xue and Gong, Boqing and Cohen, William and Chang, Ming-Wei and Jia, Xuhui},
  title     = {{Instruct-Imagen}: Image Generation with Multi-modal Instruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4754--4763},
}
Rethinking Few-shot 3D Point Cloud Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{An_2024_CVPR,
  author    = {An, Zhaochong and Sun, Guolei and Liu, Yun and Liu, Fayao and Wu, Zongwei and Wang, Dan and Van Gool, Luc and Belongie, Serge},
  title     = {Rethinking Few-shot {3D} Point Cloud Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3996--4006},
}
GreedyViG: Dynamic Axial Graph Construction for Efficient Vision GNNs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Munir_2024_CVPR,
  author    = {Munir, Mustafa and Avery, William and Rahman, Md Mostafijur and Marculescu, Radu},
  title     = {{GreedyViG}: Dynamic Axial Graph Construction for Efficient Vision {GNNs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6118--6127},
}
Relightable and Animatable Neural Avatar from Sparse-View Video-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Zhen and Peng, Sida and Geng, Chen and Mou, Linzhan and Yan, Zihan and Sun, Jiaming and Bao, Hujun and Zhou, Xiaowei},
  title     = {Relightable and Animatable Neural Avatar from Sparse-View Video},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {990--1000},
}
Pose Adapted Shape Learning for Large-Pose Face Reenactment-
[pdf]
[supp]
[bibtex]@InProceedings{Hsu_2024_CVPR,
  author    = {Hsu, Gee-Sern Jison and Zhang, Jie-Ying and Hsiang, Huang Yu and Hong, Wei-Jie},
  title     = {Pose Adapted Shape Learning for Large-Pose Face Reenactment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7413--7422},
}
NRDF: Neural Riemannian Distance Fields for Learning Articulated Pose Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2024_CVPR,
  author    = {He, Yannan and Tiwari, Garvita and Birdal, Tolga and Lenssen, Jan Eric and Pons-Moll, Gerard},
  title     = {{NRDF}: Neural {Riemannian} Distance Fields for Learning Articulated Pose Priors},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1661--1671},
}
RepAn: Enhanced Annealing through Re-parameterization-
[pdf]
[supp]
[bibtex]@InProceedings{Fei_2024_CVPR,
  author    = {Fei, Xiang and Zheng, Xiawu and Wang, Yan and Chao, Fei and Wu, Chenglin and Cao, Liujuan},
  title     = {{RepAn}: Enhanced Annealing through Re-parameterization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5798--5808},
}
DreamControl: Control-Based Text-to-3D Generation with 3D Self-Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Tianyu and Zeng, Yihan and Zhang, Zhilu and Xu, Wan and Xu, Hang and Xu, Songcen and Lau, Rynson W. H. and Zuo, Wangmeng},
  title     = {{DreamControl}: Control-Based Text-to-{3D} Generation with {3D} Self-Prior},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5364--5373},
}
ODIN: A Single Model for 2D and 3D Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jain_2024_CVPR,
  author    = {Jain, Ayush and Katara, Pushkal and Gkanatsios, Nikolaos and Harley, Adam W. and Sarch, Gabriel and Aggarwal, Kriti and Chaudhary, Vishrav and Fragkiadaki, Katerina},
  title     = {{ODIN}: A Single Model for {2D} and {3D} Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3564--3574},
}
InitNO: Boosting Text-to-Image Diffusion Models via Initial Noise Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2024_CVPR,
  author    = {Guo, Xiefan and Liu, Jinlin and Cui, Miaomiao and Li, Jiankai and Yang, Hongyu and Huang, Di},
  title     = {{InitNO}: Boosting Text-to-Image Diffusion Models via Initial Noise Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9380--9389},
}
Multimodal Sense-Informed Forecasting of 3D Human Motions-
[pdf]
[bibtex]@InProceedings{Lou_2024_CVPR,
  author    = {Lou, Zhenyu and Cui, Qiongjie and Wang, Haofan and Tang, Xu and Zhou, Hong},
  title     = {Multimodal Sense-Informed Forecasting of {3D} Human Motions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2144--2154},
}
FlowerFormer: Empowering Neural Architecture Encoding using a Flow-aware Graph Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hwang_2024_CVPR,
  author    = {Hwang, Dongyeong and Kim, Hyunju and Kim, Sunwoo and Shin, Kijung},
  title     = {{FlowerFormer}: Empowering Neural Architecture Encoding using a Flow-aware Graph Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6128--6137},
}
EmoGen: Emotional Image Content Generation with Text-to-Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Jingyuan and Feng, Jiawei and Huang, Hui},
  title     = {{EmoGen}: Emotional Image Content Generation with Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6358--6368},
}
Neural Implicit Representation for Building Digital Twins of Unknown Articulated Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Weng_2024_CVPR,
  author    = {Weng, Yijia and Wen, Bowen and Tremblay, Jonathan and Blukis, Valts and Fox, Dieter and Guibas, Leonidas and Birchfield, Stan},
  title     = {Neural Implicit Representation for Building Digital Twins of Unknown Articulated Objects},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3141--3150},
}
Vanishing-Point-Guided Video Semantic Segmentation of Driving Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2024_CVPR_VanishingPoint,
  author        = {Guo, Diandian and Fan, Deng-Ping and Lu, Tongyu and Sakaridis, Christos and Van Gool, Luc},
  title         = {Vanishing-Point-Guided Video Semantic Segmentation of Driving Scenes},
  booktitle     = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month         = jun,
  year          = {2024},
  pages         = {3544--3553},
  internal-note = {Key was Guo_2024_CVPR, which repeats the key of the InitNO entry earlier in this file; a repeated key makes BibTeX skip the later entry, so a keyword suffix was added to make this entry citable.},
}
LAMP: Learn A Motion Pattern for Few-Shot Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Ruiqi and Chen, Liangyu and Yang, Tong and Guo, Chunle and Li, Chongyi and Zhang, Xiangyu},
  title     = {{LAMP}: Learn A Motion Pattern for Few-Shot Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7089--7098},
}
Language-driven Object Fusion into Neural Radiance Fields with Pose-Conditioned Dataset Updates-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shum_2024_CVPR,
  author    = {Shum, Ka Chun and Kim, Jaeyeon and Hua, Binh-Son and Nguyen, Duc Thanh and Yeung, Sai-Kit},
  title     = {Language-driven Object Fusion into Neural Radiance Fields with Pose-Conditioned Dataset Updates},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5176--5187},
}
DREAM: Diffusion Rectification and Estimation-Adaptive Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Jinxin and Ding, Tianyu and Chen, Tianyi and Jiang, Jiachen and Zharkov, Ilya and Zhu, Zhihui and Liang, Luming},
  title     = {{DREAM}: Diffusion Rectification and Estimation-Adaptive Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8342--8351},
}
Back