CVPR 2024 Open Access Repository

Papers

Back
DPMesh: Exploiting Diffusion Prior for Occluded Human Mesh Recovery: Yixuan Zhu,

Ao Li,

Yansong Tang,

Wenliang Zhao,

Jie Zhou,

Jiwen Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2024_CVPR,
  author    = {Zhu, Yixuan and Li, Ao and Tang, Yansong and Zhao, Wenliang and Zhou, Jie and Lu, Jiwen},
  title     = {{DPMesh}: Exploiting Diffusion Prior for Occluded Human Mesh Recovery},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1101--1110},
}
HEAL-SWIN: A Vision Transformer On The Sphere: Oscar Carlsson,

Jan E. Gerken,

Hampus Linander,

Heiner Spieß,

Fredrik Ohlsson,

Christoffer Petersson,

Daniel Persson; [pdf] [supp]
[bibtex]
@InProceedings{Carlsson_2024_CVPR,
  author    = {Carlsson, Oscar and Gerken, Jan E. and Linander, Hampus and Spie{\ss}, Heiner and Ohlsson, Fredrik and Petersson, Christoffer and Persson, Daniel},
  title     = {{HEAL-SWIN}: A Vision Transformer On The Sphere},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6067--6077},
}
3D Paintbrush: Local Stylization of 3D Shapes with Cascaded Score Distillation: Dale Decatur,

Itai Lang,

Kfir Aberman,

Rana Hanocka; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Decatur_2024_CVPR,
  author    = {Decatur, Dale and Lang, Itai and Aberman, Kfir and Hanocka, Rana},
  title     = {{3D} Paintbrush: Local Stylization of {3D} Shapes with Cascaded Score Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4473--4483},
}
Guided Slot Attention for Unsupervised Video Object Segmentation: Minhyeok Lee,

Suhwan Cho,

Dogyoon Lee,

Chaewon Park,

Jungho Lee,

Sangyoun Lee; [pdf] [arXiv]
[bibtex]
@InProceedings{Lee_2024_CVPR,
  author    = {Lee, Minhyeok and Cho, Suhwan and Lee, Dogyoon and Park, Chaewon and Lee, Jungho and Lee, Sangyoun},
  title     = {Guided Slot Attention for Unsupervised Video Object Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3807--3816},
}
Programmable Motion Generation for Open-Set Motion Control Tasks: Hanchao Liu,

Xiaohang Zhan,

Shaoli Huang,

Tai-Jiang Mu,

Ying Shan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Hanchao and Zhan, Xiaohang and Huang, Shaoli and Mu, Tai-Jiang and Shan, Ying},
  title     = {Programmable Motion Generation for Open-Set Motion Control Tasks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1399--1408},
}
SCE-MAE: Selective Correspondence Enhancement with Masked Autoencoder for Self-Supervised Landmark Estimation: Kejia Yin,

Varshanth Rao,

Ruowei Jiang,

Xudong Liu,

Parham Aarabi,

David B. Lindell; [pdf] [supp]
[bibtex]
@InProceedings{Yin_2024_CVPR,
  author    = {Yin, Kejia and Rao, Varshanth and Jiang, Ruowei and Liu, Xudong and Aarabi, Parham and Lindell, David B.},
  title     = {{SCE-MAE}: Selective Correspondence Enhancement with Masked Autoencoder for Self-Supervised Landmark Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1313--1322},
}
LAKE-RED: Camouflaged Images Generation by Latent Background Knowledge Retrieval-Augmented Diffusion: Pancheng Zhao,

Peng Xu,

Pengda Qin,

Deng-Ping Fan,

Zhicheng Zhang,

Guoli Jia,

Bowen Zhou,

Jufeng Yang; [pdf]
[bibtex]
@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Pancheng and Xu, Peng and Qin, Pengda and Fan, Deng-Ping and Zhang, Zhicheng and Jia, Guoli and Zhou, Bowen and Yang, Jufeng},
  title     = {{LAKE-RED}: Camouflaged Images Generation by Latent Background Knowledge Retrieval-Augmented Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4092--4101},
}
TIGER: Time-Varying Denoising Model for 3D Point Cloud Generation with Diffusion Process: Zhiyuan Ren,

Minchul Kim,

Feng Liu,

Xiaoming Liu; [pdf] [supp]
[bibtex]
@InProceedings{Ren_2024_CVPR,
  author    = {Ren, Zhiyuan and Kim, Minchul and Liu, Feng and Liu, Xiaoming},
  title     = {{TIGER}: Time-Varying Denoising Model for {3D} Point Cloud Generation with Diffusion Process},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9462--9471},
}
ASH: Animatable Gaussian Splats for Efficient and Photoreal Human Rendering: Haokai Pang,

Heming Zhu,

Adam Kortylewski,

Christian Theobalt,

Marc Habermann; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pang_2024_CVPR,
  author    = {Pang, Haokai and Zhu, Heming and Kortylewski, Adam and Theobalt, Christian and Habermann, Marc},
  title     = {{ASH}: Animatable {Gaussian} Splats for Efficient and Photoreal Human Rendering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1165--1175},
}
ArtAdapter: Text-to-Image Style Transfer using Multi-Level Style Encoder and Explicit Adaptation: Dar-Yen Chen,

Hamish Tennent,

Ching-Wen Hsu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Dar-Yen and Tennent, Hamish and Hsu, Ching-Wen},
  title     = {{ArtAdapter}: Text-to-Image Style Transfer using Multi-Level Style Encoder and Explicit Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8619--8628},
}
Activity-Biometrics: Person Identification from Daily Activities: Shehreen Azad,

Yogesh Singh Rawat; [pdf] [supp]
[bibtex]
@InProceedings{Azad_2024_CVPR,
  author    = {Azad, Shehreen and Rawat, Yogesh Singh},
  title     = {Activity-Biometrics: Person Identification from Daily Activities},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {287--296},
}
Z*: Zero-shot Style Transfer via Attention Reweighting: Yingying Deng,

Xiangyu He,

Fan Tang,

Weiming Dong; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2024_CVPR,
  author    = {Deng, Yingying and He, Xiangyu and Tang, Fan and Dong, Weiming},
  title     = {{Z*}: Zero-shot Style Transfer via Attention Reweighting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6934--6944},
}
Learning Continuous 3D Words for Text-to-Image Generation: Ta-Ying Cheng,

Matheus Gadelha,

Thibault Groueix,

Matthew Fisher,

Radomir Mech,

Andrew Markham,

Niki Trigoni; [pdf] [arXiv]
[bibtex]
@InProceedings{Cheng_2024_CVPR,
  author    = {Cheng, Ta-Ying and Gadelha, Matheus and Groueix, Thibault and Fisher, Matthew and Mech, Radomir and Markham, Andrew and Trigoni, Niki},
  title     = {Learning Continuous {3D} Words for Text-to-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6753--6762},
}
MarkovGen: Structured Prediction for Efficient Text-to-Image Generation: Sadeep Jayasumana,

Daniel Glasner,

Srikumar Ramalingam,

Andreas Veit,

Ayan Chakrabarti,

Sanjiv Kumar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jayasumana_2024_CVPR,
  author    = {Jayasumana, Sadeep and Glasner, Daniel and Ramalingam, Srikumar and Veit, Andreas and Chakrabarti, Ayan and Kumar, Sanjiv},
  title     = {{MarkovGen}: Structured Prediction for Efficient Text-to-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9316--9325},
}
HashPoint: Accelerated Point Searching and Sampling for Neural Rendering: Jiahao Ma,

Miaomiao Liu,

David Ahmedt-Aristizabal,

Chuong Nguyen; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2024_CVPR,
  author    = {Ma, Jiahao and Liu, Miaomiao and Ahmedt-Aristizabal, David and Nguyen, Chuong},
  title     = {{HashPoint}: Accelerated Point Searching and Sampling for Neural Rendering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4462--4472},
}
MFP: Making Full Use of Probability Maps for Interactive Image Segmentation: Chaewon Lee,

Seon-Ho Lee,

Chang-Su Kim; [pdf] [supp] [arXiv]
[bibtex]
% Key renamed from Lee_2024_CVPR: that key is already taken by an earlier entry
% in this file, so BibTeX would reject this record as a repeated entry.
@InProceedings{Lee_2024_CVPR_MFP,
  author    = {Lee, Chaewon and Lee, Seon-Ho and Kim, Chang-Su},
  title     = {{MFP}: Making Full Use of Probability Maps for Interactive Image Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4051--4059},
}
StyLitGAN: Image-Based Relighting via Latent Control: Anand Bhattad,

James Soole,

D.A. Forsyth; [pdf] [supp]
[bibtex]
@InProceedings{Bhattad_2024_CVPR,
  author    = {Bhattad, Anand and Soole, James and Forsyth, D. A.},
  title     = {{StyLitGAN}: Image-Based Relighting via Latent Control},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4231--4240},
}
MoMask: Generative Masked Modeling of 3D Human Motions: Chuan Guo,

Yuxuan Mu,

Muhammad Gohar Javed,

Sen Wang,

Li Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2024_CVPR,
  author    = {Guo, Chuan and Mu, Yuxuan and Javed, Muhammad Gohar and Wang, Sen and Cheng, Li},
  title     = {{MoMask}: Generative Masked Modeling of {3D} Human Motions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1900--1910},
}
Fitting Flats to Flats: Gabriel Dogadov,

Ugo Finnendahl,

Marc Alexa; [pdf] [supp]
[bibtex]
@InProceedings{Dogadov_2024_CVPR,
  author    = {Dogadov, Gabriel and Finnendahl, Ugo and Alexa, Marc},
  title     = {Fitting Flats to Flats},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5439--5447},
}
Coupled Laplacian Eigenmaps for Locally-Aware 3D Rigid Point Cloud Matching: Matteo Bastico,

Etienne Decencière,

Laurent Corté,

Yannick Tillier,

David Ryckelynck; [pdf] [supp]
[bibtex]
% Accented letters are written as brace groups ({\`e}, {\'e}) so BibTeX treats
% each one as a single letter when sorting and building labels.
@InProceedings{Bastico_2024_CVPR,
  author    = {Bastico, Matteo and Decenci{\`e}re, Etienne and Cort{\'e}, Laurent and Tillier, Yannick and Ryckelynck, David},
  title     = {Coupled {Laplacian} Eigenmaps for Locally-Aware {3D} Rigid Point Cloud Matching},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3447--3458},
}
Scaling Up Video Summarization Pretraining with Large Language Models: Dawit Mureja Argaw,

Seunghyun Yoon,

Fabian Caba Heilbron,

Hanieh Deilamsalehy,

Trung Bui,

Zhaowen Wang,

Franck Dernoncourt,

Joon Son Chung; [pdf] [arXiv]
[bibtex]
@InProceedings{Argaw_2024_CVPR,
  author    = {Argaw, Dawit Mureja and Yoon, Seunghyun and Heilbron, Fabian Caba and Deilamsalehy, Hanieh and Bui, Trung and Wang, Zhaowen and Dernoncourt, Franck and Chung, Joon Son},
  title     = {Scaling Up Video Summarization Pretraining with Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8332--8341},
}
Continuous Optical Zooming: A Benchmark for Arbitrary-Scale Image Super-Resolution in Real World: Huiyuan Fu,

Fei Peng,

Xianwei Li,

Yejun Li,

Xin Wang,

Huadong Ma; [pdf]
[bibtex]
@InProceedings{Fu_2024_CVPR,
  author    = {Fu, Huiyuan and Peng, Fei and Li, Xianwei and Li, Yejun and Wang, Xin and Ma, Huadong},
  title     = {Continuous Optical Zooming: A Benchmark for Arbitrary-Scale Image Super-Resolution in Real World},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3035--3044},
}
Sharingan: A Transformer Architecture for Multi-Person Gaze Following: Samy Tafasca,

Anshul Gupta,

Jean-Marc Odobez; [pdf] [supp]
[bibtex]
@InProceedings{Tafasca_2024_CVPR,
  author    = {Tafasca, Samy and Gupta, Anshul and Odobez, Jean-Marc},
  title     = {Sharingan: A Transformer Architecture for Multi-Person Gaze Following},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2008--2017},
}
Open-Vocabulary Segmentation with Semantic-Assisted Calibration: Yong Liu,

Sule Bai,

Guanbin Li,

Yitong Wang,

Yansong Tang; [pdf] [arXiv]
[bibtex]
% Key renamed from Liu_2024_CVPR: that key is already taken by an earlier entry
% in this file, so BibTeX would reject this record as a repeated entry.
@InProceedings{Liu_2024_CVPR_OpenVocabulary,
  author    = {Liu, Yong and Bai, Sule and Li, Guanbin and Wang, Yitong and Tang, Yansong},
  title     = {Open-Vocabulary Segmentation with Semantic-Assisted Calibration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3491--3500},
}
Towards a Perceptual Evaluation Framework for Lighting Estimation: Justine Giroux,

Mohammad Reza Karimi Dastjerdi,

Yannick Hold-Geoffroy,

Javier Vazquez-Corral,

Jean-François Lalonde; [pdf] [arXiv]
[bibtex]
% The cedilla is written as the brace group {\c{c}} so BibTeX treats it as a
% single letter when sorting and building labels.
@InProceedings{Giroux_2024_CVPR,
  author    = {Giroux, Justine and Dastjerdi, Mohammad Reza Karimi and Hold-Geoffroy, Yannick and Vazquez-Corral, Javier and Lalonde, Jean-Fran{\c{c}}ois},
  title     = {Towards a Perceptual Evaluation Framework for Lighting Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4410--4419},
}
On Exact Inversion of DPM-Solvers: Seongmin Hong,

Kyeonghyun Lee,

Suh Yoon Jeon,

Hyewon Bae,

Se Young Chun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hong_2024_CVPR,
  author    = {Hong, Seongmin and Lee, Kyeonghyun and Jeon, Suh Yoon and Bae, Hyewon and Chun, Se Young},
  title     = {On Exact Inversion of {DPM-Solvers}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7069--7078},
}
CAMEL: CAusal Motion Enhancement Tailored for Lifting Text-driven Video Editing: Guiwei Zhang,

Tianyu Zhang,

Guanglin Niu,

Zichang Tan,

Yalong Bai,

Qing Yang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Guiwei and Zhang, Tianyu and Niu, Guanglin and Tan, Zichang and Bai, Yalong and Yang, Qing},
  title     = {{CAMEL}: {CAusal} Motion Enhancement Tailored for Lifting Text-driven Video Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9079--9088},
}
FocSAM: Delving Deeply into Focused Objects in Segmenting Anything: You Huang,

Zongyu Lan,

Liujuan Cao,

Xianming Lin,

Shengchuan Zhang,

Guannan Jiang,

Rongrong Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2024_CVPR,
  author    = {Huang, You and Lan, Zongyu and Cao, Liujuan and Lin, Xianming and Zhang, Shengchuan and Jiang, Guannan and Ji, Rongrong},
  title     = {{FocSAM}: Delving Deeply into Focused Objects in Segmenting Anything},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3120--3130},
}
PRDP: Proximal Reward Difference Prediction for Large-Scale Reward Finetuning of Diffusion Models: Fei Deng,

Qifei Wang,

Wei Wei,

Tingbo Hou,

Matthias Grundmann; [pdf] [supp] [arXiv]
[bibtex]
% Key renamed from Deng_2024_CVPR: that key is already taken by an earlier entry
% in this file, so BibTeX would reject this record as a repeated entry.
@InProceedings{Deng_2024_CVPR_PRDP,
  author    = {Deng, Fei and Wang, Qifei and Wei, Wei and Hou, Tingbo and Grundmann, Matthias},
  title     = {{PRDP}: Proximal Reward Difference Prediction for Large-Scale Reward Finetuning of Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7423--7433},
}
Task-Customized Mixture of Adapters for General Image Fusion: Pengfei Zhu,

Yang Sun,

Bing Cao,

Qinghua Hu; [pdf] [supp] [arXiv]
[bibtex]
% Key renamed from Zhu_2024_CVPR: that key is already taken by an earlier entry
% in this file, so BibTeX would reject this record as a repeated entry.
@InProceedings{Zhu_2024_CVPR_TaskCustomized,
  author    = {Zhu, Pengfei and Sun, Yang and Cao, Bing and Hu, Qinghua},
  title     = {Task-Customized Mixture of Adapters for General Image Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7099--7108},
}
Artist-Friendly Relightable and Animatable Neural Heads: Yingyan Xu,

Prashanth Chandran,

Sebastian Weiss,

Markus Gross,

Gaspard Zoss,

Derek Bradley; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Yingyan and Chandran, Prashanth and Weiss, Sebastian and Gross, Markus and Zoss, Gaspard and Bradley, Derek},
  title     = {Artist-Friendly Relightable and Animatable Neural Heads},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2457--2467},
}
From Feature to Gaze: A Generalizable Replacement of Linear Layer for Gaze Estimation: Yiwei Bao,

Feng Lu; [pdf]
[bibtex]
@InProceedings{Bao_2024_CVPR,
  author    = {Bao, Yiwei and Lu, Feng},
  title     = {From Feature to Gaze: A Generalizable Replacement of Linear Layer for Gaze Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1409--1418},
}
Boosting Image Restoration via Priors from Pre-trained Models: Xiaogang Xu,

Shu Kong,

Tao Hu,

Zhe Liu,

Hujun Bao; [pdf] [arXiv]
[bibtex]
% Key renamed from Xu_2024_CVPR: that key is already taken by an earlier entry
% in this file, so BibTeX would reject this record as a repeated entry.
@InProceedings{Xu_2024_CVPR_Boosting,
  author    = {Xu, Xiaogang and Kong, Shu and Hu, Tao and Liu, Zhe and Bao, Hujun},
  title     = {Boosting Image Restoration via Priors from Pre-trained Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2900--2909},
}
VRetouchEr: Learning Cross-frame Feature Interdependence with Imperfection Flow for Face Retouching in Videos: Wen Xue,

Le Jiang,

Lianxin Xie,

Si Wu,

Yong Xu,

Hau San Wong; [pdf] [supp]
[bibtex]
@InProceedings{Xue_2024_CVPR,
  author    = {Xue, Wen and Jiang, Le and Xie, Lianxin and Wu, Si and Xu, Yong and Wong, Hau San},
  title     = {{VRetouchEr}: Learning Cross-frame Feature Interdependence with Imperfection Flow for Face Retouching in Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9141--9150},
}
Arbitrary-Scale Image Generation and Upsampling using Latent Diffusion Model and Implicit Neural Decoder: Jinseok Kim,

Tae-Kyun Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2024_CVPR,
  author    = {Kim, Jinseok and Kim, Tae-Kyun},
  title     = {Arbitrary-Scale Image Generation and Upsampling using Latent Diffusion Model and Implicit Neural Decoder},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9202--9211},
}
Cache Me if You Can: Accelerating Diffusion Models through Block Caching: Felix Wimbauer,

Bichen Wu,

Edgar Schoenfeld,

Xiaoliang Dai,

Ji Hou,

Zijian He,

Artsiom Sanakoyeu,

Peizhao Zhang,

Sam Tsai,

Jonas Kohler,

Christian Rupprecht,

Daniel Cremers,

Peter Vajda,

Jialiang Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wimbauer_2024_CVPR,
  author    = {Wimbauer, Felix and Wu, Bichen and Schoenfeld, Edgar and Dai, Xiaoliang and Hou, Ji and He, Zijian and Sanakoyeu, Artsiom and Zhang, Peizhao and Tsai, Sam and Kohler, Jonas and Rupprecht, Christian and Cremers, Daniel and Vajda, Peter and Wang, Jialiang},
  title     = {Cache Me if You Can: Accelerating Diffusion Models through Block Caching},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6211--6220},
}
Identifying Important Group of Pixels using Interactions: Kosuke Sumiyasu,

Kazuhiko Kawamoto,

Hiroshi Kera; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sumiyasu_2024_CVPR,
  author    = {Sumiyasu, Kosuke and Kawamoto, Kazuhiko and Kera, Hiroshi},
  title     = {Identifying Important Group of Pixels using Interactions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6017--6026},
}
DIOD: Self-Distillation Meets Object Discovery: Sandra Kara,

Hejer Ammar,

Julien Denize,

Florian Chabot,

Quoc-Cuong Pham; [pdf] [supp]
[bibtex]
@InProceedings{Kara_2024_CVPR,
  author    = {Kara, Sandra and Ammar, Hejer and Denize, Julien and Chabot, Florian and Pham, Quoc-Cuong},
  title     = {{DIOD}: Self-Distillation Meets Object Discovery},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3975--3985},
}
GoMAvatar: Efficient Animatable Human Modeling from Monocular Video Using Gaussians-on-Mesh: Jing Wen,

Xiaoming Zhao,

Zhongzheng Ren,

Alexander G. Schwing,

Shenlong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wen_2024_CVPR,
  author    = {Wen, Jing and Zhao, Xiaoming and Ren, Zhongzheng and Schwing, Alexander G. and Wang, Shenlong},
  title     = {{GoMAvatar}: Efficient Animatable Human Modeling from Monocular Video Using {Gaussians-on-Mesh}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2059--2069},
}
Neural Redshift: Random Networks are not Random Functions: Damien Teney,

Armand Mihai Nicolicioiu,

Valentin Hartmann,

Ehsan Abbasnejad; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Teney_2024_CVPR,
  author    = {Teney, Damien and Nicolicioiu, Armand Mihai and Hartmann, Valentin and Abbasnejad, Ehsan},
  title     = {Neural Redshift: Random Networks are not Random Functions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4786--4796},
}
HumanGaussian: Text-Driven 3D Human Generation with Gaussian Splatting: Xian Liu,

Xiaohang Zhan,

Jiaxiang Tang,

Ying Shan,

Gang Zeng,

Dahua Lin,

Xihui Liu,

Ziwei Liu; [pdf] [supp] [arXiv]
[bibtex]
% Key renamed from Liu_2024_CVPR: that key is already taken by an earlier entry
% in this file, so BibTeX would reject this record as a repeated entry.
@InProceedings{Liu_2024_CVPR_HumanGaussian,
  author    = {Liu, Xian and Zhan, Xiaohang and Tang, Jiaxiang and Shan, Ying and Zeng, Gang and Lin, Dahua and Liu, Xihui and Liu, Ziwei},
  title     = {{HumanGaussian}: Text-Driven {3D} Human Generation with {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6646--6657},
}
CosmicMan: A Text-to-Image Foundation Model for Humans: Shikai Li,

Jianglin Fu,

Kaiyuan Liu,

Wentao Wang,

Kwan-Yee Lin,

Wayne Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Shikai and Fu, Jianglin and Liu, Kaiyuan and Wang, Wentao and Lin, Kwan-Yee and Wu, Wayne},
  title     = {{CosmicMan}: A Text-to-Image Foundation Model for Humans},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6955--6965},
}
JDEC: JPEG Decoding via Enhanced Continuous Cosine Coefficients: Woo Kyoung Han,

Sunghoon Im,

Jaedeok Kim,

Kyong Hwan Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2024_CVPR,
  author    = {Han, Woo Kyoung and Im, Sunghoon and Kim, Jaedeok and Jin, Kyong Hwan},
  title     = {{JDEC}: {JPEG} Decoding via Enhanced Continuous Cosine Coefficients},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {516--526},
}
HOI-M^3: Capture Multiple Humans and Objects Interaction within Contextual Environment: Juze Zhang,

Jingyan Zhang,

Zining Song,

Zhanhe Shi,

Chengfeng Zhao,

Ye Shi,

Jingyi Yu,

Lan Xu,

Jingya Wang; [pdf] [supp]
[bibtex]
% Key renamed from Zhang_2024_CVPR: that key is already taken by an earlier entry
% in this file, so BibTeX would reject this record as a repeated entry.
% The exponent is typeset in math mode so it renders as a superscript 3 rather
% than a literal caret followed by 3.
@InProceedings{Zhang_2024_CVPR_HOIM3,
  author    = {Zhang, Juze and Zhang, Jingyan and Song, Zining and Shi, Zhanhe and Zhao, Chengfeng and Shi, Ye and Yu, Jingyi and Xu, Lan and Wang, Jingya},
  title     = {{HOI-M$^{3}$}: Capture Multiple Humans and Objects Interaction within Contextual Environment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {516--526},
}
Interactive3D: Create What You Want by Interactive 3D Generation: Shaocong Dong,

Lihe Ding,

Zhanpeng Huang,

Zibin Wang,

Tianfan Xue,

Dan Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2024_CVPR,
  author    = {Dong, Shaocong and Ding, Lihe and Huang, Zhanpeng and Wang, Zibin and Xue, Tianfan and Xu, Dan},
  title     = {{Interactive3D}: Create What You Want by Interactive {3D} Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4999--5008},
}
OmniLocalRF: Omnidirectional Local Radiance Fields from Dynamic Videos: Dongyoung Choi,

Hyeonjoong Jang,

Min H. Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2024_CVPR,
  author    = {Choi, Dongyoung and Jang, Hyeonjoong and Kim, Min H.},
  title     = {{OmniLocalRF}: Omnidirectional Local Radiance Fields from Dynamic Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6871--6880},
}
Semantic Human Mesh Reconstruction with Textures: Xiaoyu Zhan,

Jianxin Yang,

Yuanqi Li,

Jie Guo,

Yanwen Guo,

Wenping Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhan_2024_CVPR,
  author    = {Zhan, Xiaoyu and Yang, Jianxin and Li, Yuanqi and Guo, Jie and Guo, Yanwen and Wang, Wenping},
  title     = {Semantic Human Mesh Reconstruction with Textures},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {142--152},
}
PIA: Your Personalized Image Animator via Plug-and-Play Modules in Text-to-Image Models: Yiming Zhang,

Zhening Xing,

Yanhong Zeng,

Youqing Fang,

Kai Chen; [pdf] [supp] [arXiv]
[bibtex]
% Key renamed from Zhang_2024_CVPR: that key is already taken by an earlier entry
% in this file, so BibTeX would reject this record as a repeated entry.
@InProceedings{Zhang_2024_CVPR_PIA,
  author    = {Zhang, Yiming and Xing, Zhening and Zeng, Yanhong and Fang, Youqing and Chen, Kai},
  title     = {{PIA}: Your Personalized Image Animator via Plug-and-Play Modules in Text-to-Image Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7747--7756},
}
NeRF Analogies: Example-Based Visual Attribute Transfer for NeRFs: Michael Fischer,

Zhengqin Li,

Thu Nguyen-Phuoc,

Aljaz Bozic,

Zhao Dong,

Carl Marshall,

Tobias Ritschel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fischer_2024_CVPR,
  author    = {Fischer, Michael and Li, Zhengqin and Nguyen-Phuoc, Thu and Bozic, Aljaz and Dong, Zhao and Marshall, Carl and Ritschel, Tobias},
  title     = {{NeRF} Analogies: Example-Based Visual Attribute Transfer for {NeRFs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4640--4650},
}
Texture-Preserving Diffusion Models for High-Fidelity Virtual Try-On: Xu Yang,

Changxing Ding,

Zhibin Hong,

Junhao Huang,

Jin Tao,

Xiangmin Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Xu and Ding, Changxing and Hong, Zhibin and Huang, Junhao and Tao, Jin and Xu, Xiangmin},
  title     = {Texture-Preserving Diffusion Models for High-Fidelity Virtual Try-On},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7017--7026},
}
Towards Robust Event-guided Low-Light Image Enhancement: A Large-Scale Real-World Event-Image Dataset and Novel Approach: Guoqiang Liang,

Kanghao Chen,

Hangyu Li,

Yunfan Lu,

Lin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2024_CVPR,
  author    = {Liang, Guoqiang and Chen, Kanghao and Li, Hangyu and Lu, Yunfan and Wang, Lin},
  title     = {Towards Robust Event-guided Low-Light Image Enhancement: A Large-Scale Real-World Event-Image Dataset and Novel Approach},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {23--33},
}
From a Bird's Eye View to See: Joint Camera and Subject Registration without the Camera Calibration: Zekun Qian,

Ruize Han,

Wei Feng,

Song Wang; [pdf] [supp]
[bibtex]
@InProceedings{Qian_2024_CVPR,
  author    = {Qian, Zekun and Han, Ruize and Feng, Wei and Wang, Song},
  title     = {From a Bird's Eye View to See: Joint Camera and Subject Registration without the Camera Calibration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {863--873},
}
Enhancing Video Super-Resolution via Implicit Resampling-based Alignment: Kai Xu,

Ziwei Yu,

Xin Wang,

Michael Bi Mi,

Angela Yao; [pdf] [supp] [arXiv]
[bibtex]
% Key renamed from Xu_2024_CVPR: that key is already taken by an earlier entry
% in this file, so BibTeX would reject this record as a repeated entry.
@InProceedings{Xu_2024_CVPR_Enhancing,
  author    = {Xu, Kai and Yu, Ziwei and Wang, Xin and Mi, Michael Bi and Yao, Angela},
  title     = {Enhancing Video Super-Resolution via Implicit Resampling-based Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2546--2555},
}
Parameter Efficient Fine-tuning via Cross Block Orchestration for Segment Anything Model: Zelin Peng,

Zhengqin Xu,

Zhilin Zeng,

Lingxi Xie,

Qi Tian,

Wei Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2024_CVPR,
  author    = {Peng, Zelin and Xu, Zhengqin and Zeng, Zhilin and Xie, Lingxi and Tian, Qi and Shen, Wei},
  title     = {Parameter Efficient Fine-tuning via Cross Block Orchestration for {Segment Anything Model}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3743--3752},
}
Masked and Shuffled Blind Spot Denoising for Real-World Images: Hamadi Chihaoui,

Paolo Favaro; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chihaoui_2024_CVPR,
  author    = {Chihaoui, Hamadi and Favaro, Paolo},
  title     = {Masked and Shuffled Blind Spot Denoising for Real-World Images},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3025--3034},
}
DiffusionAvatars: Deferred Diffusion for High-fidelity 3D Head Avatars: Tobias Kirschstein,

Simon Giebenhain,

Matthias Nießner; [pdf] [supp]
[bibtex]
@InProceedings{Kirschstein_2024_CVPR,
  author    = {Kirschstein, Tobias and Giebenhain, Simon and Nie{\ss}ner, Matthias},
  title     = {{DiffusionAvatars}: Deferred Diffusion for High-fidelity {3D} Head Avatars},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5481--5492},
}
Data-Free Quantization via Pseudo-label Filtering: Chunxiao Fan,

Ziqi Wang,

Dan Guo,

Meng Wang; [pdf]
[bibtex]
@InProceedings{Fan_2024_CVPR,
  author    = {Fan, Chunxiao and Wang, Ziqi and Guo, Dan and Wang, Meng},
  title     = {Data-Free Quantization via Pseudo-label Filtering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5589--5598},
}
Generative Powers of Ten: Xiaojuan Wang,

Janne Kontkanen,

Brian Curless,

Steven M. Seitz,

Ira Kemelmacher-Shlizerman,

Ben Mildenhall,

Pratul Srinivasan,

Dor Verbin,

Aleksander Holynski; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Xiaojuan and Kontkanen, Janne and Curless, Brian and Seitz, Steven M. and Kemelmacher-Shlizerman, Ira and Mildenhall, Ben and Srinivasan, Pratul and Verbin, Dor and Holynski, Aleksander},
  title     = {Generative Powers of Ten},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7173--7182},
}
Text-conditional Attribute Alignment across Latent Spaces for 3D Controllable Face Image Synthesis: Feifan Xu,

Rui Li,

Si Wu,

Yong Xu,

Hau San Wong; [pdf]
[bibtex]
% NOTE(review): citation key Xu_2024_CVPR is reused by other entries in this file; BibTeX flags repeated keys. Assign unique keys once citing documents can be updated.
@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Feifan and Li, Rui and Wu, Si and Xu, Yong and Wong, Hau San},
  title     = {Text-conditional Attribute Alignment across Latent Spaces for {3D} Controllable Face Image Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9172--9181},
}
Correcting Diffusion Generation through Resampling: Yujian Liu,

Yang Zhang,

Tommi Jaakkola,

Shiyu Chang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Liu_2024_CVPR is reused by another entry in this file; BibTeX flags repeated keys. Assign unique keys once citing documents can be updated.
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Yujian and Zhang, Yang and Jaakkola, Tommi and Chang, Shiyu},
  title     = {Correcting Diffusion Generation through Resampling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8713--8723},
}
AirPlanes: Accurate Plane Estimation via 3D-Consistent Embeddings: Jamie Watson,

Filippo Aleotti,

Mohamed Sayed,

Zawar Qureshi,

Oisin Mac Aodha,

Gabriel Brostow,

Michael Firman,

Sara Vicente; [pdf] [arXiv]
[bibtex]
@InProceedings{Watson_2024_CVPR,
  author    = {Watson, Jamie and Aleotti, Filippo and Sayed, Mohamed and Qureshi, Zawar and Mac Aodha, Oisin and Brostow, Gabriel and Firman, Michael and Vicente, Sara},
  title     = {{AirPlanes}: Accurate Plane Estimation via {3D}-Consistent Embeddings},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5270--5280},
}
Blur2Blur: Blur Conversion for Unsupervised Image Deblurring on Unknown Domains: Bang-Dang Pham,

Phong Tran,

Anh Tran,

Cuong Pham,

Rang Nguyen,

Minh Hoai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pham_2024_CVPR,
  author    = {Pham, Bang-Dang and Tran, Phong and Tran, Anh and Pham, Cuong and Nguyen, Rang and Hoai, Minh},
  title     = {{Blur2Blur}: Blur Conversion for Unsupervised Image Deblurring on Unknown Domains},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2804--2813},
}
Exploring Vision Transformers for 3D Human Motion-Language Models with Motion Patches: Qing Yu,

Mikihiro Tanaka,

Kent Fujiwara; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2024_CVPR,
  author    = {Yu, Qing and Tanaka, Mikihiro and Fujiwara, Kent},
  title     = {Exploring Vision Transformers for {3D} Human Motion-Language Models with Motion Patches},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {937--946},
}
Clustering for Protein Representation Learning: Ruijie Quan,

Wenguan Wang,

Fan Ma,

Hehe Fan,

Yi Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Quan_2024_CVPR,
  author    = {Quan, Ruijie and Wang, Wenguan and Ma, Fan and Fan, Hehe and Yang, Yi},
  title     = {Clustering for Protein Representation Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {319--329},
}
CorrMatch: Label Propagation via Correlation Matching for Semi-Supervised Semantic Segmentation: Boyuan Sun,

Yuqi Yang,

Le Zhang,

Ming-Ming Cheng,

Qibin Hou; [pdf] [arXiv]
[bibtex]
@InProceedings{Sun_2024_CVPR,
  author    = {Sun, Boyuan and Yang, Yuqi and Zhang, Le and Cheng, Ming-Ming and Hou, Qibin},
  title     = {{CorrMatch}: Label Propagation via Correlation Matching for Semi-Supervised Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3097--3107},
}
Estimating Extreme 3D Image Rotations using Cascaded Attention: Shay Dekel,

Yosi Keller,

Martin Cadik; [pdf] [supp]
[bibtex]
@InProceedings{Dekel_2024_CVPR,
  author    = {Dekel, Shay and Keller, Yosi and Cadik, Martin},
  title     = {Estimating Extreme {3D} Image Rotations using Cascaded Attention},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2588--2598},
}
Adapt or Perish: Adaptive Sparse Transformer with Attentive Feature Refinement for Image Restoration: Shihao Zhou,

Duosheng Chen,

Jinshan Pan,

Jinglei Shi,

Jufeng Yang; [pdf]
[bibtex]
@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Shihao and Chen, Duosheng and Pan, Jinshan and Shi, Jinglei and Yang, Jufeng},
  title     = {Adapt or Perish: Adaptive Sparse Transformer with Attentive Feature Refinement for Image Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2952--2963},
}
VINECS: Video-based Neural Character Skinning: Zhouyingcheng Liao,

Vladislav Golyanik,

Marc Habermann,

Christian Theobalt; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liao_2024_CVPR,
  author    = {Liao, Zhouyingcheng and Golyanik, Vladislav and Habermann, Marc and Theobalt, Christian},
  title     = {{VINECS}: Video-based Neural Character Skinning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1377--1387},
}
Your Student is Better Than Expected: Adaptive Teacher-Student Collaboration for Text-Conditional Diffusion Models: Nikita Starodubcev,

Dmitry Baranchuk,

Artem Fedorov,

Artem Babenko; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Starodubcev_2024_CVPR,
  author    = {Starodubcev, Nikita and Baranchuk, Dmitry and Fedorov, Artem and Babenko, Artem},
  title     = {Your Student is Better Than Expected: Adaptive Teacher-Student Collaboration for Text-Conditional Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9275--9285},
}
SHViT: Single-Head Vision Transformer with Memory Efficient Macro Design: Seokju Yun,

Youngmin Ro; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yun_2024_CVPR,
  author    = {Yun, Seokju and Ro, Youngmin},
  title     = {{SHViT}: Single-Head Vision Transformer with Memory Efficient Macro Design},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5756--5767},
}
CommonCanvas: Open Diffusion Models Trained on Creative-Commons Images: Aaron Gokaslan,

A. Feder Cooper,

Jasmine Collins,

Landan Seguin,

Austin Jacobson,

Mihir Patel,

Jonathan Frankle,

Cory Stephenson,

Volodymyr Kuleshov; [pdf] [supp]
[bibtex]
@InProceedings{Gokaslan_2024_CVPR,
  author    = {Gokaslan, Aaron and Cooper, A. Feder and Collins, Jasmine and Seguin, Landan and Jacobson, Austin and Patel, Mihir and Frankle, Jonathan and Stephenson, Cory and Kuleshov, Volodymyr},
  title     = {{CommonCanvas}: Open Diffusion Models Trained on {Creative-Commons} Images},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8250--8260},
}
Prompt-Driven Referring Image Segmentation with Instance Contrasting: Chao Shang,

Zichen Song,

Heqian Qiu,

Lanxiao Wang,

Fanman Meng,

Hongliang Li; [pdf]
[bibtex]
@InProceedings{Shang_2024_CVPR,
  author    = {Shang, Chao and Song, Zichen and Qiu, Heqian and Wang, Lanxiao and Meng, Fanman and Li, Hongliang},
  title     = {Prompt-Driven Referring Image Segmentation with Instance Contrasting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4124--4134},
}
Image Sculpting: Precise Object Editing with 3D Geometry Control: Jiraphon Yenphraphai,

Xichen Pan,

Sainan Liu,

Daniele Panozzo,

Saining Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yenphraphai_2024_CVPR,
  author    = {Yenphraphai, Jiraphon and Pan, Xichen and Liu, Sainan and Panozzo, Daniele and Xie, Saining},
  title     = {Image Sculpting: Precise Object Editing with {3D} Geometry Control},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4241--4251},
}
PFStorer: Personalized Face Restoration and Super-Resolution: Tuomas Varanka,

Tapani Toivonen,

Soumya Tripathy,

Guoying Zhao,

Erman Acar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Varanka_2024_CVPR,
  author    = {Varanka, Tuomas and Toivonen, Tapani and Tripathy, Soumya and Zhao, Guoying and Acar, Erman},
  title     = {{PFStorer}: Personalized Face Restoration and Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2372--2381},
}
TextureDreamer: Image-Guided Texture Synthesis Through Geometry-Aware Diffusion: Yu-Ying Yeh,

Jia-Bin Huang,

Changil Kim,

Lei Xiao,

Thu Nguyen-Phuoc,

Numair Khan,

Cheng Zhang,

Manmohan Chandraker,

Carl S Marshall,

Zhao Dong,

Zhengqin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yeh_2024_CVPR,
  author    = {Yeh, Yu-Ying and Huang, Jia-Bin and Kim, Changil and Xiao, Lei and Nguyen-Phuoc, Thu and Khan, Numair and Zhang, Cheng and Chandraker, Manmohan and Marshall, Carl S and Dong, Zhao and Li, Zhengqin},
  title     = {{TextureDreamer}: Image-Guided Texture Synthesis Through Geometry-Aware Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4304--4314},
}
Boosting Image Quality Assessment through Efficient Transformer Adaptation with Local Feature Enhancement: Kangmin Xu,

Liang Liao,

Jing Xiao,

Chaofeng Chen,

Haoning Wu,

Qiong Yan,

Weisi Lin; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Xu_2024_CVPR is reused by other entries in this file; BibTeX flags repeated keys. Assign unique keys once citing documents can be updated.
@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Kangmin and Liao, Liang and Xiao, Jing and Chen, Chaofeng and Wu, Haoning and Yan, Qiong and Lin, Weisi},
  title     = {Boosting Image Quality Assessment through Efficient Transformer Adaptation with Local Feature Enhancement},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2662--2672},
}
Attention Calibration for Disentangled Text-to-Image Personalization: Yanbing Zhang,

Mengping Yang,

Qin Zhou,

Zhe Wang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Zhang_2024_CVPR is reused by other entries in this file; BibTeX flags repeated keys. Assign unique keys once citing documents can be updated.
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Yanbing and Yang, Mengping and Zhou, Qin and Wang, Zhe},
  title     = {Attention Calibration for Disentangled Text-to-Image Personalization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4764--4774},
}
One-Shot Structure-Aware Stylized Image Synthesis: Hansam Cho,

Jonghyun Lee,

Seunggyu Chang,

Yonghyun Jeong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cho_2024_CVPR,
  author    = {Cho, Hansam and Lee, Jonghyun and Chang, Seunggyu and Jeong, Yonghyun},
  title     = {One-Shot Structure-Aware Stylized Image Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8302--8311},
}
MR-VNet: Media Restoration using Volterra Networks: Siddharth Roheda,

Amit Unde,

Loay Rashid; [pdf]
[bibtex]
@InProceedings{Roheda_2024_CVPR,
  author    = {Roheda, Siddharth and Unde, Amit and Rashid, Loay},
  title     = {{MR-VNet}: Media Restoration using {Volterra} Networks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6098--6107},
}
Single Mesh Diffusion Models with Field Latents for Texture Generation: Thomas W. Mitchel,

Carlos Esteves,

Ameesh Makadia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mitchel_2024_CVPR,
  author    = {Mitchel, Thomas W. and Esteves, Carlos and Makadia, Ameesh},
  title     = {Single Mesh Diffusion Models with Field Latents for Texture Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7953--7963},
}
SAI3D: Segment Any Instance in 3D Scenes: Yingda Yin,

Yuzheng Liu,

Yang Xiao,

Daniel Cohen-Or,

Jingwei Huang,

Baoquan Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2024_CVPR,
  author    = {Yin, Yingda and Liu, Yuzheng and Xiao, Yang and Cohen-Or, Daniel and Huang, Jingwei and Chen, Baoquan},
  title     = {{SAI3D}: Segment Any Instance in {3D} Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3292--3302},
}
TexOct: Generating Textures of 3D Models with Octree-based Diffusion: Jialun Liu,

Chenming Wu,

Xinqi Liu,

Xing Liu,

Jinbo Wu,

Haotian Peng,

Chen Zhao,

Haocheng Feng,

Jingtuo Liu,

Errui Ding; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Liu_2024_CVPR is reused by another entry in this file; BibTeX flags repeated keys. Assign unique keys once citing documents can be updated.
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Jialun and Wu, Chenming and Liu, Xinqi and Liu, Xing and Wu, Jinbo and Peng, Haotian and Zhao, Chen and Feng, Haocheng and Liu, Jingtuo and Ding, Errui},
  title     = {{TexOct}: Generating Textures of {3D} Models with Octree-based Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4284--4293},
}
Anatomically Constrained Implicit Face Models: Prashanth Chandran,

Gaspard Zoss; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chandran_2024_CVPR,
  author    = {Chandran, Prashanth and Zoss, Gaspard},
  title     = {Anatomically Constrained Implicit Face Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2220--2229},
}
Capturing Closely Interacted Two-Person Motions with Reaction Priors: Qi Fang,

Yinghui Fan,

Yanjun Li,

Junting Dong,

Dingwei Wu,

Weidong Zhang,

Kang Chen; [pdf] [supp]
[bibtex]
@InProceedings{Fang_2024_CVPR,
  author    = {Fang, Qi and Fan, Yinghui and Li, Yanjun and Dong, Junting and Wu, Dingwei and Zhang, Weidong and Chen, Kang},
  title     = {Capturing Closely Interacted Two-Person Motions with Reaction Priors},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {655--665},
}
RobustSAM: Segment Anything Robustly on Degraded Images: Wei-Ting Chen,

Yu-Jiet Vong,

Sy-Yen Kuo,

Sizhou Ma,

Jian Wang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Chen_2024_CVPR is reused by another entry in this file; BibTeX flags repeated keys. Assign unique keys once citing documents can be updated.
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Wei-Ting and Vong, Yu-Jiet and Kuo, Sy-Yen and Ma, Sizhou and Wang, Jian},
  title     = {{RobustSAM}: Segment Anything Robustly on Degraded Images},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4081--4091},
}
In-N-Out: Faithful 3D GAN Inversion with Volumetric Decomposition for Face Editing: Yiran Xu,

Zhixin Shu,

Cameron Smith,

Seoung Wug Oh,

Jia-Bin Huang; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Xu_2024_CVPR is reused by other entries in this file; BibTeX flags repeated keys. Assign unique keys once citing documents can be updated.
@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Yiran and Shu, Zhixin and Smith, Cameron and Oh, Seoung Wug and Huang, Jia-Bin},
  title     = {{In-N-Out}: Faithful {3D} {GAN} Inversion with Volumetric Decomposition for Face Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7225--7235},
}
Combining Frame and GOP Embeddings for Neural Video Representation: Jens Eirik Saethre,

Roberto Azevedo,

Christopher Schroers; [pdf] [supp]
[bibtex]
@InProceedings{Saethre_2024_CVPR,
  author    = {Saethre, Jens Eirik and Azevedo, Roberto and Schroers, Christopher},
  title     = {Combining Frame and {GOP} Embeddings for Neural Video Representation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9253--9263},
}
Fantastic Animals and Where to Find Them: Segment Any Marine Animal with Dual SAM: Pingping Zhang,

Tianyu Yan,

Yang Liu,

Huchuan Lu; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Zhang_2024_CVPR is reused by other entries in this file; BibTeX flags repeated keys. Assign unique keys once citing documents can be updated.
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Pingping and Yan, Tianyu and Liu, Yang and Lu, Huchuan},
  title     = {Fantastic Animals and Where to Find Them: Segment Any Marine Animal with Dual {SAM}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2578--2587},
}
Seeing and Hearing: Open-domain Visual-Audio Generation with Diffusion Latent Aligners: Yazhou Xing,

Yingqing He,

Zeyue Tian,

Xintao Wang,

Qifeng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xing_2024_CVPR,
  author    = {Xing, Yazhou and He, Yingqing and Tian, Zeyue and Wang, Xintao and Chen, Qifeng},
  title     = {Seeing and Hearing: Open-domain Visual-Audio Generation with Diffusion Latent Aligners},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7151--7161},
}
Objects as Volumes: A Stochastic Geometry View of Opaque Solids: Bailey Miller,

Hanyu Chen,

Alice Lai,

Ioannis Gkioulekas; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Miller_2024_CVPR,
  author    = {Miller, Bailey and Chen, Hanyu and Lai, Alice and Gkioulekas, Ioannis},
  title     = {Objects as Volumes: A Stochastic Geometry View of Opaque Solids},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {87--97},
}
Improving Subject-Driven Image Synthesis with Subject-Agnostic Guidance: Kelvin C.K. Chan,

Yang Zhao,

Xuhui Jia,

Ming-Hsuan Yang,

Huisheng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chan_2024_CVPR,
  author    = {Chan, Kelvin C. K. and Zhao, Yang and Jia, Xuhui and Yang, Ming-Hsuan and Wang, Huisheng},
  title     = {Improving Subject-Driven Image Synthesis with Subject-Agnostic Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6733--6742},
}
Diffusion Model Alignment Using Direct Preference Optimization: Bram Wallace,

Meihua Dang,

Rafael Rafailov,

Linqi Zhou,

Aaron Lou,

Senthil Purushwalkam,

Stefano Ermon,

Caiming Xiong,

Shafiq Joty,

Nikhil Naik; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wallace_2024_CVPR,
  author    = {Wallace, Bram and Dang, Meihua and Rafailov, Rafael and Zhou, Linqi and Lou, Aaron and Purushwalkam, Senthil and Ermon, Stefano and Xiong, Caiming and Joty, Shafiq and Naik, Nikhil},
  title     = {Diffusion Model Alignment Using Direct Preference Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8228--8238},
}
ZeroNVS: Zero-Shot 360-Degree View Synthesis from a Single Image: Kyle Sargent,

Zizhang Li,

Tanmay Shah,

Charles Herrmann,

Hong-Xing Yu,

Yunzhi Zhang,

Eric Ryan Chan,

Dmitry Lagun,

Li Fei-Fei,

Deqing Sun,

Jiajun Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sargent_2024_CVPR,
  author    = {Sargent, Kyle and Li, Zizhang and Shah, Tanmay and Herrmann, Charles and Yu, Hong-Xing and Zhang, Yunzhi and Chan, Eric Ryan and Lagun, Dmitry and Fei-Fei, Li and Sun, Deqing and Wu, Jiajun},
  title     = {{ZeroNVS}: Zero-Shot 360-Degree View Synthesis from a Single Image},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9420--9429},
}
Restoration by Generation with Constrained Priors: Zheng Ding,

Xuaner Zhang,

Zhuowen Tu,

Zhihao Xia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ding_2024_CVPR,
  author    = {Ding, Zheng and Zhang, Xuaner and Tu, Zhuowen and Xia, Zhihao},
  title     = {Restoration by Generation with Constrained Priors},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2567--2577},
}
Blur-aware Spatio-temporal Sparse Transformer for Video Deblurring: Huicong Zhang,

Haozhe Xie,

Hongxun Yao; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Zhang_2024_CVPR is reused by other entries in this file; BibTeX flags repeated keys. Assign unique keys once citing documents can be updated.
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Huicong and Xie, Haozhe and Yao, Hongxun},
  title     = {Blur-aware Spatio-temporal Sparse Transformer for Video Deblurring},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2673--2681},
}
DiffusionPoser: Real-time Human Motion Reconstruction From Arbitrary Sparse Sensors Using Autoregressive Diffusion: Tom Van Wouwe,

Seunghwan Lee,

Antoine Falisse,

Scott Delp,

C. Karen Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Van_Wouwe_2024_CVPR,
  author    = {Van Wouwe, Tom and Lee, Seunghwan and Falisse, Antoine and Delp, Scott and Liu, C. Karen},
  title     = {{DiffusionPoser}: Real-time Human Motion Reconstruction From Arbitrary Sparse Sensors Using Autoregressive Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2513--2523},
}
MANUS: Markerless Grasp Capture using Articulated 3D Gaussians: Chandradeep Pokhariya,

Ishaan Nikhil Shah,

Angela Xing,

Zekun Li,

Kefan Chen,

Avinash Sharma,

Srinath Sridhar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pokhariya_2024_CVPR,
  author    = {Pokhariya, Chandradeep and Shah, Ishaan Nikhil and Xing, Angela and Li, Zekun and Chen, Kefan and Sharma, Avinash and Sridhar, Srinath},
  title     = {{MANUS}: Markerless Grasp Capture using Articulated {3D} {Gaussians}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2197--2208},
}
BerfScene: Bev-conditioned Equivariant Radiance Fields for Infinite 3D Scene Generation: Qihang Zhang,

Yinghao Xu,

Yujun Shen,

Bo Dai,

Bolei Zhou,

Ceyuan Yang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Zhang_2024_CVPR is reused by other entries in this file; BibTeX flags repeated keys. Assign unique keys once citing documents can be updated.
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Qihang and Xu, Yinghao and Shen, Yujun and Dai, Bo and Zhou, Bolei and Yang, Ceyuan},
  title     = {{BerfScene}: {Bev}-conditioned Equivariant Radiance Fields for Infinite {3D} Scene Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6839--6849},
}
3D Facial Expressions through Analysis-by-Neural-Synthesis: George Retsinas,

Panagiotis P. Filntisis,

Radek Danecek,

Victoria F. Abrevaya,

Anastasios Roussos,

Timo Bolkart,

Petros Maragos; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Retsinas_2024_CVPR,
  author    = {Retsinas, George and Filntisis, Panagiotis P. and Danecek, Radek and Abrevaya, Victoria F. and Roussos, Anastasios and Bolkart, Timo and Maragos, Petros},
  title     = {{3D} Facial Expressions through Analysis-by-Neural-Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2490--2501},
}
Unleashing the Potential of SAM for Medical Adaptation via Hierarchical Decoding: Zhiheng Cheng,

Qingyue Wei,

Hongru Zhu,

Yan Wang,

Liangqiong Qu,

Wei Shao,

Yuyin Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2024_CVPR,
  author    = {Cheng, Zhiheng and Wei, Qingyue and Zhu, Hongru and Wang, Yan and Qu, Liangqiong and Shao, Wei and Zhou, Yuyin},
  title     = {Unleashing the Potential of {SAM} for Medical Adaptation via Hierarchical Decoding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3511--3522},
}
Puff-Net: Efficient Style Transfer with Pure Content and Style Feature Fusion Network: Sizhe Zheng,

Pan Gao,

Peng Zhou,

Jie Qin; [pdf] [supp]
[bibtex]
@InProceedings{Zheng_2024_CVPR,
  author    = {Zheng, Sizhe and Gao, Pan and Zhou, Peng and Qin, Jie},
  title     = {{Puff-Net}: Efficient Style Transfer with Pure Content and Style Feature Fusion Network},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8059--8068},
}
Towards Progressive Multi-Frequency Representation for Image Warping: Jun Xiao,

Zihang Lyu,

Cong Zhang,

Yakun Ju,

Changjian Shui,

Kin-Man Lam; [pdf]
[bibtex]
@InProceedings{Xiao_2024_CVPR,
  author    = {Xiao, Jun and Lyu, Zihang and Zhang, Cong and Ju, Yakun and Shui, Changjian and Lam, Kin-Man},
  title     = {Towards Progressive Multi-Frequency Representation for Image Warping},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2995--3004},
}
Learning to Control Camera Exposure via Reinforcement Learning: Kyunghyun Lee,

Ukcheol Shin,

Byeong-Uk Lee; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Lee_2024_CVPR is reused by another entry earlier in this file; BibTeX flags repeated keys. Assign unique keys once citing documents can be updated.
@InProceedings{Lee_2024_CVPR,
  author    = {Lee, Kyunghyun and Shin, Ukcheol and Lee, Byeong-Uk},
  title     = {Learning to Control Camera Exposure via Reinforcement Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2975--2983},
}
RNb-NeuS: Reflectance and Normal-based Multi-View 3D Reconstruction: Baptiste Brument,

Robin Bruneau,

Yvain Quéau,

Jean Mélou,

François Bernard Lauze,

Jean-Denis Durou,

Lilian Calvet; [pdf] [supp]
[bibtex]
@InProceedings{Brument_2024_CVPR,
  author    = {Brument, Baptiste and Bruneau, Robin and Qu{\'e}au, Yvain and M{\'e}lou, Jean and Lauze, Fran{\c{c}}ois Bernard and Durou, Jean-Denis and Calvet, Lilian},
  title     = {{RNb-NeuS}: Reflectance and Normal-based Multi-View {3D} Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5230--5239},
}
Scaling Up Dynamic Human-Scene Interaction Modeling: Nan Jiang,

Zhiyuan Zhang,

Hongjie Li,

Xiaoxuan Ma,

Zan Wang,

Yixin Chen,

Tengyu Liu,

Yixin Zhu,

Siyuan Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2024_CVPR,
  author    = {Jiang, Nan and Zhang, Zhiyuan and Li, Hongjie and Ma, Xiaoxuan and Wang, Zan and Chen, Yixin and Liu, Tengyu and Zhu, Yixin and Huang, Siyuan},
  title     = {Scaling Up Dynamic Human-Scene Interaction Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1737--1747},
}
Semantic-aware SAM for Point-Prompted Instance Segmentation: Zhaoyang Wei,

Pengfei Chen,

Xuehui Yu,

Guorong Li,

Jianbin Jiao,

Zhenjun Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2024_CVPR,
  author    = {Wei, Zhaoyang and Chen, Pengfei and Yu, Xuehui and Li, Guorong and Jiao, Jianbin and Han, Zhenjun},
  title     = {Semantic-aware {SAM} for Point-Prompted Instance Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3585--3594},
}
Make Pixels Dance: High-Dynamic Video Generation: Yan Zeng,

Guoqiang Wei,

Jiani Zheng,

Jiaxin Zou,

Yang Wei,

Yuchen Zhang,

Hang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2024_CVPR,
  author    = {Zeng, Yan and Wei, Guoqiang and Zheng, Jiani and Zou, Jiaxin and Wei, Yang and Zhang, Yuchen and Li, Hang},
  title     = {Make Pixels Dance: High-Dynamic Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8850--8860},
}
A&B BNN: Add&Bit-Operation-Only Hardware-Friendly Binary Neural Network: Ruichen Ma,

Guanchao Qiao,

Yian Liu,

Liwei Meng,

Ning Ning,

Yang Liu,

Shaogang Hu; [pdf]
[bibtex]
@InProceedings{Ma_2024_CVPR,
  author    = {Ma, Ruichen and Qiao, Guanchao and Liu, Yian and Meng, Liwei and Ning, Ning and Liu, Yang and Hu, Shaogang},
  title     = {{A\&B} {BNN}: {Add\&Bit}-Operation-Only Hardware-Friendly Binary Neural Network},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5704--5713},
}
Task-aligned Part-aware Panoptic Segmentation through Joint Object-Part Representations: Daan de Geus,

Gijs Dubbelman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{de_Geus_2024_CVPR,
  author    = {de Geus, Daan and Dubbelman, Gijs},
  title     = {Task-aligned Part-aware Panoptic Segmentation through Joint Object-Part Representations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3174--3183},
}
From Activation to Initialization: Scaling Insights for Optimizing Neural Fields: Hemanth Saratchandran,

Sameera Ramasinghe,

Simon Lucey; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Saratchandran_2024_CVPR,
  author    = {Saratchandran, Hemanth and Ramasinghe, Sameera and Lucey, Simon},
  title     = {From Activation to Initialization: Scaling Insights for Optimizing Neural Fields},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {413--422},
}
DiffAvatar: Simulation-Ready Garment Optimization with Differentiable Simulation: Yifei Li,

Hsiao-yu Chen,

Egor Larionov,

Nikolaos Sarafianos,

Wojciech Matusik,

Tuur Stuyck; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Yifei and Chen, Hsiao-yu and Larionov, Egor and Sarafianos, Nikolaos and Matusik, Wojciech and Stuyck, Tuur},
  title     = {{DiffAvatar}: Simulation-Ready Garment Optimization with Differentiable Simulation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4368--4378},
}
AlignSAM: Aligning Segment Anything Model to Open Context via Reinforcement Learning: Duojun Huang,

Xinyu Xiong,

Jie Ma,

Jichang Li,

Zequn Jie,

Lin Ma,

Guanbin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Duojun and Xiong, Xinyu and Ma, Jie and Li, Jichang and Jie, Zequn and Ma, Lin and Li, Guanbin},
  title     = {{AlignSAM}: Aligning Segment Anything Model to Open Context via Reinforcement Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3205--3215},
}
Learning Spatial Adaptation and Temporal Coherence in Diffusion Models for Video Super-Resolution: Zhikai Chen,

Fuchen Long,

Zhaofan Qiu,

Ting Yao,

Wengang Zhou,

Jiebo Luo,

Tao Mei; [pdf] [arXiv]
[bibtex]
% NOTE(review): citation key Chen_2024_CVPR is reused by another entry in this file; BibTeX flags repeated keys. Assign unique keys once citing documents can be updated.
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Zhikai and Long, Fuchen and Qiu, Zhaofan and Yao, Ting and Zhou, Wengang and Luo, Jiebo and Mei, Tao},
  title     = {Learning Spatial Adaptation and Temporal Coherence in Diffusion Models for Video Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9232--9241},
}
Denoising Point Clouds in Latent Space via Graph Convolution and Invertible Neural Network: Aihua Mao,

Biao Yan,

Zijing Ma,

Ying He; [pdf] [supp]
[bibtex]
@InProceedings{Mao_2024_CVPR,
  author    = {Mao, Aihua and Yan, Biao and Ma, Zijing and He, Ying},
  title     = {Denoising Point Clouds in Latent Space via Graph Convolution and Invertible Neural Network},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5768--5777},
}
HIR-Diff: Unsupervised Hyperspectral Image Restoration Via Improved Diffusion Models: Li Pang,

Xiangyu Rui,

Long Cui,

Hongzhong Wang,

Deyu Meng,

Xiangyong Cao; [pdf] [supp]
[bibtex]
@InProceedings{Pang_2024_CVPR,
  author    = {Pang, Li and Rui, Xiangyu and Cui, Long and Wang, Hongzhong and Meng, Deyu and Cao, Xiangyong},
  title     = {{HIR-Diff}: Unsupervised Hyperspectral Image Restoration Via Improved Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3005--3014},
}
FreeDrag: Feature Dragging for Reliable Point-based Image Editing: Pengyang Ling,

Lin Chen,

Pan Zhang,

Huaian Chen,

Yi Jin,

Jinjin Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ling_2024_CVPR,
  author    = {Ling, Pengyang and Chen, Lin and Zhang, Pan and Chen, Huaian and Jin, Yi and Zheng, Jinjin},
  title     = {{FreeDrag}: Feature Dragging for Reliable Point-based Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6860--6870},
}
Confronting Ambiguity in 6D Object Pose Estimation via Score-Based Diffusion on SE(3): Tsu-Ching Hsiao,

Hao-Wei Chen,

Hsuan-Kung Yang,

Chun-Yi Lee; [pdf] [arXiv]
[bibtex]
@InProceedings{Hsiao_2024_CVPR,
  author    = {Hsiao, Tsu-Ching and Chen, Hao-Wei and Yang, Hsuan-Kung and Lee, Chun-Yi},
  title     = {Confronting Ambiguity in {6D} Object Pose Estimation via Score-Based Diffusion on {SE(3)}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {352--362},
}
DiffInDScene: Diffusion-based High-Quality 3D Indoor Scene Generation: Xiaoliang Ju,

Zhaoyang Huang,

Yijin Li,

Guofeng Zhang,

Yu Qiao,

Hongsheng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ju_2024_CVPR,
  author    = {Ju, Xiaoliang and Huang, Zhaoyang and Li, Yijin and Zhang, Guofeng and Qiao, Yu and Li, Hongsheng},
  title     = {{DiffInDScene}: Diffusion-based High-Quality {3D} Indoor Scene Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4526--4535},
}
MAPSeg: Unified Unsupervised Domain Adaptation for Heterogeneous Medical Image Segmentation Based on 3D Masked Autoencoding and Pseudo-Labeling: Xuzhe Zhang,

Yuhao Wu,

Elsa Angelini,

Ang Li,

Jia Guo,

Jerod M. Rasmussen,

Thomas G. O'Connor,

Pathik D. Wadhwa,

Andrea Parolin Jackowski,

Hai Li,

Jonathan Posner,

Andrew F. Laine,

Yun Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Xuzhe and Wu, Yuhao and Angelini, Elsa and Li, Ang and Guo, Jia and Rasmussen, Jerod M. and O'Connor, Thomas G. and Wadhwa, Pathik D. and Jackowski, Andrea Parolin and Li, Hai and Posner, Jonathan and Laine, Andrew F. and Wang, Yun},
  title     = {{MAPSeg}: Unified Unsupervised Domain Adaptation for Heterogeneous Medical Image Segmentation Based on {3D} Masked Autoencoding and Pseudo-Labeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5851--5862},
}
DaReNeRF: Direction-aware Representation for Dynamic Scenes: Ange Lou,

Benjamin Planche,

Zhongpai Gao,

Yamin Li,

Tianyu Luan,

Hao Ding,

Terrence Chen,

Jack Noble,

Ziyan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lou_2024_CVPR,
  author    = {Lou, Ange and Planche, Benjamin and Gao, Zhongpai and Li, Yamin and Luan, Tianyu and Ding, Hao and Chen, Terrence and Noble, Jack and Wu, Ziyan},
  title     = {{DaReNeRF}: Direction-aware Representation for Dynamic Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5031--5042},
}
SfmCAD: Unsupervised CAD Reconstruction by Learning Sketch-based Feature Modeling Operations: Pu Li,

Jianwei Guo,

Huibin Li,

Bedrich Benes,

Dong-Ming Yan; [pdf] [supp]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Pu and Guo, Jianwei and Li, Huibin and Benes, Bedrich and Yan, Dong-Ming},
  title     = {{SfmCAD}: Unsupervised {CAD} Reconstruction by Learning Sketch-based Feature Modeling Operations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4671--4680},
}
Learning Degradation-unaware Representation with Prior-based Latent Transformations for Blind Face Restoration: Lianxin Xie,

Csbingbing Zheng,

Wen Xue,

Le Jiang,

Cheng Liu,

Si Wu,

Hau San Wong; [pdf]
[bibtex]
@InProceedings{Xie_2024_CVPR,
  author    = {Xie, Lianxin and Zheng, Csbingbing and Xue, Wen and Jiang, Le and Liu, Cheng and Wu, Si and Wong, Hau San},
  title     = {Learning Degradation-unaware Representation with Prior-based Latent Transformations for Blind Face Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9120--9129},
}
Faces that Speak: Jointly Synthesising Talking Face and Speech from Text: Youngjoon Jang,

Ji-Hoon Kim,

Junseok Ahn,

Doyeop Kwak,

Hong-Sun Yang,

Yoon-Cheol Ju,

Il-Hwan Kim,

Byeong-Yeol Kim,

Joon Son Chung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jang_2024_CVPR,
  author    = {Jang, Youngjoon and Kim, Ji-Hoon and Ahn, Junseok and Kwak, Doyeop and Yang, Hong-Sun and Ju, Yoon-Cheol and Kim, Il-Hwan and Kim, Byeong-Yeol and Chung, Joon Son},
  title     = {Faces that Speak: Jointly Synthesising Talking Face and Speech from Text},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8818--8828},
}
DiffusionRegPose: Enhancing Multi-Person Pose Estimation using a Diffusion-Based End-to-End Regression Approach: Dayi Tan,

Hansheng Chen,

Wei Tian,

Lu Xiong; [pdf] [supp]
[bibtex]
@InProceedings{Tan_2024_CVPR,
  author    = {Tan, Dayi and Chen, Hansheng and Tian, Wei and Xiong, Lu},
  title     = {{DiffusionRegPose}: Enhancing Multi-Person Pose Estimation using a Diffusion-Based End-to-End Regression Approach},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2230--2239},
}
Memory-Scalable and Simplified Functional Map Learning: Robin Magnet,

Maks Ovsjanikov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Magnet_2024_CVPR,
  author    = {Magnet, Robin and Ovsjanikov, Maks},
  title     = {Memory-Scalable and Simplified Functional Map Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4041--4050},
}
Gaussian Head Avatar: Ultra High-fidelity Head Avatar via Dynamic Gaussians: Yuelang Xu,

Benwang Chen,

Zhe Li,

Hongwen Zhang,

Lizhen Wang,

Zerong Zheng,

Yebin Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Yuelang and Chen, Benwang and Li, Zhe and Zhang, Hongwen and Wang, Lizhen and Zheng, Zerong and Liu, Yebin},
  title     = {{Gaussian} Head Avatar: Ultra High-fidelity Head Avatar via Dynamic {Gaussians}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1931--1941},
}
Stratified Avatar Generation from Sparse Observations: Han Feng,

Wenchao Ma,

Quankai Gao,

Xianwei Zheng,

Nan Xue,

Huijuan Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2024_CVPR,
  author    = {Feng, Han and Ma, Wenchao and Gao, Quankai and Zheng, Xianwei and Xue, Nan and Xu, Huijuan},
  title     = {Stratified Avatar Generation from Sparse Observations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {153--163},
}
Rewrite the Stars: Xu Ma,

Xiyang Dai,

Yue Bai,

Yizhou Wang,

Yun Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2024_CVPR,
  author    = {Ma, Xu and Dai, Xiyang and Bai, Yue and Wang, Yizhou and Fu, Yun},
  title     = {Rewrite the Stars},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5694--5703},
}
PairDETR: Joint Detection and Association of Human Bodies and Faces: Ammar Ali,

Georgii Gaikov,

Denis Rybalchenko,

Alexander Chigorin,

Ivan Laptev,

Sergey Zagoruyko; [pdf] [supp]
[bibtex]
@InProceedings{Ali_2024_CVPR,
  author    = {Ali, Ammar and Gaikov, Georgii and Rybalchenko, Denis and Chigorin, Alexander and Laptev, Ivan and Zagoruyko, Sergey},
  title     = {{PairDETR}: Joint Detection and Association of Human Bodies and Faces},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {423--432},
}
SportsSloMo: A New Benchmark and Baselines for Human-centric Video Frame Interpolation: Jiaben Chen,

Huaizu Jiang; [pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Jiaben and Jiang, Huaizu},
  title     = {{SportsSloMo}: A New Benchmark and Baselines for Human-centric Video Frame Interpolation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6475--6486},
}
Text2HOI: Text-guided 3D Motion Generation for Hand-Object Interaction: Junuk Cha,

Jihyeon Kim,

Jae Shin Yoon,

Seungryul Baek; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cha_2024_CVPR,
  author    = {Cha, Junuk and Kim, Jihyeon and Yoon, Jae Shin and Baek, Seungryul},
  title     = {{Text2HOI}: Text-guided {3D} Motion Generation for Hand-Object Interaction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1577--1585},
}
MACE: Mass Concept Erasure in Diffusion Models: Shilin Lu,

Zilan Wang,

Leyang Li,

Yanzhu Liu,

Adams Wai-Kin Kong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2024_CVPR,
  author    = {Lu, Shilin and Wang, Zilan and Li, Leyang and Liu, Yanzhu and Kong, Adams Wai-Kin},
  title     = {{MACE}: Mass Concept Erasure in Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6430--6440},
}
PeLK: Parameter-efficient Large Kernel ConvNets with Peripheral Convolution: Honghao Chen,

Xiangxiang Chu,

Yongjian Ren,

Xin Zhao,

Kaiqi Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Honghao and Chu, Xiangxiang and Ren, Yongjian and Zhao, Xin and Huang, Kaiqi},
  title     = {{PeLK}: Parameter-efficient Large Kernel {ConvNets} with Peripheral Convolution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5557--5567},
}
AiOS: All-in-One-Stage Expressive Human Pose and Shape Estimation: Qingping Sun,

Yanjun Wang,

Ailing Zeng,

Wanqi Yin,

Chen Wei,

Wenjia Wang,

Haiyi Mei,

Chi-Sing Leung,

Ziwei Liu,

Lei Yang,

Zhongang Cai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2024_CVPR,
  author    = {Sun, Qingping and Wang, Yanjun and Zeng, Ailing and Yin, Wanqi and Wei, Chen and Wang, Wenjia and Mei, Haiyi and Leung, Chi-Sing and Liu, Ziwei and Yang, Lei and Cai, Zhongang},
  title     = {{AiOS}: All-in-One-Stage Expressive Human Pose and Shape Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1834--1843},
}
Design2Cloth: 3D Cloth Generation from 2D Masks: Jiali Zheng,

Rolandos Alexandros Potamias,

Stefanos Zafeiriou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2024_CVPR,
  author    = {Zheng, Jiali and Potamias, Rolandos Alexandros and Zafeiriou, Stefanos},
  title     = {{Design2Cloth}: {3D} Cloth Generation from {2D} Masks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1748--1758},
}
Amodal Completion via Progressive Mixed Context Diffusion: Katherine Xu,

Lingzhi Zhang,

Jianbo Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Katherine and Zhang, Lingzhi and Shi, Jianbo},
  title     = {Amodal Completion via Progressive Mixed Context Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9099--9109},
}
Diffusion 3D Features (Diff3F): Decorating Untextured Shapes with Distilled Semantic Features: Niladri Shekhar Dutt,

Sanjeev Muralikrishnan,

Niloy J. Mitra; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dutt_2024_CVPR,
  author    = {Dutt, Niladri Shekhar and Muralikrishnan, Sanjeev and Mitra, Niloy J.},
  title     = {Diffusion {3D} Features ({Diff3F}): Decorating Untextured Shapes with Distilled Semantic Features},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4494--4504},
}
Cinematic Behavior Transfer via NeRF-based Differentiable Filming: Xuekun Jiang,

Anyi Rao,

Jingbo Wang,

Dahua Lin,

Bo Dai; [pdf] [arXiv]
[bibtex]
@InProceedings{Jiang_2024_CVPR,
  author    = {Jiang, Xuekun and Rao, Anyi and Wang, Jingbo and Lin, Dahua and Dai, Bo},
  title     = {Cinematic Behavior Transfer via {NeRF-based} Differentiable Filming},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6723--6732},
}
Text-Driven Image Editing via Learnable Regions: Yuanze Lin,

Yi-Wen Chen,

Yi-Hsuan Tsai,

Lu Jiang,

Ming-Hsuan Yang; [pdf] [arXiv]
[bibtex]
@InProceedings{Lin_2024_CVPR,
  author    = {Lin, Yuanze and Chen, Yi-Wen and Tsai, Yi-Hsuan and Jiang, Lu and Yang, Ming-Hsuan},
  title     = {Text-Driven Image Editing via Learnable Regions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7059--7068},
}
Relation Rectification in Diffusion Model: Yinwei Wu,

Xingyi Yang,

Xinchao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Yinwei and Yang, Xingyi and Wang, Xinchao},
  title     = {Relation Rectification in Diffusion Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7685--7694},
}
Mocap Everyone Everywhere: Lightweight Motion Capture With Smartwatches and a Head-Mounted Camera: Jiye Lee,

Hanbyul Joo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2024_CVPR,
  author    = {Lee, Jiye and Joo, Hanbyul},
  title     = {Mocap Everyone Everywhere: Lightweight Motion Capture With Smartwatches and a Head-Mounted Camera},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1091--1100},
}
Fast ODE-based Sampling for Diffusion Models in Around 5 Steps: Zhenyu Zhou,

Defang Chen,

Can Wang,

Chun Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Zhenyu and Chen, Defang and Wang, Can and Chen, Chun},
  title     = {Fast {ODE-based} Sampling for Diffusion Models in Around 5 Steps},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7777--7786},
}
CLiC: Concept Learning in Context: Mehdi Safaee,

Aryan Mikaeili,

Or Patashnik,

Daniel Cohen-Or,

Ali Mahdavi-Amiri; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Safaee_2024_CVPR,
  author    = {Safaee, Mehdi and Mikaeili, Aryan and Patashnik, Or and Cohen-Or, Daniel and Mahdavi-Amiri, Ali},
  title     = {{CLiC}: Concept Learning in Context},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6924--6933},
}
CAD-SIGNet: CAD Language Inference from Point Clouds using Layer-wise Sketch Instance Guided Attention: Mohammad Sadil Khan,

Elona Dupont,

Sk Aziz Ali,

Kseniya Cherenkova,

Anis Kacem,

Djamila Aouada; [pdf] [supp]
[bibtex]
@InProceedings{Khan_2024_CVPR,
  author    = {Khan, Mohammad Sadil and Dupont, Elona and Ali, Sk Aziz and Cherenkova, Kseniya and Kacem, Anis and Aouada, Djamila},
  title     = {{CAD-SIGNet}: {CAD} Language Inference from Point Clouds using Layer-wise Sketch Instance Guided Attention},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4713--4722},
}
CLIB-FIQA: Face Image Quality Assessment with Confidence Calibration: Fu-Zhao Ou,

Chongyi Li,

Shiqi Wang,

Sam Kwong; [pdf]
[bibtex]
@InProceedings{Ou_2024_CVPR,
  author    = {Ou, Fu-Zhao and Li, Chongyi and Wang, Shiqi and Kwong, Sam},
  title     = {{CLIB-FIQA}: Face Image Quality Assessment with Confidence Calibration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1694--1704},
}
Predicated Diffusion: Predicate Logic-Based Attention Guidance for Text-to-Image Diffusion Models: Kota Sueyoshi,

Takashi Matsubara; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sueyoshi_2024_CVPR,
  author    = {Sueyoshi, Kota and Matsubara, Takashi},
  title     = {Predicated Diffusion: Predicate Logic-Based Attention Guidance for Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8651--8660},
}
MoML: Online Meta Adaptation for 3D Human Motion Prediction: Xiaoning Sun,

Huaijiang Sun,

Bin Li,

Dong Wei,

Weiqing Li,

Jianfeng Lu; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2024_CVPR,
  author    = {Sun, Xiaoning and Sun, Huaijiang and Li, Bin and Wei, Dong and Li, Weiqing and Lu, Jianfeng},
  title     = {{MoML}: Online Meta Adaptation for {3D} Human Motion Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1042--1051},
}
CAT-DM: Controllable Accelerated Virtual Try-on with Diffusion Model: Jianhao Zeng,

Dan Song,

Weizhi Nie,

Hongshuo Tian,

Tongtong Wang,

An-An Liu; [pdf] [supp]
[bibtex]
@InProceedings{Zeng_2024_CVPR,
  author    = {Zeng, Jianhao and Song, Dan and Nie, Weizhi and Tian, Hongshuo and Wang, Tongtong and Liu, An-An},
  title     = {{CAT-DM}: Controllable Accelerated Virtual Try-on with Diffusion Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8372--8382},
}
Synergistic Global-space Camera and Human Reconstruction from Videos: Yizhou Zhao,

Tuanfeng Yang Wang,

Bhiksha Raj,

Min Xu,

Jimei Yang,

Chun-Hao Paul Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Yizhou and Wang, Tuanfeng Yang and Raj, Bhiksha and Xu, Min and Yang, Jimei and Huang, Chun-Hao Paul},
  title     = {Synergistic Global-space Camera and Human Reconstruction from Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1216--1226},
}
3D Face Reconstruction with the Geometric Guidance of Facial Part Segmentation: Zidu Wang,

Xiangyu Zhu,

Tianshuo Zhang,

Baiqin Wang,

Zhen Lei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Zidu and Zhu, Xiangyu and Zhang, Tianshuo and Wang, Baiqin and Lei, Zhen},
  title     = {{3D} Face Reconstruction with the Geometric Guidance of Facial Part Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1672--1682},
}
FreeU: Free Lunch in Diffusion U-Net: Chenyang Si,

Ziqi Huang,

Yuming Jiang,

Ziwei Liu; [pdf] [supp]
[bibtex]
@InProceedings{Si_2024_CVPR,
  author    = {Si, Chenyang and Huang, Ziqi and Jiang, Yuming and Liu, Ziwei},
  title     = {{FreeU}: Free Lunch in Diffusion {U-Net}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4733--4743},
}
ViewDiff: 3D-Consistent Image Generation with Text-to-Image Models: Lukas Höllein,

Aljaž Božič,

Norman Müller,

David Novotny,

Hung-Yu Tseng,

Christian Richardt,

Michael Zollhöfer,

Matthias Nießner; [pdf] [supp]
[bibtex]
@InProceedings{Hollein_2024_CVPR,
  author    = {H{\"o}llein, Lukas and Bo{\v{z}}i{\v{c}}, Alja{\v{z}} and M{\"u}ller, Norman and Novotny, David and Tseng, Hung-Yu and Richardt, Christian and Zollh{\"o}fer, Michael and Nie{\ss}ner, Matthias},
  title     = {{ViewDiff}: {3D-Consistent} Image Generation with Text-to-Image Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5043--5052},
}
Diffusion Models Without Attention: Jing Nathan Yan,

Jiatao Gu,

Alexander M. Rush; [pdf] [arXiv]
[bibtex]
@InProceedings{Yan_2024_CVPR,
  author    = {Yan, Jing Nathan and Gu, Jiatao and Rush, Alexander M.},
  title     = {Diffusion Models Without Attention},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8239--8249},
}
Emotional Speech-driven 3D Body Animation via Disentangled Latent Diffusion: Kiran Chhatre,

Radek Daněček,

Nikos Athanasiou,

Giorgio Becherini,

Christopher Peters,

Michael J. Black,

Timo Bolkart; [pdf] [supp]
[bibtex]
@InProceedings{Chhatre_2024_CVPR,
  author    = {Chhatre, Kiran and Dan{\v{e}}{\v{c}}ek, Radek and Athanasiou, Nikos and Becherini, Giorgio and Peters, Christopher and Black, Michael J. and Bolkart, Timo},
  title     = {Emotional Speech-driven {3D} Body Animation via Disentangled Latent Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1942--1953},
}
Retrieval-Augmented Layout Transformer for Content-Aware Layout Generation: Daichi Horita,

Naoto Inoue,

Kotaro Kikuchi,

Kota Yamaguchi,

Kiyoharu Aizawa; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Horita_2024_CVPR,
  author    = {Horita, Daichi and Inoue, Naoto and Kikuchi, Kotaro and Yamaguchi, Kota and Aizawa, Kiyoharu},
  title     = {Retrieval-Augmented Layout Transformer for Content-Aware Layout Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {67--76},
}
InstantBooth: Personalized Text-to-Image Generation without Test-Time Finetuning: Jing Shi,

Wei Xiong,

Zhe Lin,

Hyun Joon Jung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2024_CVPR,
  author    = {Shi, Jing and Xiong, Wei and Lin, Zhe and Jung, Hyun Joon},
  title     = {{InstantBooth}: Personalized Text-to-Image Generation without Test-Time Finetuning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8543--8552},
}
SD2Event: Self-supervised Learning of Dynamic Detectors and Contextual Descriptors for Event Cameras: Yuan Gao,

Yuqing Zhu,

Xinjun Li,

Yimin Du,

Tianzhu Zhang; [pdf]
[bibtex]
@InProceedings{Gao_2024_CVPR,
  author    = {Gao, Yuan and Zhu, Yuqing and Li, Xinjun and Du, Yimin and Zhang, Tianzhu},
  title     = {{SD2Event}: Self-supervised Learning of Dynamic Detectors and Contextual Descriptors for Event Cameras},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3055--3064},
}
PaReNeRF: Toward Fast Large-scale Dynamic NeRF with Patch-based Reference: Xiao Tang,

Min Yang,

Penghui Sun,

Hui Li,

Yuchao Dai,

Feng Zhu,

Hojae Lee; [pdf] [supp]
[bibtex]
@InProceedings{Tang_2024_CVPR,
  author    = {Tang, Xiao and Yang, Min and Sun, Penghui and Li, Hui and Dai, Yuchao and Zhu, Feng and Lee, Hojae},
  title     = {{PaReNeRF}: Toward Fast Large-scale Dynamic {NeRF} with Patch-based Reference},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5428--5438},
}
Affine Equivariant Networks Based on Differential Invariants: Yikang Li,

Yeqing Qiu,

Yuxuan Chen,

Lingshen He,

Zhouchen Lin; [pdf] [supp]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Yikang and Qiu, Yeqing and Chen, Yuxuan and He, Lingshen and Lin, Zhouchen},
  title     = {Affine Equivariant Networks Based on Differential Invariants},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5546--5556},
}
Selectively Informative Description can Reduce Undesired Embedding Entanglements in Text-to-Image Personalization: Jimyeong Kim,

Jungwon Park,

Wonjong Rhee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2024_CVPR,
  author    = {Kim, Jimyeong and Park, Jungwon and Rhee, Wonjong},
  title     = {Selectively Informative Description can Reduce Undesired Embedding Entanglements in Text-to-Image Personalization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8312--8322},
}
Smooth Diffusion: Crafting Smooth Latent Spaces in Diffusion Models: Jiayi Guo,

Xingqian Xu,

Yifan Pu,

Zanlin Ni,

Chaofei Wang,

Manushree Vasu,

Shiji Song,

Gao Huang,

Humphrey Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2024_CVPR,
  author    = {Guo, Jiayi and Xu, Xingqian and Pu, Yifan and Ni, Zanlin and Wang, Chaofei and Vasu, Manushree and Song, Shiji and Huang, Gao and Shi, Humphrey},
  title     = {Smooth Diffusion: Crafting Smooth Latent Spaces in Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7548--7558},
}
FlowIE: Efficient Image Enhancement via Rectified Flow: Yixuan Zhu,

Wenliang Zhao,

Ao Li,

Yansong Tang,

Jie Zhou,

Jiwen Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2024_CVPR,
  author    = {Zhu, Yixuan and Zhao, Wenliang and Li, Ao and Tang, Yansong and Zhou, Jie and Lu, Jiwen},
  title     = {{FlowIE}: Efficient Image Enhancement via Rectified Flow},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13--22},
}
Improving Training Efficiency of Diffusion Models via Multi-Stage Framework and Tailored Multi-Decoder Architecture: Huijie Zhang,

Yifu Lu,

Ismail Alkhouri,

Saiprasad Ravishankar,

Dogyoon Song,

Qing Qu; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Huijie and Lu, Yifu and Alkhouri, Ismail and Ravishankar, Saiprasad and Song, Dogyoon and Qu, Qing},
  title     = {Improving Training Efficiency of Diffusion Models via Multi-Stage Framework and Tailored Multi-Decoder Architecture},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7372--7381},
}
In-Context Matting: He Guo,

Zixuan Ye,

Zhiguo Cao,

Hao Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2024_CVPR,
  author    = {Guo, He and Ye, Zixuan and Cao, Zhiguo and Lu, Hao},
  title     = {In-Context Matting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3711--3720},
}
DemoCaricature: Democratising Caricature Generation with a Rough Sketch: Dar-Yen Chen,

Ayan Kumar Bhunia,

Subhadeep Koley,

Aneeshan Sain,

Pinaki Nath Chowdhury,

Yi-Zhe Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Dar-Yen and Bhunia, Ayan Kumar and Koley, Subhadeep and Sain, Aneeshan and Chowdhury, Pinaki Nath and Song, Yi-Zhe},
  title     = {{DemoCaricature}: Democratising Caricature Generation with a Rough Sketch},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8629--8639},
}
CapHuman: Capture Your Moments in Parallel Universes: Chao Liang,

Fan Ma,

Linchao Zhu,

Yingying Deng,

Yi Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2024_CVPR,
  author    = {Liang, Chao and Ma, Fan and Zhu, Linchao and Deng, Yingying and Yang, Yi},
  title     = {{CapHuman}: Capture Your Moments in Parallel Universes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6400--6409},
}
SDPose: Tokenized Pose Estimation via Circulation-Guide Self-Distillation: Sichen Chen,

Yingyi Zhang,

Siming Huang,

Ran Yi,

Ke Fan,

Ruixin Zhang,

Peixian Chen,

Jun Wang,

Shouhong Ding,

Lizhuang Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Sichen and Zhang, Yingyi and Huang, Siming and Yi, Ran and Fan, Ke and Zhang, Ruixin and Chen, Peixian and Wang, Jun and Ding, Shouhong and Ma, Lizhuang},
  title     = {{SDPose}: Tokenized Pose Estimation via Circulation-Guide Self-Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1082--1090},
}
Authentic Hand Avatar from a Phone Scan via Universal Hand Model: Gyeongsik Moon,

Weipeng Xu,

Rohan Joshi,

Chenglei Wu,

Takaaki Shiratori; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Moon_2024_CVPR,
  author    = {Moon, Gyeongsik and Xu, Weipeng and Joshi, Rohan and Wu, Chenglei and Shiratori, Takaaki},
  title     = {Authentic Hand Avatar from a Phone Scan via Universal Hand Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2029--2038},
}
Open-World Semantic Segmentation Including Class Similarity: Matteo Sodano,

Federico Magistri,

Lucas Nunes,

Jens Behley,

Cyrill Stachniss; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sodano_2024_CVPR,
  author    = {Sodano, Matteo and Magistri, Federico and Nunes, Lucas and Behley, Jens and Stachniss, Cyrill},
  title     = {Open-World Semantic Segmentation Including Class Similarity},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3184--3194},
}
Towards Memorization-Free Diffusion Models: Chen Chen,

Daochang Liu,

Chang Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Chen and Liu, Daochang and Xu, Chang},
  title     = {Towards Memorization-Free Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8425--8434},
}
IQ-VFI: Implicit Quadratic Motion Estimation for Video Frame Interpolation: Mengshun Hu,

Kui Jiang,

Zhihang Zhong,

Zheng Wang,

Yinqiang Zheng; [pdf]
[bibtex]
@InProceedings{Hu_2024_CVPR,
  author    = {Hu, Mengshun and Jiang, Kui and Zhong, Zhihang and Wang, Zheng and Zheng, Yinqiang},
  title     = {{IQ-VFI}: Implicit Quadratic Motion Estimation for Video Frame Interpolation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6410--6419},
}
KeyPoint Relative Position Encoding for Face Recognition: Minchul Kim,

Yiyang Su,

Feng Liu,

Anil Jain,

Xiaoming Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2024_CVPR,
  author    = {Kim, Minchul and Su, Yiyang and Liu, Feng and Jain, Anil and Liu, Xiaoming},
  title     = {{KeyPoint} Relative Position Encoding for Face Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {244--255},
}
Hyper-MD: Mesh Denoising with Customized Parameters Aware of Noise Intensity and Geometric Characteristics: Xingtao Wang,

Hongliang Wei,

Xiaopeng Fan,

Debin Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Xingtao and Wei, Hongliang and Fan, Xiaopeng and Zhao, Debin},
  title     = {{Hyper-MD}: Mesh Denoising with Customized Parameters Aware of Noise Intensity and Geometric Characteristics},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4651--4660},
}
Beyond First-Order Tweedie: Solving Inverse Problems using Latent Diffusion: Litu Rout,

Yujia Chen,

Abhishek Kumar,

Constantine Caramanis,

Sanjay Shakkottai,

Wen-Sheng Chu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rout_2024_CVPR,
  author    = {Rout, Litu and Chen, Yujia and Kumar, Abhishek and Caramanis, Constantine and Shakkottai, Sanjay and Chu, Wen-Sheng},
  title     = {Beyond First-Order Tweedie: Solving Inverse Problems using Latent Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9472--9481}
}
Rethinking the Objectives of Vector-Quantized Tokenizers for Image Synthesis: Yuchao Gu,

Xintao Wang,

Yixiao Ge,

Ying Shan,

Mike Zheng Shou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2024_CVPR,
  author    = {Gu, Yuchao and Wang, Xintao and Ge, Yixiao and Shan, Ying and Shou, Mike Zheng},
  title     = {Rethinking the Objectives of Vector-Quantized Tokenizers for Image Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7631--7640}
}
Continuous Pose for Monocular Cameras in Neural Implicit Representation: Qi Ma,

Danda Pani Paudel,

Ajad Chhatkuli,

Luc Van Gool; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2024_CVPR,
  author    = {Ma, Qi and Paudel, Danda Pani and Chhatkuli, Ajad and Van Gool, Luc},
  title     = {Continuous Pose for Monocular Cameras in Neural Implicit Representation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5291--5301}
}
D^4: Dataset Distillation via Disentangled Diffusion Model: Duo Su,

Junjie Hou,

Weizhi Gao,

Yingjie Tian,

Bowen Tang; [pdf] [supp]
[bibtex]
@InProceedings{Su_2024_CVPR,
  author    = {Su, Duo and Hou, Junjie and Gao, Weizhi and Tian, Yingjie and Tang, Bowen},
  title     = {D{\textasciicircum}4: Dataset Distillation via Disentangled Diffusion Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5809--5818}
}
360DVD: Controllable Panorama Video Generation with 360-Degree Video Diffusion Model: Qian Wang,

Weiqi Li,

Chong Mou,

Xinhua Cheng,

Jian Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Qian and Li, Weiqi and Mou, Chong and Cheng, Xinhua and Zhang, Jian},
  title     = {360DVD: Controllable Panorama Video Generation with 360-Degree Video Diffusion Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6913--6923}
}
RankMatch: Exploring the Better Consistency Regularization for Semi-supervised Semantic Segmentation: Huayu Mai,

Rui Sun,

Tianzhu Zhang,

Feng Wu; [pdf]
[bibtex]
@InProceedings{Mai_2024_CVPR,
  author    = {Mai, Huayu and Sun, Rui and Zhang, Tianzhu and Wu, Feng},
  title     = {RankMatch: Exploring the Better Consistency Regularization for Semi-supervised Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3391--3401}
}
DuPL: Dual Student with Trustworthy Progressive Learning for Robust Weakly Supervised Semantic Segmentation: Yuanchen Wu,

Xichen Ye,

Kequan Yang,

Jide Li,

Xiaoqiang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Yuanchen and Ye, Xichen and Yang, Kequan and Li, Jide and Li, Xiaoqiang},
  title     = {DuPL: Dual Student with Trustworthy Progressive Learning for Robust Weakly Supervised Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3534--3543}
}
SurMo: Surface-based 4D Motion Modeling for Dynamic Human Rendering: Tao Hu,

Fangzhou Hong,

Ziwei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2024_CVPR,
  author    = {Hu, Tao and Hong, Fangzhou and Liu, Ziwei},
  title     = {SurMo: Surface-based 4D Motion Modeling for Dynamic Human Rendering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6550--6560}
}
Hierarchical Spatio-temporal Decoupling for Text-to-Video Generation: Zhiwu Qing,

Shiwei Zhang,

Jiayu Wang,

Xiang Wang,

Yujie Wei,

Yingya Zhang,

Changxin Gao,

Nong Sang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qing_2024_CVPR,
  author    = {Qing, Zhiwu and Zhang, Shiwei and Wang, Jiayu and Wang, Xiang and Wei, Yujie and Zhang, Yingya and Gao, Changxin and Sang, Nong},
  title     = {Hierarchical Spatio-temporal Decoupling for Text-to-Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6635--6645}
}
PLACE: Adaptive Layout-Semantic Fusion for Semantic Image Synthesis: Zhengyao Lv,

Yuxiang Wei,

Wangmeng Zuo,

Kwan-Yee K. Wong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lv_2024_CVPR,
  author    = {Lv, Zhengyao and Wei, Yuxiang and Zuo, Wangmeng and Wong, Kwan-Yee K.},
  title     = {PLACE: Adaptive Layout-Semantic Fusion for Semantic Image Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9264--9274}
}
Exploring Efficient Asymmetric Blind-Spots for Self-Supervised Denoising in Real-World Scenarios: Shiyan Chen,

Jiyuan Zhang,

Zhaofei Yu,

Tiejun Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Shiyan and Zhang, Jiyuan and Yu, Zhaofei and Huang, Tiejun},
  title     = {Exploring Efficient Asymmetric Blind-Spots for Self-Supervised Denoising in Real-World Scenarios},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2814--2823}
}
Efficient Multi-scale Network with Learnable Discrete Wavelet Transform for Blind Motion Deblurring: Xin Gao,

Tianheng Qiu,

Xinyu Zhang,

Hanlin Bai,

Kang Liu,

Xuan Huang,

Hu Wei,

Guoying Zhang,

Huaping Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2024_CVPR,
  author    = {Gao, Xin and Qiu, Tianheng and Zhang, Xinyu and Bai, Hanlin and Liu, Kang and Huang, Xuan and Wei, Hu and Zhang, Guoying and Liu, Huaping},
  title     = {Efficient Multi-scale Network with Learnable Discrete Wavelet Transform for Blind Motion Deblurring},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2733--2742}
}
MaskPLAN: Masked Generative Layout Planning from Partial Input: Hang Zhang,

Anton Savov,

Benjamin Dillenburger; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Hang and Savov, Anton and Dillenburger, Benjamin},
  title     = {MaskPLAN: Masked Generative Layout Planning from Partial Input},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8964--8973}
}
HMD-Poser: On-Device Real-time Human Motion Tracking from Scalable Sparse Observations: Peng Dai,

Yang Zhang,

Tao Liu,

Zhen Fan,

Tianyuan Du,

Zhuo Su,

Xiaozheng Zheng,

Zeming Li; [pdf] [supp]
[bibtex]
@InProceedings{Dai_2024_CVPR,
  author    = {Dai, Peng and Zhang, Yang and Liu, Tao and Fan, Zhen and Du, Tianyuan and Su, Zhuo and Zheng, Xiaozheng and Li, Zeming},
  title     = {HMD-Poser: On-Device Real-time Human Motion Tracking from Scalable Sparse Observations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {874--884}
}
Flexible Biometrics Recognition: Bridging the Multimodality Gap through Attention Alignment and Prompt Tuning: Leslie Ching Ow Tiong,

Dick Sigmund,

Chen-Hui Chan,

Andrew Beng Jin Teoh; [pdf] [supp]
[bibtex]
@InProceedings{Tiong_2024_CVPR,
  author    = {Tiong, Leslie Ching Ow and Sigmund, Dick and Chan, Chen-Hui and Teoh, Andrew Beng Jin},
  title     = {Flexible Biometrics Recognition: Bridging the Multimodality Gap through Attention Alignment and Prompt Tuning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {267--276}
}
Multi-scale Dynamic and Hierarchical Relationship Modeling for Facial Action Units Recognition: Zihan Wang,

Siyang Song,

Cheng Luo,

Songhe Deng,

Weicheng Xie,

Linlin Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Zihan and Song, Siyang and Luo, Cheng and Deng, Songhe and Xie, Weicheng and Shen, Linlin},
  title     = {Multi-scale Dynamic and Hierarchical Relationship Modeling for Facial Action Units Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1270--1280}
}
EventEgo3D: 3D Human Motion Capture from Egocentric Event Streams: Christen Millerdurai,

Hiroyasu Akada,

Jian Wang,

Diogo Luvizon,

Christian Theobalt,

Vladislav Golyanik; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Millerdurai_2024_CVPR,
  author    = {Millerdurai, Christen and Akada, Hiroyasu and Wang, Jian and Luvizon, Diogo and Theobalt, Christian and Golyanik, Vladislav},
  title     = {EventEgo3D: 3D Human Motion Capture from Egocentric Event Streams},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1186--1195}
}
A Call to Reflect on Evaluation Practices for Age Estimation: Comparative Analysis of the State-of-the-Art and a Unified Benchmark: Jakub Paplhám,

Vojtěch Franc; [pdf] [supp]
[bibtex]
@InProceedings{Paplham_2024_CVPR,
  author    = {Paplh{\'a}m, Jakub and Franc, Vojt{\v{e}}ch},
  title     = {A Call to Reflect on Evaluation Practices for Age Estimation: Comparative Analysis of the State-of-the-Art and a Unified Benchmark},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1196--1205}
}
CosalPure: Learning Concept from Group Images for Robust Co-Saliency Detection: Jiayi Zhu,

Qing Guo,

Felix Juefei-Xu,

Yihao Huang,

Yang Liu,

Geguang Pu; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhu_2024_CVPR,
  author    = {Zhu, Jiayi and Guo, Qing and Juefei-Xu, Felix and Huang, Yihao and Liu, Yang and Pu, Geguang},
  title     = {CosalPure: Learning Concept from Group Images for Robust Co-Saliency Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3669--3678}
}
MRFP: Learning Generalizable Semantic Segmentation from Sim-2-Real with Multi-Resolution Feature Perturbation: Sumanth Udupa,

Prajwal Gurunath,

Aniruddh Sikdar,

Suresh Sundaram; [pdf] [supp]
[bibtex]
@InProceedings{Udupa_2024_CVPR,
  author    = {Udupa, Sumanth and Gurunath, Prajwal and Sikdar, Aniruddh and Sundaram, Suresh},
  title     = {MRFP: Learning Generalizable Semantic Segmentation from Sim-2-Real with Multi-Resolution Feature Perturbation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5904--5914}
}
MotionEditor: Editing Video Motion via Content-Aware Diffusion: Shuyuan Tu,

Qi Dai,

Zhi-Qi Cheng,

Han Hu,

Xintong Han,

Zuxuan Wu,

Yu-Gang Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tu_2024_CVPR,
  author    = {Tu, Shuyuan and Dai, Qi and Cheng, Zhi-Qi and Hu, Han and Han, Xintong and Wu, Zuxuan and Jiang, Yu-Gang},
  title     = {MotionEditor: Editing Video Motion via Content-Aware Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7882--7891}
}
Doubly Abductive Counterfactual Inference for Text-based Image Editing: Xue Song,

Jiequan Cui,

Hanwang Zhang,

Jingjing Chen,

Richang Hong,

Yu-Gang Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2024_CVPR,
  author    = {Song, Xue and Cui, Jiequan and Zhang, Hanwang and Chen, Jingjing and Hong, Richang and Jiang, Yu-Gang},
  title     = {Doubly Abductive Counterfactual Inference for Text-based Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9162--9171}
}
Normalizing Flows on the Product Space of SO(3) Manifolds for Probabilistic Human Pose Modeling: Olaf Dünkel,

Tim Salzmann,

Florian Pfaff; [pdf] [supp]
[bibtex]
@InProceedings{Dunkel_2024_CVPR,
  author    = {D{\"u}nkel, Olaf and Salzmann, Tim and Pfaff, Florian},
  title     = {Normalizing Flows on the Product Space of SO(3) Manifolds for Probabilistic Human Pose Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2285--2294}
}
ReGenNet: Towards Human Action-Reaction Synthesis: Liang Xu,

Yizhou Zhou,

Yichao Yan,

Xin Jin,

Wenhan Zhu,

Fengyun Rao,

Xiaokang Yang,

Wenjun Zeng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Liang and Zhou, Yizhou and Yan, Yichao and Jin, Xin and Zhu, Wenhan and Rao, Fengyun and Yang, Xiaokang and Zeng, Wenjun},
  title     = {ReGenNet: Towards Human Action-Reaction Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1759--1769}
}
A Simple Baseline for Efficient Hand Mesh Reconstruction: Zhishan Zhou,

Shihao Zhou,

Zhi Lv,

Minqiang Zou,

Yao Tang,

Jiajun Liang; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Zhishan and Zhou, Shihao and Lv, Zhi and Zou, Minqiang and Tang, Yao and Liang, Jiajun},
  title     = {A Simple Baseline for Efficient Hand Mesh Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1367--1376}
}
PhotoMaker: Customizing Realistic Human Photos via Stacked ID Embedding: Zhen Li,

Mingdeng Cao,

Xintao Wang,

Zhongang Qi,

Ming-Ming Cheng,

Ying Shan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Zhen and Cao, Mingdeng and Wang, Xintao and Qi, Zhongang and Cheng, Ming-Ming and Shan, Ying},
  title     = {PhotoMaker: Customizing Realistic Human Photos via Stacked ID Embedding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8640--8650}
}
Score-Guided Diffusion for 3D Human Recovery: Anastasis Stathopoulos,

Ligong Han,

Dimitris Metaxas; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Stathopoulos_2024_CVPR,
  author    = {Stathopoulos, Anastasis and Han, Ligong and Metaxas, Dimitris},
  title     = {Score-Guided Diffusion for 3D Human Recovery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {906--915}
}
Check Locate Rectify: A Training-Free Layout Calibration System for Text-to-Image Generation: Biao Gong,

Siteng Huang,

Yutong Feng,

Shiwei Zhang,

Yuyuan Li,

Yu Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gong_2024_CVPR,
  author    = {Gong, Biao and Huang, Siteng and Feng, Yutong and Zhang, Shiwei and Li, Yuyuan and Liu, Yu},
  title     = {Check Locate Rectify: A Training-Free Layout Calibration System for Text-to-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6624--6634}
}
Pose-Transformed Equivariant Network for 3D Point Trajectory Prediction: Ruixuan Yu,

Jian Sun; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2024_CVPR,
  author    = {Yu, Ruixuan and Sun, Jian},
  title     = {Pose-Transformed Equivariant Network for 3D Point Trajectory Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5503--5512}
}
Revisiting Sampson Approximations for Geometric Estimation Problems: Felix Rydell,

Angélica Torres,

Viktor Larsson; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rydell_2024_CVPR,
  author    = {Rydell, Felix and Torres, Ang{\'e}lica and Larsson, Viktor},
  title     = {Revisiting Sampson Approximations for Geometric Estimation Problems},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4990--4998}
}
Fixed Point Diffusion Models: Xingjian Bai,

Luke Melas-Kyriazi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bai_2024_CVPR,
  author    = {Bai, Xingjian and Melas-Kyriazi, Luke},
  title     = {Fixed Point Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9430--9440}
}
Residual Learning in Diffusion Models: Junyu Zhang,

Daochang Liu,

Eunbyung Park,

Shichao Zhang,

Chang Xu; [pdf]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Junyu and Liu, Daochang and Park, Eunbyung and Zhang, Shichao and Xu, Chang},
  title     = {Residual Learning in Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7289--7299}
}
Beyond Textual Constraints: Learning Novel Diffusion Conditions with Fewer Examples: Yuyang Yu,

Bangzhen Liu,

Chenxi Zheng,

Xuemiao Xu,

Huaidong Zhang,

Shengfeng He; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2024_CVPR,
  author    = {Yu, Yuyang and Liu, Bangzhen and Zheng, Chenxi and Xu, Xuemiao and Zhang, Huaidong and He, Shengfeng},
  title     = {Beyond Textual Constraints: Learning Novel Diffusion Conditions with Fewer Examples},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7109--7118}
}
Exploiting Style Latent Flows for Generalizing Deepfake Video Detection: Jongwook Choi,

Taehoon Kim,

Yonghyun Jeong,

Seungryul Baek,

Jongwon Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2024_CVPR,
  author    = {Choi, Jongwook and Kim, Taehoon and Jeong, Yonghyun and Baek, Seungryul and Choi, Jongwon},
  title     = {Exploiting Style Latent Flows for Generalizing Deepfake Video Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1133--1143}
}
Video-P2P: Video Editing with Cross-attention Control: Shaoteng Liu,

Yuechen Zhang,

Wenbo Li,

Zhe Lin,

Jiaya Jia; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Shaoteng and Zhang, Yuechen and Li, Wenbo and Lin, Zhe and Jia, Jiaya},
  title     = {Video-P2P: Video Editing with Cross-attention Control},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8599--8608}
}
Hunting Attributes: Context Prototype-Aware Learning for Weakly Supervised Semantic Segmentation: Feilong Tang,

Zhongxing Xu,

Zhaojun Qu,

Wei Feng,

Xingjian Jiang,

Zongyuan Ge; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2024_CVPR,
  author    = {Tang, Feilong and Xu, Zhongxing and Qu, Zhaojun and Feng, Wei and Jiang, Xingjian and Ge, Zongyuan},
  title     = {Hunting Attributes: Context Prototype-Aware Learning for Weakly Supervised Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3324--3334}
}
PIE-NeRF: Physics-based Interactive Elastodynamics with NeRF: Yutao Feng,

Yintong Shang,

Xuan Li,

Tianjia Shao,

Chenfanfu Jiang,

Yin Yang; [pdf] [supp]
[bibtex]
@InProceedings{Feng_2024_CVPR,
  author    = {Feng, Yutao and Shang, Yintong and Li, Xuan and Shao, Tianjia and Jiang, Chenfanfu and Yang, Yin},
  title     = {PIE-NeRF: Physics-based Interactive Elastodynamics with NeRF},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4450--4461}
}
FlashAvatar: High-fidelity Head Avatar with Efficient Gaussian Embedding: Jun Xiang,

Xuan Gao,

Yudong Guo,

Juyong Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiang_2024_CVPR,
  author    = {Xiang, Jun and Gao, Xuan and Guo, Yudong and Zhang, Juyong},
  title     = {FlashAvatar: High-fidelity Head Avatar with Efficient Gaussian Embedding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1802--1812}
}
ZERO-IG: Zero-Shot Illumination-Guided Joint Denoising and Adaptive Enhancement for Low-Light Images: Yiqi Shi,

Duo Liu,

Liguo Zhang,

Ye Tian,

Xuezhi Xia,

Xiaojing Fu; [pdf] [supp]
[bibtex]
@InProceedings{Shi_2024_CVPR,
  author    = {Shi, Yiqi and Liu, Duo and Zhang, Liguo and Tian, Ye and Xia, Xuezhi and Fu, Xiaojing},
  title     = {ZERO-IG: Zero-Shot Illumination-Guided Joint Denoising and Adaptive Enhancement for Low-Light Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3015--3024}
}
FinePOSE: Fine-Grained Prompt-Driven 3D Human Pose Estimation via Diffusion Models: Jinglin Xu,

Yijie Guo,

Yuxin Peng; [pdf] [arXiv]
[bibtex]
@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Jinglin and Guo, Yijie and Peng, Yuxin},
  title     = {FinePOSE: Fine-Grained Prompt-Driven 3D Human Pose Estimation via Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {561--570}
}
DreamPropeller: Supercharge Text-to-3D Generation with Parallel Sampling: Linqi Zhou,

Andy Shih,

Chenlin Meng,

Stefano Ermon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Linqi and Shih, Andy and Meng, Chenlin and Ermon, Stefano},
  title     = {DreamPropeller: Supercharge Text-to-3D Generation with Parallel Sampling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4610--4619}
}
Dysen-VDM: Empowering Dynamics-aware Text-to-Video Diffusion with LLMs: Hao Fei,

Shengqiong Wu,

Wei Ji,

Hanwang Zhang,

Tat-Seng Chua; [pdf] [supp]
[bibtex]
@InProceedings{Fei_2024_CVPR,
  author    = {Fei, Hao and Wu, Shengqiong and Ji, Wei and Zhang, Hanwang and Chua, Tat-Seng},
  title     = {Dysen-VDM: Empowering Dynamics-aware Text-to-Video Diffusion with LLMs},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7641--7653}
}
General Object Foundation Model for Images and Videos at Scale: Junfeng Wu,

Yi Jiang,

Qihao Liu,

Zehuan Yuan,

Xiang Bai,

Song Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Junfeng and Jiang, Yi and Liu, Qihao and Yuan, Zehuan and Bai, Xiang and Bai, Song},
  title     = {General Object Foundation Model for Images and Videos at Scale},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3783--3795}
}
Inlier Confidence Calibration for Point Cloud Registration: Yongzhe Yuan,

Yue Wu,

Xiaolong Fan,

Maoguo Gong,

Qiguang Miao,

Wenping Ma; [pdf] [supp]
[bibtex]
@InProceedings{Yuan_2024_CVPR,
  author    = {Yuan, Yongzhe and Wu, Yue and Fan, Xiaolong and Gong, Maoguo and Miao, Qiguang and Ma, Wenping},
  title     = {Inlier Confidence Calibration for Point Cloud Registration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5312--5321}
}
Readout Guidance: Learning Control from Diffusion Features: Grace Luo,

Trevor Darrell,

Oliver Wang,

Dan B Goldman,

Aleksander Holynski; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2024_CVPR,
  author    = {Luo, Grace and Darrell, Trevor and Wang, Oliver and Goldman, Dan B and Holynski, Aleksander},
  title     = {Readout Guidance: Learning Control from Diffusion Features},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8217--8227}
}
A Unified Approach for Text- and Image-guided 4D Scene Generation: Yufeng Zheng,

Xueting Li,

Koki Nagano,

Sifei Liu,

Otmar Hilliges,

Shalini De Mello; [pdf] [supp]
[bibtex]
@InProceedings{Zheng_2024_CVPR,
  author    = {Zheng, Yufeng and Li, Xueting and Nagano, Koki and Liu, Sifei and Hilliges, Otmar and De Mello, Shalini},
  title     = {A Unified Approach for Text- and Image-guided 4D Scene Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7300--7309}
}
GaussianAvatar: Towards Realistic Human Avatar Modeling from a Single Video via Animatable 3D Gaussians: Liangxiao Hu,

Hongwen Zhang,

Yuxiang Zhang,

Boyao Zhou,

Boning Liu,

Shengping Zhang,

Liqiang Nie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2024_CVPR,
  author    = {Hu, Liangxiao and Zhang, Hongwen and Zhang, Yuxiang and Zhou, Boyao and Liu, Boning and Zhang, Shengping and Nie, Liqiang},
  title     = {GaussianAvatar: Towards Realistic Human Avatar Modeling from a Single Video via Animatable 3D Gaussians},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {634--644}
}
Mosaic-SDF for 3D Generative Models: Lior Yariv,

Omri Puny,

Oran Gafni,

Yaron Lipman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yariv_2024_CVPR,
  author    = {Yariv, Lior and Puny, Omri and Gafni, Oran and Lipman, Yaron},
  title     = {Mosaic-SDF for 3D Generative Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4630--4639}
}
Diffusion Handles Enabling 3D Edits for Diffusion Models by Lifting Activations to 3D: Karran Pandey,

Paul Guerrero,

Matheus Gadelha,

Yannick Hold-Geoffroy,

Karan Singh,

Niloy J. Mitra; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pandey_2024_CVPR,
  author    = {Pandey, Karran and Guerrero, Paul and Gadelha, Matheus and Hold-Geoffroy, Yannick and Singh, Karan and Mitra, Niloy J.},
  title     = {Diffusion Handles Enabling 3D Edits for Diffusion Models by Lifting Activations to 3D},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7695--7704}
}
Friendly Sharpness-Aware Minimization: Tao Li,

Pan Zhou,

Zhengbao He,

Xinwen Cheng,

Xiaolin Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Tao and Zhou, Pan and He, Zhengbao and Cheng, Xinwen and Huang, Xiaolin},
  title     = {Friendly Sharpness-Aware Minimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5631--5640}
}
BIVDiff: A Training-Free Framework for General-Purpose Video Synthesis via Bridging Image and Video Diffusion Models: Fengyuan Shi,

Jiaxi Gu,

Hang Xu,

Songcen Xu,

Wei Zhang,

Limin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2024_CVPR,
  author    = {Shi, Fengyuan and Gu, Jiaxi and Xu, Hang and Xu, Songcen and Zhang, Wei and Wang, Limin},
  title     = {BIVDiff: A Training-Free Framework for General-Purpose Video Synthesis via Bridging Image and Video Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7393--7402}
}
NC-TTT: A Noise Contrastive Approach for Test-Time Training: David Osowiechi,

Gustavo A. Vargas Hakim,

Mehrdad Noori,

Milad Cheraghalikhani,

Ali Bahri,

Moslem Yazdanpanah,

Ismail Ben Ayed,

Christian Desrosiers; [pdf] [supp]
[bibtex]
@InProceedings{Osowiechi_2024_CVPR,
  author    = {Osowiechi, David and Vargas Hakim, Gustavo A. and Noori, Mehrdad and Cheraghalikhani, Milad and Bahri, Ali and Yazdanpanah, Moslem and Ben Ayed, Ismail and Desrosiers, Christian},
  title     = {NC-TTT: A Noise Contrastive Approach for Test-Time Training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6078--6086}
}
Small Scale Data-Free Knowledge Distillation: He Liu,

Yikai Wang,

Huaping Liu,

Fuchun Sun,

Anbang Yao; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, He and Wang, Yikai and Liu, Huaping and Sun, Fuchun and Yao, Anbang},
  title     = {Small Scale Data-Free Knowledge Distillation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6008--6016}
}
CFPL-FAS: Class Free Prompt Learning for Generalizable Face Anti-spoofing: Ajian Liu,

Shuai Xue,

Jianwen Gan,

Jun Wan,

Yanyan Liang,

Jiankang Deng,

Sergio Escalera,

Zhen Lei; [pdf]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Ajian and Xue, Shuai and Gan, Jianwen and Wan, Jun and Liang, Yanyan and Deng, Jiankang and Escalera, Sergio and Lei, Zhen},
  title     = {CFPL-FAS: Class Free Prompt Learning for Generalizable Face Anti-spoofing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {222--232}
}
Open Vocabulary Semantic Scene Sketch Understanding: Ahmed Bourouis,

Judith E. Fan,

Yulia Gryaditskaya; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bourouis_2024_CVPR,
  author    = {Bourouis, Ahmed and Fan, Judith E. and Gryaditskaya, Yulia},
  title     = {Open Vocabulary Semantic Scene Sketch Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4176--4186}
}
IntrinsicAvatar: Physically Based Inverse Rendering of Dynamic Humans from Monocular Videos via Explicit Ray Tracing: Shaofei Wang,

Bozidar Antic,

Andreas Geiger,

Siyu Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Shaofei and Antic, Bozidar and Geiger, Andreas and Tang, Siyu},
  title     = {IntrinsicAvatar: Physically Based Inverse Rendering of Dynamic Humans from Monocular Videos via Explicit Ray Tracing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1877--1888}
}
Efficient Detection of Long Consistent Cycles and its Application to Distributed Synchronization: Shaohan Li,

Yunpeng Shi,

Gilad Lerman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Shaohan and Shi, Yunpeng and Lerman, Gilad},
  title     = {Efficient Detection of Long Consistent Cycles and its Application to Distributed Synchronization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5260--5269}
}
Vlogger: Make Your Dream A Vlog: Shaobin Zhuang,

Kunchang Li,

Xinyuan Chen,

Yaohui Wang,

Ziwei Liu,

Yu Qiao,

Yali Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhuang_2024_CVPR,
  author    = {Zhuang, Shaobin and Li, Kunchang and Chen, Xinyuan and Wang, Yaohui and Liu, Ziwei and Qiao, Yu and Wang, Yali},
  title     = {Vlogger: Make Your Dream A Vlog},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8806--8817}
}
Neural 3D Strokes: Creating Stylized 3D Scenes with Vectorized 3D Strokes: Hao-Bin Duan,

Miao Wang,

Yan-Xun Li,

Yong-Liang Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Duan_2024_CVPR,
  author    = {Duan, Hao-Bin and Wang, Miao and Li, Yan-Xun and Yang, Yong-Liang},
  title     = {Neural 3D Strokes: Creating Stylized 3D Scenes with Vectorized 3D Strokes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5240--5249}
}
Multi-Object Tracking in the Dark: Xinzhe Wang,

Kang Ma,

Qiankun Liu,

Yunhao Zou,

Ying Fu; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Xinzhe and Ma, Kang and Liu, Qiankun and Zou, Yunhao and Fu, Ying},
  title     = {Multi-Object Tracking in the Dark},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {382--392},
}
UniHuman: A Unified Model For Editing Human Images in the Wild: Nannan Li,

Qing Liu,

Krishna Kumar Singh,

Yilin Wang,

Jianming Zhang,

Bryan A. Plummer,

Zhe Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Nannan and Liu, Qing and Singh, Krishna Kumar and Wang, Yilin and Zhang, Jianming and Plummer, Bryan A. and Lin, Zhe},
  title     = {{UniHuman}: A Unified Model For Editing Human Images in the Wild},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2039--2048},
}
DiffAgent: Fast and Accurate Text-to-Image API Selection with Large Language Model: Lirui Zhao,

Yue Yang,

Kaipeng Zhang,

Wenqi Shao,

Yuxin Zhang,

Yu Qiao,

Ping Luo,

Rongrong Ji; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Lirui and Yang, Yue and Zhang, Kaipeng and Shao, Wenqi and Zhang, Yuxin and Qiao, Yu and Luo, Ping and Ji, Rongrong},
  title     = {{DiffAgent}: Fast and Accurate Text-to-Image {API} Selection with Large Language Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6390--6399},
}
In Search of a Data Transformation That Accelerates Neural Field Training: Junwon Seo,

Sangyoon Lee,

Kwang In Kim,

Jaeho Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Seo_2024_CVPR,
  author    = {Seo, Junwon and Lee, Sangyoon and Kim, Kwang In and Lee, Jaeho},
  title     = {In Search of a Data Transformation That Accelerates Neural Field Training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4830--4839},
}
Zero-Painter: Training-Free Layout Control for Text-to-Image Synthesis: Marianna Ohanyan,

Hayk Manukyan,

Zhangyang Wang,

Shant Navasardyan,

Humphrey Shi; [pdf] [supp]
[bibtex]
@InProceedings{Ohanyan_2024_CVPR,
  author    = {Ohanyan, Marianna and Manukyan, Hayk and Wang, Zhangyang and Navasardyan, Shant and Shi, Humphrey},
  title     = {{Zero-Painter}: Training-Free Layout Control for Text-to-Image Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8764--8774},
}
Towards 3D Vision with Low-Cost Single-Photon Cameras: Fangzhou Mu,

Carter Sifferman,

Sacha Jungerman,

Yiquan Li,

Mark Han,

Michael Gleicher,

Mohit Gupta,

Yin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mu_2024_CVPR,
  author    = {Mu, Fangzhou and Sifferman, Carter and Jungerman, Sacha and Li, Yiquan and Han, Mark and Gleicher, Michael and Gupta, Mohit and Li, Yin},
  title     = {Towards {3D} Vision with Low-Cost Single-Photon Cameras},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5302--5311},
}
WonderJourney: Going from Anywhere to Everywhere: Hong-Xing Yu,

Haoyi Duan,

Junhwa Hur,

Kyle Sargent,

Michael Rubinstein,

William T. Freeman,

Forrester Cole,

Deqing Sun,

Noah Snavely,

Jiajun Wu,

Charles Herrmann; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2024_CVPR,
  author    = {Yu, Hong-Xing and Duan, Haoyi and Hur, Junhwa and Sargent, Kyle and Rubinstein, Michael and Freeman, William T. and Cole, Forrester and Sun, Deqing and Snavely, Noah and Wu, Jiajun and Herrmann, Charles},
  title     = {{WonderJourney}: Going from Anywhere to Everywhere},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6658--6667},
}
4D-fy: Text-to-4D Generation Using Hybrid Score Distillation Sampling: Sherwin Bahmani,

Ivan Skorokhodov,

Victor Rong,

Gordon Wetzstein,

Leonidas Guibas,

Peter Wonka,

Sergey Tulyakov,

Jeong Joon Park,

Andrea Tagliasacchi,

David B. Lindell; [pdf] [supp]
[bibtex]
@InProceedings{Bahmani_2024_CVPR,
  author    = {Bahmani, Sherwin and Skorokhodov, Ivan and Rong, Victor and Wetzstein, Gordon and Guibas, Leonidas and Wonka, Peter and Tulyakov, Sergey and Park, Jeong Joon and Tagliasacchi, Andrea and Lindell, David B.},
  title     = {{4D-fy}: {Text-to-4D} Generation Using Hybrid Score Distillation Sampling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7996--8006},
}
FreeControl: Training-Free Spatial Control of Any Text-to-Image Diffusion Model with Any Condition: Sicheng Mo,

Fangzhou Mu,

Kuan Heng Lin,

Yanli Liu,

Bochen Guan,

Yin Li,

Bolei Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mo_2024_CVPR,
  author    = {Mo, Sicheng and Mu, Fangzhou and Lin, Kuan Heng and Liu, Yanli and Guan, Bochen and Li, Yin and Zhou, Bolei},
  title     = {{FreeControl}: Training-Free Spatial Control of Any Text-to-Image Diffusion Model with Any Condition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7465--7475},
}
VMC: Video Motion Customization using Temporal Attention Adaption for Text-to-Video Diffusion Models: Hyeonho Jeong,

Geon Yeong Park,

Jong Chul Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2024_CVPR,
  author    = {Jeong, Hyeonho and Park, Geon Yeong and Ye, Jong Chul},
  title     = {{VMC}: Video Motion Customization using Temporal Attention Adaption for Text-to-Video Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9212--9221},
}
DistriFusion: Distributed Parallel Inference for High-Resolution Diffusion Models: Muyang Li,

Tianle Cai,

Jiaxin Cao,

Qinsheng Zhang,

Han Cai,

Junjie Bai,

Yangqing Jia,

Kai Li,

Song Han; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Muyang and Cai, Tianle and Cao, Jiaxin and Zhang, Qinsheng and Cai, Han and Bai, Junjie and Jia, Yangqing and Li, Kai and Han, Song},
  title     = {{DistriFusion}: Distributed Parallel Inference for High-Resolution Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7183--7193},
}
AZ-NAS: Assembling Zero-Cost Proxies for Network Architecture Search: Junghyup Lee,

Bumsub Ham; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2024_CVPR,
  author    = {Lee, Junghyup and Ham, Bumsub},
  title     = {{AZ-NAS}: Assembling Zero-Cost Proxies for Network Architecture Search},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5893--5903},
}
Improving Physics-Augmented Continuum Neural Radiance Field-Based Geometry-Agnostic System Identification with Lagrangian Particle Optimization: Takuhiro Kaneko; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kaneko_2024_CVPR,
  author    = {Kaneko, Takuhiro},
  title     = {Improving Physics-Augmented Continuum Neural Radiance Field-Based Geometry-Agnostic System Identification with {Lagrangian} Particle Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5470--5480},
}
Beyond Image Super-Resolution for Image Recognition with Task-Driven Perceptual Loss: Jaeha Kim,

Junghun Oh,

Kyoung Mu Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2024_CVPR,
  author    = {Kim, Jaeha and Oh, Junghun and Lee, Kyoung Mu},
  title     = {Beyond Image Super-Resolution for Image Recognition with Task-Driven Perceptual Loss},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2651--2661},
}
XCube: Large-Scale 3D Generative Modeling using Sparse Voxel Hierarchies: Xuanchi Ren,

Jiahui Huang,

Xiaohui Zeng,

Ken Museth,

Sanja Fidler,

Francis Williams; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2024_CVPR,
  author    = {Ren, Xuanchi and Huang, Jiahui and Zeng, Xiaohui and Museth, Ken and Fidler, Sanja and Williams, Francis},
  title     = {{XCube}: Large-Scale {3D} Generative Modeling using Sparse Voxel Hierarchies},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4209--4219},
}
Reconstruction-free Cascaded Adaptive Compressive Sensing: Chenxi Qiu,

Tao Yue,

Xuemei Hu; [pdf]
[bibtex]
@InProceedings{Qiu_2024_CVPR,
  author    = {Qiu, Chenxi and Yue, Tao and Hu, Xuemei},
  title     = {Reconstruction-free Cascaded Adaptive Compressive Sensing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2620--2630},
}
USE: Universal Segment Embeddings for Open-Vocabulary Image Segmentation: Xiaoqi Wang,

Wenbin He,

Xiwei Xuan,

Clint Sebastian,

Jorge Piazentin Ono,

Xin Li,

Sima Behpour,

Thang Doan,

Liang Gou,

Han-Wei Shen,

Liu Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Xiaoqi and He, Wenbin and Xuan, Xiwei and Sebastian, Clint and Ono, Jorge Piazentin and Li, Xin and Behpour, Sima and Doan, Thang and Gou, Liang and Shen, Han-Wei and Ren, Liu},
  title     = {{USE}: Universal Segment Embeddings for Open-Vocabulary Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4187--4196},
}
Functional Diffusion: Biao Zhang,

Peter Wonka; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Biao and Wonka, Peter},
  title     = {Functional Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4723--4732},
}
Wired Perspectives: Multi-View Wire Art Embraces Generative AI: Zhiyu Qu,

Lan Yang,

Honggang Zhang,

Tao Xiang,

Kaiyue Pang,

Yi-Zhe Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qu_2024_CVPR,
  author    = {Qu, Zhiyu and Yang, Lan and Zhang, Honggang and Xiang, Tao and Pang, Kaiyue and Song, Yi-Zhe},
  title     = {Wired Perspectives: Multi-View Wire Art Embraces Generative {AI}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6149--6158},
}
Leveraging Camera Triplets for Efficient and Accurate Structure-from-Motion: Lalit Manam,

Venu Madhav Govindu; [pdf] [supp]
[bibtex]
@InProceedings{Manam_2024_CVPR,
  author    = {Manam, Lalit and Govindu, Venu Madhav},
  title     = {Leveraging Camera Triplets for Efficient and Accurate Structure-from-Motion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4959--4968},
}
SimDA: Simple Diffusion Adapter for Efficient Video Generation: Zhen Xing,

Qi Dai,

Han Hu,

Zuxuan Wu,

Yu-Gang Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xing_2024_CVPR,
  author    = {Xing, Zhen and Dai, Qi and Hu, Han and Wu, Zuxuan and Jiang, Yu-Gang},
  title     = {{SimDA}: Simple Diffusion Adapter for Efficient Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7827--7839},
}
Multi-view Aggregation Network for Dichotomous Image Segmentation: Qian Yu,

Xiaoqi Zhao,

Youwei Pang,

Lihe Zhang,

Huchuan Lu; [pdf] [arXiv]
[bibtex]
@InProceedings{Yu_2024_CVPR,
  author    = {Yu, Qian and Zhao, Xiaoqi and Pang, Youwei and Zhang, Lihe and Lu, Huchuan},
  title     = {Multi-view Aggregation Network for Dichotomous Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3921--3930},
}
A Recipe for Scaling up Text-to-Video Generation with Text-free Videos: Xiang Wang,

Shiwei Zhang,

Hangjie Yuan,

Zhiwu Qing,

Biao Gong,

Yingya Zhang,

Yujun Shen,

Changxin Gao,

Nong Sang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Xiang and Zhang, Shiwei and Yuan, Hangjie and Qing, Zhiwu and Gong, Biao and Zhang, Yingya and Shen, Yujun and Gao, Changxin and Sang, Nong},
  title     = {A Recipe for Scaling up Text-to-Video Generation with Text-free Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6572--6582},
}
Molecular Data Programming: Towards Molecule Pseudo-labeling with Systematic Weak Supervision: Xin Juan,

Kaixiong Zhou,

Ninghao Liu,

Tianlong Chen,

Xin Wang; [pdf] [supp]
[bibtex]
@InProceedings{Juan_2024_CVPR,
  author    = {Juan, Xin and Zhou, Kaixiong and Liu, Ninghao and Chen, Tianlong and Wang, Xin},
  title     = {Molecular Data Programming: Towards Molecule Pseudo-labeling with Systematic Weak Supervision},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {308--318},
}
Residual Denoising Diffusion Models: Jiawei Liu,

Qiang Wang,

Huijie Fan,

Yinong Wang,

Yandong Tang,

Liangqiong Qu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Jiawei and Wang, Qiang and Fan, Huijie and Wang, Yinong and Tang, Yandong and Qu, Liangqiong},
  title     = {Residual Denoising Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2773--2783},
}
Towards Accurate and Robust Architectures via Neural Architecture Search: Yuwei Ou,

Yuqi Feng,

Yanan Sun; [pdf] [arXiv]
[bibtex]
@InProceedings{Ou_2024_CVPR,
  author    = {Ou, Yuwei and Feng, Yuqi and Sun, Yanan},
  title     = {Towards Accurate and Robust Architectures via Neural Architecture Search},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5967--5976},
}
Closely Interactive Human Reconstruction with Proxemics and Physics-Guided Adaption: Buzhen Huang,

Chen Li,

Chongyang Xu,

Liang Pan,

Yangang Wang,

Gim Hee Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Buzhen and Li, Chen and Xu, Chongyang and Pan, Liang and Wang, Yangang and Lee, Gim Hee},
  title     = {Closely Interactive Human Reconstruction with Proxemics and Physics-Guided Adaption},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1011--1021},
}
Taming Stable Diffusion for Text to 360 Panorama Image Generation: Cheng Zhang,

Qianyi Wu,

Camilo Cruz Gambardella,

Xiaoshui Huang,

Dinh Phung,

Wanli Ouyang,

Jianfei Cai; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Cheng and Wu, Qianyi and Gambardella, Camilo Cruz and Huang, Xiaoshui and Phung, Dinh and Ouyang, Wanli and Cai, Jianfei},
  title     = {Taming {Stable Diffusion} for Text to 360 Panorama Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6347--6357},
}
Modular Blind Video Quality Assessment: Wen Wen,

Mu Li,

Yabin Zhang,

Yiting Liao,

Junlin Li,

Li Zhang,

Kede Ma; [pdf] [arXiv]
[bibtex]
@InProceedings{Wen_2024_CVPR,
  author    = {Wen, Wen and Li, Mu and Zhang, Yabin and Liao, Yiting and Li, Junlin and Zhang, Li and Ma, Kede},
  title     = {Modular Blind Video Quality Assessment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2763--2772},
}
RELI11D: A Comprehensive Multimodal Human Motion Dataset and Method: Ming Yan,

Yan Zhang,

Shuqiang Cai,

Shuqi Fan,

Xincheng Lin,

Yudi Dai,

Siqi Shen,

Chenglu Wen,

Lan Xu,

Yuexin Ma,

Cheng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2024_CVPR,
  author    = {Yan, Ming and Zhang, Yan and Cai, Shuqiang and Fan, Shuqi and Lin, Xincheng and Dai, Yudi and Shen, Siqi and Wen, Chenglu and Xu, Lan and Ma, Yuexin and Wang, Cheng},
  title     = {{RELI11D}: A Comprehensive Multimodal Human Motion Dataset and Method},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2250--2262},
}
One-Class Face Anti-spoofing via Spoof Cue Map-Guided Feature Learning: Pei-Kai Huang,

Cheng-Hsuan Chiang,

Tzu-Hsien Chen,

Jun-Xiong Chong,

Tyng-Luh Liu,

Chiou-Ting Hsu; [pdf]
[bibtex]
@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Pei-Kai and Chiang, Cheng-Hsuan and Chen, Tzu-Hsien and Chong, Jun-Xiong and Liu, Tyng-Luh and Hsu, Chiou-Ting},
  title     = {One-Class Face Anti-spoofing via Spoof Cue Map-Guided Feature Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {277--286},
}
InteractDiffusion: Interaction Control in Text-to-Image Diffusion Models: Jiun Tian Hoe,

Xudong Jiang,

Chee Seng Chan,

Yap-Peng Tan,

Weipeng Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hoe_2024_CVPR,
  author    = {Hoe, Jiun Tian and Jiang, Xudong and Chan, Chee Seng and Tan, Yap-Peng and Hu, Weipeng},
  title     = {{InteractDiffusion}: Interaction Control in Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6180--6189},
}
Emergent Open-Vocabulary Semantic Segmentation from Off-the-shelf Vision-Language Models: Jiayun Luo,

Siddhesh Khandelwal,

Leonid Sigal,

Boyang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2024_CVPR,
  author    = {Luo, Jiayun and Khandelwal, Siddhesh and Sigal, Leonid and Li, Boyang},
  title     = {Emergent Open-Vocabulary Semantic Segmentation from Off-the-shelf Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4029--4040},
}
SelfPose3d: Self-Supervised Multi-Person Multi-View 3d Pose Estimation: Vinkle Srivastav,

Keqi Chen,

Nicolas Padoy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Srivastav_2024_CVPR,
  author    = {Srivastav, Vinkle and Chen, Keqi and Padoy, Nicolas},
  title     = {{SelfPose3d}: Self-Supervised Multi-Person Multi-View 3d Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2502--2512},
}
Joint2Human: High-Quality 3D Human Generation via Compact Spherical Embedding of 3D Joints: Muxin Zhang,

Qiao Feng,

Zhuo Su,

Chao Wen,

Zhou Xue,

Kun Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Muxin and Feng, Qiao and Su, Zhuo and Wen, Chao and Xue, Zhou and Li, Kun},
  title     = {{Joint2Human}: High-Quality {3D} Human Generation via Compact Spherical Embedding of {3D} Joints},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1429--1438},
}
Prompt-Free Diffusion: Taking "Text" out of Text-to-Image Diffusion Models: Xingqian Xu,

Jiayi Guo,

Zhangyang Wang,

Gao Huang,

Irfan Essa,

Humphrey Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Xingqian and Guo, Jiayi and Wang, Zhangyang and Huang, Gao and Essa, Irfan and Shi, Humphrey},
  title     = {Prompt-Free Diffusion: Taking ``Text'' out of Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8682--8692},
}
Multi-agent Long-term 3D Human Pose Forecasting via Interaction-aware Trajectory Conditioning: Jaewoo Jeong,

Daehee Park,

Kuk-Jin Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2024_CVPR,
  author    = {Jeong, Jaewoo and Park, Daehee and Yoon, Kuk-Jin},
  title     = {Multi-agent Long-term {3D} Human Pose Forecasting via Interaction-aware Trajectory Conditioning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1617--1628},
}
CLOAF: CoLlisiOn-Aware Human Flow: Andrey Davydov,

Martin Engilberge,

Mathieu Salzmann,

Pascal Fua; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Davydov_2024_CVPR,
  author    = {Davydov, Andrey and Engilberge, Martin and Salzmann, Mathieu and Fua, Pascal},
  title     = {{CLOAF}: {CoLlisiOn-Aware} Human Flow},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1176--1185},
}
Hybrid Functional Maps for Crease-Aware Non-Isometric Shape Matching: Lennart Bastian,

Yizheng Xie,

Nassir Navab,

Zorah Lähner; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bastian_2024_CVPR,
  author    = {Bastian, Lennart and Xie, Yizheng and Navab, Nassir and L{\"a}hner, Zorah},
  title     = {Hybrid Functional Maps for Crease-Aware Non-Isometric Shape Matching},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3313--3323},
}
Density-Guided Semi-Supervised 3D Semantic Segmentation with Dual-Space Hardness Sampling: Jianan Li,

Qiulei Dong; [pdf] [supp]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Jianan and Dong, Qiulei},
  title     = {Density-Guided Semi-Supervised {3D} Semantic Segmentation with Dual-Space Hardness Sampling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3260--3269},
}
ElasticDiffusion: Training-free Arbitrary Size Image Generation through Global-Local Content Separation: Moayed Haji-Ali,

Guha Balakrishnan,

Vicente Ordonez; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Haji-Ali_2024_CVPR,
  author    = {Haji-Ali, Moayed and Balakrishnan, Guha and Ordonez, Vicente},
  title     = {{ElasticDiffusion}: Training-free Arbitrary Size Image Generation through Global-Local Content Separation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6603--6612},
}
Locally Adaptive Neural 3D Morphable Models: Michail Tarasiou,

Rolandos Alexandros Potamias,

Eimear O'Sullivan,

Stylianos Ploumpis,

Stefanos Zafeiriou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tarasiou_2024_CVPR,
  author    = {Tarasiou, Michail and Potamias, Rolandos Alexandros and O'Sullivan, Eimear and Ploumpis, Stylianos and Zafeiriou, Stefanos},
  title     = {Locally Adaptive Neural {3D} Morphable Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1867--1876},
}
ICON: Incremental CONfidence for Joint Pose and Radiance Field Optimization: Weiyao Wang,

Pierre Gleize,

Hao Tang,

Xingyu Chen,

Kevin J Liang,

Matt Feiszli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Weiyao and Gleize, Pierre and Tang, Hao and Chen, Xingyu and Liang, Kevin J and Feiszli, Matt},
  title     = {{ICON}: Incremental {CONfidence} for Joint Pose and Radiance Field Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5406--5417},
}
Learned Scanpaths Aid Blind Panoramic Video Quality Assessment: Kanglong Fan,

Wen Wen,

Mu Li,

Yifan Peng,

Kede Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2024_CVPR,
  author    = {Fan, Kanglong and Wen, Wen and Li, Mu and Peng, Yifan and Ma, Kede},
  title     = {Learned Scanpaths Aid Blind Panoramic Video Quality Assessment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2599--2608},
}
TI2V-Zero: Zero-Shot Image Conditioning for Text-to-Video Diffusion Models: Haomiao Ni,

Bernhard Egger,

Suhas Lohit,

Anoop Cherian,

Ye Wang,

Toshiaki Koike-Akino,

Sharon X. Huang,

Tim K. Marks; [pdf] [supp]
[bibtex]
@InProceedings{Ni_2024_CVPR,
  author    = {Ni, Haomiao and Egger, Bernhard and Lohit, Suhas and Cherian, Anoop and Wang, Ye and Koike-Akino, Toshiaki and Huang, Sharon X. and Marks, Tim K.},
  title     = {{TI2V-Zero}: Zero-Shot Image Conditioning for Text-to-Video Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9015--9025},
}
iToF-flow-based High Frame Rate Depth Imaging: Yu Meng,

Zhou Xue,

Xu Chang,

Xuemei Hu,

Tao Yue; [pdf]
[bibtex]
@InProceedings{Meng_2024_CVPR,
  author    = {Meng, Yu and Xue, Zhou and Chang, Xu and Hu, Xuemei and Yue, Tao},
  title     = {{iToF-flow-based} High Frame Rate Depth Imaging},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4929--4938},
}
Relightful Harmonization: Lighting-aware Portrait Background Replacement: Mengwei Ren,

Wei Xiong,

Jae Shin Yoon,

Zhixin Shu,

Jianming Zhang,

HyunJoon Jung,

Guido Gerig,

He Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2024_CVPR,
  author    = {Ren, Mengwei and Xiong, Wei and Yoon, Jae Shin and Shu, Zhixin and Zhang, Jianming and Jung, HyunJoon and Gerig, Guido and Zhang, He},
  title     = {Relightful Harmonization: Lighting-aware Portrait Background Replacement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6452--6462},
}
Mitigating Motion Blur in Neural Radiance Fields with Events and Frames: Marco Cannici,

Davide Scaramuzza; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cannici_2024_CVPR,
  author    = {Cannici, Marco and Scaramuzza, Davide},
  title     = {Mitigating Motion Blur in Neural Radiance Fields with Events and Frames},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9286--9296},
}
TokenHMR: Advancing Human Mesh Recovery with a Tokenized Pose Representation: Sai Kumar Dwivedi,

Yu Sun,

Priyanka Patel,

Yao Feng,

Michael J. Black; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dwivedi_2024_CVPR,
  author    = {Dwivedi, Sai Kumar and Sun, Yu and Patel, Priyanka and Feng, Yao and Black, Michael J.},
  title     = {{TokenHMR}: Advancing Human Mesh Recovery with a Tokenized Pose Representation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1323--1333},
}
FaceCom: Towards High-fidelity 3D Facial Shape Completion via Optimization and Inpainting Guidance: Yinglong Li,

Hongyu Wu,

Xiaogang Wang,

Qingzhao Qin,

Yijiao Zhao,

Yong Wang,

Aimin Hao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Yinglong and Wu, Hongyu and Wang, Xiaogang and Qin, Qingzhao and Zhao, Yijiao and Wang, Yong and Hao, Aimin},
  title     = {{FaceCom}: Towards High-fidelity {3D} Facial Shape Completion via Optimization and Inpainting Guidance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2177--2186},
}
LightOctree: Lightweight 3D Spatially-Coherent Indoor Lighting Estimation: Xuecan Wang,

Shibang Xiao,

Xiaohui Liang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Xuecan and Xiao, Shibang and Liang, Xiaohui},
  title     = {{LightOctree}: Lightweight {3D} Spatially-Coherent Indoor Lighting Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4536--4545},
}
FaceLift: Semi-supervised 3D Facial Landmark Localization: David Ferman,

Pablo Garrido,

Gaurav Bharaj; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ferman_2024_CVPR,
  author    = {Ferman, David and Garrido, Pablo and Bharaj, Gaurav},
  title     = {{FaceLift}: Semi-supervised {3D} Facial Landmark Localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1781--1791},
}
PSDPM: Prototype-based Secondary Discriminative Pixels Mining for Weakly Supervised Semantic Segmentation: Xinqiao Zhao,

Ziqian Yang,

Tianhong Dai,

Bingfeng Zhang,

Jimin Xiao; [pdf]
[bibtex]
@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Xinqiao and Yang, Ziqian and Dai, Tianhong and Zhang, Bingfeng and Xiao, Jimin},
  title     = {{PSDPM}: Prototype-based Secondary Discriminative Pixels Mining for Weakly Supervised Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3437--3446},
}
Frozen CLIP: A Strong Backbone for Weakly Supervised Semantic Segmentation: Bingfeng Zhang,

Siyue Yu,

Yunchao Wei,

Yao Zhao,

Jimin Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Bingfeng and Yu, Siyue and Wei, Yunchao and Zhao, Yao and Xiao, Jimin},
  title     = {Frozen {CLIP}: A Strong Backbone for Weakly Supervised Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3796--3806},
}
LAFS: Landmark-based Facial Self-supervised Learning for Face Recognition: Zhonglin Sun,

Chen Feng,

Ioannis Patras,

Georgios Tzimiropoulos; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2024_CVPR,
  author    = {Sun, Zhonglin and Feng, Chen and Patras, Ioannis and Tzimiropoulos, Georgios},
  title     = {{LAFS}: Landmark-based Facial Self-supervised Learning for Face Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1639--1649},
}
SED: A Simple Encoder-Decoder for Open-Vocabulary Semantic Segmentation: Bin Xie,

Jiale Cao,

Jin Xie,

Fahad Shahbaz Khan,

Yanwei Pang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2024_CVPR,
  author    = {Xie, Bin and Cao, Jiale and Xie, Jin and Khan, Fahad Shahbaz and Pang, Yanwei},
  title     = {{SED}: A Simple Encoder-Decoder for Open-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3426--3436},
}
GPLD3D: Latent Diffusion of 3D Shape Generative Models by Enforcing Geometric and Physical Priors: Yuan Dong,

Qi Zuo,

Xiaodong Gu,

Weihao Yuan,

Zhengyi Zhao,

Zilong Dong,

Liefeng Bo,

Qixing Huang; [pdf] [supp]
[bibtex]
@InProceedings{Dong_2024_CVPR,
  author    = {Dong, Yuan and Zuo, Qi and Gu, Xiaodong and Yuan, Weihao and Zhao, Zhengyi and Dong, Zilong and Bo, Liefeng and Huang, Qixing},
  title     = {{GPLD3D}: Latent Diffusion of {3D} Shape Generative Models by Enforcing Geometric and Physical Priors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {56--66},
}
Self-correcting LLM-controlled Diffusion Models: Tsung-Han Wu,

Long Lian,

Joseph E. Gonzalez,

Boyi Li,

Trevor Darrell; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Tsung-Han and Lian, Long and Gonzalez, Joseph E. and Li, Boyi and Darrell, Trevor},
  title     = {Self-correcting {LLM-controlled} Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6327--6336},
}
PACER+: On-Demand Pedestrian Animation Controller in Driving Scenarios: Jingbo Wang,

Zhengyi Luo,

Ye Yuan,

Yixuan Li,

Bo Dai; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2024_CVPR, author = {Wang, Jingbo and Luo, Zhengyi and Yuan, Ye and Li, Yixuan and Dai, Bo}, title = {{PACER+}: On-Demand Pedestrian Animation Controller in Driving Scenarios}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {718--728} }
LTM: Lightweight Textured Mesh Extraction and Refinement of Large Unbounded Scenes for Efficient Storage and Real-time Rendering: Jaehoon Choi,

Rajvi Shah,

Qinbo Li,

Yipeng Wang,

Ayush Saraf,

Changil Kim,

Jia-Bin Huang,

Dinesh Manocha,

Suhib Alsisan,

Johannes Kopf; [pdf] [supp]
[bibtex]
@InProceedings{Choi_2024_CVPR, author = {Choi, Jaehoon and Shah, Rajvi and Li, Qinbo and Wang, Yipeng and Saraf, Ayush and Kim, Changil and Huang, Jia-Bin and Manocha, Dinesh and Alsisan, Suhib and Kopf, Johannes}, title = {{LTM}: Lightweight Textured Mesh Extraction and Refinement of Large Unbounded Scenes for Efficient Storage and Real-time Rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {5053--5063} }
Don't Drop Your Samples! Coherence-Aware Training Benefits Conditional Diffusion: Nicolas Dufour,

Victor Besnier,

Vicky Kalogeiton,

David Picard; [pdf] [supp]
[bibtex]
@InProceedings{Dufour_2024_CVPR, author = {Dufour, Nicolas and Besnier, Victor and Kalogeiton, Vicky and Picard, David}, title = {Don't Drop Your Samples! Coherence-Aware Training Benefits Conditional Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {6264--6273} }
What Do You See in Vehicle? Comprehensive Vision Solution for In-Vehicle Gaze Estimation: Yihua Cheng,

Yaning Zhu,

Zongji Wang,

Hongquan Hao,

Yongwei Liu,

Shiqing Cheng,

Xi Wang,

Hyung Jin Chang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2024_CVPR, author = {Cheng, Yihua and Zhu, Yaning and Wang, Zongji and Hao, Hongquan and Liu, Yongwei and Cheng, Shiqing and Wang, Xi and Chang, Hyung Jin}, title = {What Do You See in Vehicle? Comprehensive Vision Solution for In-Vehicle Gaze Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {1556--1565} }
UFORecon: Generalizable Sparse-View Surface Reconstruction from Arbitrary and Unfavorable Sets: Youngju Na,

Woo Jae Kim,

Kyu Beom Han,

Suhyeon Ha,

Sung-Eui Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Na_2024_CVPR, author = {Na, Youngju and Kim, Woo Jae and Han, Kyu Beom and Ha, Suhyeon and Yoon, Sung-Eui}, title = {{UFORecon}: Generalizable Sparse-View Surface Reconstruction from Arbitrary and Unfavorable Sets}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {5094--5104} }
Breathing Life Into Sketches Using Text-to-Video Priors: Rinon Gal,

Yael Vinker,

Yuval Alaluf,

Amit Bermano,

Daniel Cohen-Or,

Ariel Shamir,

Gal Chechik; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gal_2024_CVPR, author = {Gal, Rinon and Vinker, Yael and Alaluf, Yuval and Bermano, Amit and Cohen-Or, Daniel and Shamir, Ariel and Chechik, Gal}, title = {Breathing Life Into Sketches Using Text-to-Video Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {4325--4336} }
Learning Diffusion Texture Priors for Image Restoration: Tian Ye,

Sixiang Chen,

Wenhao Chai,

Zhaohu Xing,

Jing Qin,

Ge Lin,

Lei Zhu; [pdf]
[bibtex]
@InProceedings{Ye_2024_CVPR, author = {Ye, Tian and Chen, Sixiang and Chai, Wenhao and Xing, Zhaohu and Qin, Jing and Lin, Ge and Zhu, Lei}, title = {Learning Diffusion Texture Priors for Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {2524--2534} }
Entangled View-Epipolar Information Aggregation for Generalizable Neural Radiance Fields: Zhiyuan Min,

Yawei Luo,

Wei Yang,

Yuesong Wang,

Yi Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Min_2024_CVPR, author = {Min, Zhiyuan and Luo, Yawei and Yang, Wei and Wang, Yuesong and Yang, Yi}, title = {Entangled View-Epipolar Information Aggregation for Generalizable Neural Radiance Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {4906--4916} }
YolOOD: Utilizing Object Detection Concepts for Multi-Label Out-of-Distribution Detection: Alon Zolfi,

Guy Amit,

Amit Baras,

Satoru Koda,

Ikuya Morikawa,

Yuval Elovici,

Asaf Shabtai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zolfi_2024_CVPR, author = {Zolfi, Alon and Amit, Guy and Baras, Amit and Koda, Satoru and Morikawa, Ikuya and Elovici, Yuval and Shabtai, Asaf}, title = {{YolOOD}: Utilizing Object Detection Concepts for Multi-Label Out-of-Distribution Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {5788--5797} }
Collaborating Foundation Models for Domain Generalized Semantic Segmentation: Yasser Benigmim,

Subhankar Roy,

Slim Essid,

Vicky Kalogeiton,

Stéphane Lathuilière; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Benigmim_2024_CVPR, author = {Benigmim, Yasser and Roy, Subhankar and Essid, Slim and Kalogeiton, Vicky and Lathuili{\`e}re, St{\'e}phane}, title = {Collaborating Foundation Models for Domain Generalized Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {3108--3119} }
Towards Variable and Coordinated Holistic Co-Speech Motion Generation: Yifei Liu,

Qiong Cao,

Yandong Wen,

Huaiguang Jiang,

Changxing Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR, author = {Liu, Yifei and Cao, Qiong and Wen, Yandong and Jiang, Huaiguang and Ding, Changxing}, title = {Towards Variable and Coordinated Holistic Co-Speech Motion Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {1566--1576} }
AllSpark: Reborn Labeled Features from Unlabeled in Transformer for Semi-Supervised Semantic Segmentation: Haonan Wang,

Qixiang Zhang,

Yi Li,

Xiaomeng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR, author = {Wang, Haonan and Zhang, Qixiang and Li, Yi and Li, Xiaomeng}, title = {{AllSpark}: Reborn Labeled Features from Unlabeled in Transformer for Semi-Supervised Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {3627--3636} }
SIGNeRF: Scene Integrated Generation for Neural Radiance Fields: Jan-Niklas Dihlmann,

Andreas Engelhardt,

Hendrik Lensch; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dihlmann_2024_CVPR, author = {Dihlmann, Jan-Niklas and Engelhardt, Andreas and Lensch, Hendrik}, title = {{SIGNeRF}: Scene Integrated Generation for Neural Radiance Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {6679--6688} }
Generating Illustrated Instructions: Sachit Menon,

Ishan Misra,

Rohit Girdhar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Menon_2024_CVPR, author = {Menon, Sachit and Misra, Ishan and Girdhar, Rohit}, title = {Generating Illustrated Instructions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {6274--6284} }
Robust Image Denoising through Adversarial Frequency Mixup: Donghun Ryou,

Inju Ha,

Hyewon Yoo,

Dongwan Kim,

Bohyung Han; [pdf] [supp]
[bibtex]
@InProceedings{Ryou_2024_CVPR, author = {Ryou, Donghun and Ha, Inju and Yoo, Hyewon and Kim, Dongwan and Han, Bohyung}, title = {Robust Image Denoising through Adversarial Frequency Mixup}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {2723--2732} }
AnyScene: Customized Image Synthesis with Composited Foreground: Ruidong Chen,

Lanjun Wang,

Weizhi Nie,

Yongdong Zhang,

An-An Liu; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2024_CVPR, author = {Chen, Ruidong and Wang, Lanjun and Nie, Weizhi and Zhang, Yongdong and Liu, An-An}, title = {{AnyScene}: Customized Image Synthesis with Composited Foreground}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {8724--8733} }
Training Generative Image Super-Resolution Models by Wavelet-Domain Losses Enables Better Control of Artifacts: Cansu Korkmaz,

A. Murat Tekalp,

Zafer Dogan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Korkmaz_2024_CVPR, author = {Korkmaz, Cansu and Tekalp, A. Murat and Dogan, Zafer}, title = {Training Generative Image Super-Resolution Models by Wavelet-Domain Losses Enables Better Control of Artifacts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {5926--5936} }
Monocular Identity-Conditioned Facial Reflectance Reconstruction: Xingyu Ren,

Jiankang Deng,

Yuhao Cheng,

Jia Guo,

Chao Ma,

Yichao Yan,

Wenhan Zhu,

Xiaokang Yang; [pdf] [arXiv]
[bibtex]
@InProceedings{Ren_2024_CVPR, author = {Ren, Xingyu and Deng, Jiankang and Cheng, Yuhao and Guo, Jia and Ma, Chao and Yan, Yichao and Zhu, Wenhan and Yang, Xiaokang}, title = {Monocular Identity-Conditioned Facial Reflectance Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {885--895} }
C3: High-Performance and Low-Complexity Neural Compression from a Single Image or Video: Hyunjik Kim,

Matthias Bauer,

Lucas Theis,

Jonathan Richard Schwarz,

Emilien Dupont; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2024_CVPR, author = {Kim, Hyunjik and Bauer, Matthias and Theis, Lucas and Schwarz, Jonathan Richard and Dupont, Emilien}, title = {{C3}: High-Performance and Low-Complexity Neural Compression from a Single Image or Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {9347--9358} }
Revisiting Non-Autoregressive Transformers for Efficient Image Synthesis: Zanlin Ni,

Yulin Wang,

Renping Zhou,

Jiayi Guo,

Jinyi Hu,

Zhiyuan Liu,

Shiji Song,

Yuan Yao,

Gao Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ni_2024_CVPR, author = {Ni, Zanlin and Wang, Yulin and Zhou, Renping and Guo, Jiayi and Hu, Jinyi and Liu, Zhiyuan and Song, Shiji and Yao, Yuan and Huang, Gao}, title = {Revisiting Non-Autoregressive Transformers for Efficient Image Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {7007--7016} }
ANIM: Accurate Neural Implicit Model for Human Reconstruction from a single RGB-D Image: Marco Pesavento,

Yuanlu Xu,

Nikolaos Sarafianos,

Robert Maier,

Ziyan Wang,

Chun-Han Yao,

Marco Volino,

Edmond Boyer,

Adrian Hilton,

Tony Tung; [pdf] [supp]
[bibtex]
@InProceedings{Pesavento_2024_CVPR, author = {Pesavento, Marco and Xu, Yuanlu and Sarafianos, Nikolaos and Maier, Robert and Wang, Ziyan and Yao, Chun-Han and Volino, Marco and Boyer, Edmond and Hilton, Adrian and Tung, Tony}, title = {{ANIM}: Accurate Neural Implicit Model for Human Reconstruction from a single {RGB-D} Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {5448--5458} }
Real-Time Simulated Avatar from Head-Mounted Sensors: Zhengyi Luo,

Jinkun Cao,

Rawal Khirodkar,

Alexander Winkler,

Kris Kitani,

Weipeng Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2024_CVPR, author = {Luo, Zhengyi and Cao, Jinkun and Khirodkar, Rawal and Winkler, Alexander and Kitani, Kris and Xu, Weipeng}, title = {Real-Time Simulated Avatar from Head-Mounted Sensors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {571--581} }
Seamless Human Motion Composition with Blended Positional Encodings: German Barquero,

Sergio Escalera,

Cristina Palmero; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Barquero_2024_CVPR, author = {Barquero, German and Escalera, Sergio and Palmero, Cristina}, title = {Seamless Human Motion Composition with Blended Positional Encodings}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {457--469} }
FedUV: Uniformity and Variance for Heterogeneous Federated Learning: Ha Min Son,

Moon-Hyun Kim,

Tai-Myoung Chung,

Chao Huang,

Xin Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Son_2024_CVPR, author = {Son, Ha Min and Kim, Moon-Hyun and Chung, Tai-Myoung and Huang, Chao and Liu, Xin}, title = {{FedUV}: Uniformity and Variance for Heterogeneous Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {5863--5872} }
GAvatar: Animatable 3D Gaussian Avatars with Implicit Mesh Learning: Ye Yuan,

Xueting Li,

Yangyi Huang,

Shalini De Mello,

Koki Nagano,

Jan Kautz,

Umar Iqbal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2024_CVPR, author = {Yuan, Ye and Li, Xueting and Huang, Yangyi and De Mello, Shalini and Nagano, Koki and Kautz, Jan and Iqbal, Umar}, title = {{GAvatar}: Animatable {3D} {Gaussian} Avatars with Implicit Mesh Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {896--905} }
Grounding Everything: Emerging Localization Properties in Vision-Language Transformers: Walid Bousselham,

Felix Petersen,

Vittorio Ferrari,

Hilde Kuehne; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bousselham_2024_CVPR, author = {Bousselham, Walid and Petersen, Felix and Ferrari, Vittorio and Kuehne, Hilde}, title = {Grounding Everything: Emerging Localization Properties in Vision-Language Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {3828--3837} }
Mean-Shift Feature Transformer: Takumi Kobayashi; [pdf] [supp]
[bibtex]
@InProceedings{Kobayashi_2024_CVPR, author = {Kobayashi, Takumi}, title = {Mean-Shift Feature Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {6047--6056} }
Domain Separation Graph Neural Networks for Saliency Object Ranking: Zijian Wu,

Jun Lu,

Jing Han,

Lianfa Bai,

Yi Zhang,

Zhuang Zhao,

Siyang Song; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2024_CVPR, author = {Wu, Zijian and Lu, Jun and Han, Jing and Bai, Lianfa and Zhang, Yi and Zhao, Zhuang and Song, Siyang}, title = {Domain Separation Graph Neural Networks for Saliency Object Ranking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {3964--3974} }
RAM-Avatar: Real-time Photo-Realistic Avatar from Monocular Videos with Full-body Control: Xiang Deng,

Zerong Zheng,

Yuxiang Zhang,

Jingxiang Sun,

Chao Xu,

Xiaodong Yang,

Lizhen Wang,

Yebin Liu; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2024_CVPR, author = {Deng, Xiang and Zheng, Zerong and Zhang, Yuxiang and Sun, Jingxiang and Xu, Chao and Yang, Xiaodong and Wang, Lizhen and Liu, Yebin}, title = {{RAM-Avatar}: Real-time Photo-Realistic Avatar from Monocular Videos with Full-body Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {1996--2007} }
Video Prediction by Modeling Videos as Continuous Multi-Dimensional Processes: Gaurav Shrivastava,

Abhinav Shrivastava; [pdf] [supp]
[bibtex]
@InProceedings{Shrivastava_2024_CVPR, author = {Shrivastava, Gaurav and Shrivastava, Abhinav}, title = {Video Prediction by Modeling Videos as Continuous Multi-Dimensional Processes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {7236--7245} }
PICTURE: PhotorealistIC virtual Try-on from UnconstRained dEsigns: Shuliang Ning,

Duomin Wang,

Yipeng Qin,

Zirong Jin,

Baoyuan Wang,

Xiaoguang Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ning_2024_CVPR, author = {Ning, Shuliang and Wang, Duomin and Qin, Yipeng and Jin, Zirong and Wang, Baoyuan and Han, Xiaoguang}, title = {{PICTURE}: {PhotorealistIC} virtual Try-on from {UnconstRained} {dEsigns}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {6976--6985} }
Towards Robust 3D Pose Transfer with Adversarial Learning: Haoyu Chen,

Hao Tang,

Ehsan Adeli,

Guoying Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR, author = {Chen, Haoyu and Tang, Hao and Adeli, Ehsan and Zhao, Guoying}, title = {Towards Robust {3D} Pose Transfer with Adversarial Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {2295--2304} }
EAGLE: Eigen Aggregation Learning for Object-Centric Unsupervised Semantic Segmentation: Chanyoung Kim,

Woojung Han,

Dayun Ju,

Seong Jae Hwang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2024_CVPR, author = {Kim, Chanyoung and Han, Woojung and Ju, Dayun and Hwang, Seong Jae}, title = {{EAGLE}: Eigen Aggregation Learning for Object-Centric Unsupervised Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {3523--3533} }
AVID: Any-Length Video Inpainting with Diffusion Model: Zhixing Zhang,

Bichen Wu,

Xiaoyan Wang,

Yaqiao Luo,

Luxin Zhang,

Yinan Zhao,

Peter Vajda,

Dimitris Metaxas,

Licheng Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR, author = {Zhang, Zhixing and Wu, Bichen and Wang, Xiaoyan and Luo, Yaqiao and Zhang, Luxin and Zhao, Yinan and Vajda, Peter and Metaxas, Dimitris and Yu, Licheng}, title = {{AVID}: Any-Length Video Inpainting with Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {7162--7172} }
NoiseCollage: A Layout-Aware Text-to-Image Diffusion Model Based on Noise Cropping and Merging: Takahiro Shirakawa,

Seiichi Uchida; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shirakawa_2024_CVPR, author = {Shirakawa, Takahiro and Uchida, Seiichi}, title = {{NoiseCollage}: A Layout-Aware Text-to-Image Diffusion Model Based on Noise Cropping and Merging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {8921--8930} }
Arbitrary Motion Style Transfer with Multi-condition Motion Latent Diffusion Model: Wenfeng Song,

Xingliang Jin,

Shuai Li,

Chenglizhao Chen,

Aimin Hao,

Xia Hou,

Ning Li,

Hong Qin; [pdf] [supp]
[bibtex]
@InProceedings{Song_2024_CVPR, author = {Song, Wenfeng and Jin, Xingliang and Li, Shuai and Chen, Chenglizhao and Hao, Aimin and Hou, Xia and Li, Ning and Qin, Hong}, title = {Arbitrary Motion Style Transfer with Multi-condition Motion Latent Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {821--830} }
ViT-CoMer: Vision Transformer with Convolutional Multi-scale Feature Interaction for Dense Predictions: Chunlong Xia,

Xinliang Wang,

Feng Lv,

Xin Hao,

Yifeng Shi; [pdf]
[bibtex]
@InProceedings{Xia_2024_CVPR, author = {Xia, Chunlong and Wang, Xinliang and Lv, Feng and Hao, Xin and Shi, Yifeng}, title = {{ViT-CoMer}: Vision Transformer with Convolutional Multi-scale Feature Interaction for Dense Predictions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {5493--5502} }
PromptCoT: Align Prompt Distribution via Adapted Chain-of-Thought: Junyi Yao,

Yijiang Liu,

Zhen Dong,

Mingfei Guo,

Helan Hu,

Kurt Keutzer,

Li Du,

Daquan Zhou,

Shanghang Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Yao_2024_CVPR, author = {Yao, Junyi and Liu, Yijiang and Dong, Zhen and Guo, Mingfei and Hu, Helan and Keutzer, Kurt and Du, Li and Zhou, Daquan and Zhang, Shanghang}, title = {{PromptCoT}: Align Prompt Distribution via Adapted Chain-of-Thought}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {7027--7037} }
Anomaly Score: Evaluating Generative Models and Individual Generated Images based on Complexity and Vulnerability: Jaehui Hwang,

Junghyuk Lee,

Jong-Seok Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hwang_2024_CVPR, author = {Hwang, Jaehui and Lee, Junghyuk and Lee, Jong-Seok}, title = {Anomaly Score: Evaluating Generative Models and Individual Generated Images based on Complexity and Vulnerability}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {8754--8763} }
GeneAvatar: Generic Expression-Aware Volumetric Head Avatar Editing from a Single Image: Chong Bao,

Yinda Zhang,

Yuan Li,

Xiyu Zhang,

Bangbang Yang,

Hujun Bao,

Marc Pollefeys,

Guofeng Zhang,

Zhaopeng Cui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bao_2024_CVPR, author = {Bao, Chong and Zhang, Yinda and Li, Yuan and Zhang, Xiyu and Yang, Bangbang and Bao, Hujun and Pollefeys, Marc and Zhang, Guofeng and Cui, Zhaopeng}, title = {{GeneAvatar}: Generic Expression-Aware Volumetric Head Avatar Editing from a Single Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {8952--8963} }
Learn to Rectify the Bias of CLIP for Unsupervised Semantic Segmentation: Jingyun Wang,

Guoliang Kang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR, author = {Wang, Jingyun and Kang, Guoliang}, title = {Learn to Rectify the Bias of {CLIP} for Unsupervised Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {4102--4112} }
Unlocking Pre-trained Image Backbones for Semantic Image Synthesis: Tariq Berrada Ifriqi,

Jakob Verbeek,

Camille Couprie,

Karteek Alahari; [pdf] [supp]
[bibtex]
@InProceedings{Ifriqi_2024_CVPR, author = {Ifriqi, Tariq Berrada and Verbeek, Jakob and Couprie, Camille and Alahari, Karteek}, title = {Unlocking Pre-trained Image Backbones for Semantic Image Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {7840--7849} }
TexTile: A Differentiable Metric for Texture Tileability: Carlos Rodriguez-Pardo,

Dan Casas,

Elena Garces,

Jorge Lopez-Moreno; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rodriguez-Pardo_2024_CVPR, author = {Rodriguez-Pardo, Carlos and Casas, Dan and Garces, Elena and Lopez-Moreno, Jorge}, title = {{TexTile}: A Differentiable Metric for Texture Tileability}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {4439--4449} }
Improving Image Restoration through Removing Degradations in Textual Representations: Jingbo Lin,

Zhilu Zhang,

Yuxiang Wei,

Dongwei Ren,

Dongsheng Jiang,

Qi Tian,

Wangmeng Zuo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2024_CVPR, author = {Lin, Jingbo and Zhang, Zhilu and Wei, Yuxiang and Ren, Dongwei and Jiang, Dongsheng and Tian, Qi and Zuo, Wangmeng}, title = {Improving Image Restoration through Removing Degradations in Textual Representations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {2866--2878} }
ZONE: Zero-Shot Instruction-Guided Local Editing: Shanglin Li,

Bohan Zeng,

Yutang Feng,

Sicheng Gao,

Xiuhui Liu,

Jiaming Liu,

Lin Li,

Xu Tang,

Yao Hu,

Jianzhuang Liu,

Baochang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR, author = {Li, Shanglin and Zeng, Bohan and Feng, Yutang and Gao, Sicheng and Liu, Xiuhui and Liu, Jiaming and Li, Lin and Tang, Xu and Hu, Yao and Liu, Jianzhuang and Zhang, Baochang}, title = {{ZONE}: Zero-Shot Instruction-Guided Local Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {6254--6263} }
U-VAP: User-specified Visual Appearance Personalization via Decoupled Self Augmentation: You Wu,

Kean Liu,

Xiaoyue Mi,

Fan Tang,

Juan Cao,

Jintao Li; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2024_CVPR, author = {Wu, You and Liu, Kean and Mi, Xiaoyue and Tang, Fan and Cao, Juan and Li, Jintao}, title = {{U-VAP}: User-specified Visual Appearance Personalization via Decoupled Self Augmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {9482--9491} }
HHMR: Holistic Hand Mesh Recovery by Enhancing the Multimodal Controllability of Graph Diffusion Models: Mengcheng Li,

Hongwen Zhang,

Yuxiang Zhang,

Ruizhi Shao,

Tao Yu,

Yebin Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR, author = {Li, Mengcheng and Zhang, Hongwen and Zhang, Yuxiang and Shao, Ruizhi and Yu, Tao and Liu, Yebin}, title = {{HHMR}: Holistic Hand Mesh Recovery by Enhancing the Multimodal Controllability of Graph Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {645--654} }
Robust Self-calibration of Focal Lengths from the Fundamental Matrix: Viktor Kocur,

Daniel Kyselica,

Zuzana Kukelova; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kocur_2024_CVPR, author = {Kocur, Viktor and Kyselica, Daniel and Kukelova, Zuzana}, title = {Robust Self-calibration of Focal Lengths from the Fundamental Matrix}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {5220--5229} }
PartDistill: 3D Shape Part Segmentation by Vision-Language Model Distillation: Ardian Umam,

Cheng-Kun Yang,

Min-Hung Chen,

Jen-Hui Chuang,

Yen-Yu Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Umam_2024_CVPR, author = {Umam, Ardian and Yang, Cheng-Kun and Chen, Min-Hung and Chuang, Jen-Hui and Lin, Yen-Yu}, title = {{PartDistill}: {3D} Shape Part Segmentation by Vision-Language Model Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {3470--3479} }
DragDiffusion: Harnessing Diffusion Models for Interactive Point-based Image Editing: Yujun Shi,

Chuhui Xue,

Jun Hao Liew,

Jiachun Pan,

Hanshu Yan,

Wenqing Zhang,

Vincent Y. F. Tan,

Song Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2024_CVPR, author = {Shi, Yujun and Xue, Chuhui and Liew, Jun Hao and Pan, Jiachun and Yan, Hanshu and Zhang, Wenqing and Tan, Vincent Y. F. and Bai, Song}, title = {{DragDiffusion}: Harnessing Diffusion Models for Interactive Point-based Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {8839--8849} }
Addressing Background Context Bias in Few-Shot Segmentation through Iterative Modulation: Lanyun Zhu,

Tianrun Chen,

Jianxiong Yin,

Simon See,

Jun Liu; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2024_CVPR, author = {Zhu, Lanyun and Chen, Tianrun and Yin, Jianxiong and See, Simon and Liu, Jun}, title = {Addressing Background Context Bias in Few-Shot Segmentation through Iterative Modulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {3370--3379} }
TiNO-Edit: Timestep and Noise Optimization for Robust Diffusion-Based Image Editing: Sherry X Chen,

Yaron Vaxman,

Elad Ben Baruch,

David Asulin,

Aviad Moreshet,

Kuo-Chin Lien,

Misha Sra,

Pradeep Sen; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2024_CVPR, author = {Chen, Sherry X and Vaxman, Yaron and Ben Baruch, Elad and Asulin, David and Moreshet, Aviad and Lien, Kuo-Chin and Sra, Misha and Sen, Pradeep}, title = {{TiNO-Edit}: Timestep and Noise Optimization for Robust Diffusion-Based Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {6337--6346} }
AdaShift: Learning Discriminative Self-Gated Neural Feature Activation With an Adaptive Shift Factor: Sudong Cai; [pdf] [supp]
[bibtex]
@InProceedings{Cai_2024_CVPR, author = {Cai, Sudong}, title = {{AdaShift}: Learning Discriminative Self-Gated Neural Feature Activation With an Adaptive Shift Factor}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {5947--5956} }
SCEdit: Efficient and Controllable Image Diffusion Generation via Skip Connection Editing: Zeyinzi Jiang,

Chaojie Mao,

Yulin Pan,

Zhen Han,

Jingfeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2024_CVPR, author = {Jiang, Zeyinzi and Mao, Chaojie and Pan, Yulin and Han, Zhen and Zhang, Jingfeng}, title = {{SCEdit}: Efficient and Controllable Image Diffusion Generation via Skip Connection Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {8995--9004} }
BA-SAM: Scalable Bias-Mode Attention Mask for Segment Anything Model: Yiran Song,

Qianyu Zhou,

Xiangtai Li,

Deng-Ping Fan,

Xuequan Lu,

Lizhuang Ma; [pdf] [supp]
[bibtex]
@InProceedings{Song_2024_CVPR, author = {Song, Yiran and Zhou, Qianyu and Li, Xiangtai and Fan, Deng-Ping and Lu, Xuequan and Ma, Lizhuang}, title = {{BA-SAM}: Scalable Bias-Mode Attention Mask for {Segment Anything Model}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {3162--3173} }
Deciphering 'What' and 'Where' Visual Pathways from Spectral Clustering of Layer-Distributed Neural Representations: Xiao Zhang,

David Yunis,

Michael Maire; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR, author = {Zhang, Xiao and Yunis, David and Maire, Michael}, title = {Deciphering {`What'} and {`Where'} Visual Pathways from Spectral Clustering of Layer-Distributed Neural Representations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {4165--4175} }
Real-Time Exposure Correction via Collaborative Transformations and Adaptive Sampling: Ziwen Li,

Feng Zhang,

Meng Cao,

Jinpu Zhang,

Yuanjie Shao,

Yuehuan Wang,

Nong Sang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Ziwen and Zhang, Feng and Cao, Meng and Zhang, Jinpu and Shao, Yuanjie and Wang, Yuehuan and Sang, Nong},
  title     = {Real-Time Exposure Correction via Collaborative Transformations and Adaptive Sampling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2984--2994}
}
Lodge: A Coarse to Fine Diffusion Network for Long Dance Generation Guided by the Characteristic Dance Primitives: Ronghui Li,

YuXiang Zhang,

Yachao Zhang,

Hongwen Zhang,

Jie Guo,

Yan Zhang,

Yebin Liu,

Xiu Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Ronghui and Zhang, YuXiang and Zhang, Yachao and Zhang, Hongwen and Guo, Jie and Zhang, Yan and Liu, Yebin and Li, Xiu},
  title     = {Lodge: A Coarse to Fine Diffusion Network for Long Dance Generation Guided by the Characteristic Dance Primitives},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1524--1534}
}
Transcending Forgery Specificity with Latent Space Augmentation for Generalizable Deepfake Detection: Zhiyuan Yan,

Yuhao Luo,

Siwei Lyu,

Qingshan Liu,

Baoyuan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2024_CVPR,
  author    = {Yan, Zhiyuan and Luo, Yuhao and Lyu, Siwei and Liu, Qingshan and Wu, Baoyuan},
  title     = {Transcending Forgery Specificity with Latent Space Augmentation for Generalizable Deepfake Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8984--8994}
}
Scaling Laws of Synthetic Images for Model Training ... for Now: Lijie Fan,

Kaifeng Chen,

Dilip Krishnan,

Dina Katabi,

Phillip Isola,

Yonglong Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2024_CVPR,
  author    = {Fan, Lijie and Chen, Kaifeng and Krishnan, Dilip and Katabi, Dina and Isola, Phillip and Tian, Yonglong},
  title     = {Scaling Laws of Synthetic Images for Model Training ... for Now},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7382--7392}
}
State Space Models for Event Cameras: Nikola Zubic,

Mathias Gehrig,

Davide Scaramuzza; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zubic_2024_CVPR,
  author    = {Zubic, Nikola and Gehrig, Mathias and Scaramuzza, Davide},
  title     = {State Space Models for Event Cameras},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5819--5828}
}
TeTriRF: Temporal Tri-Plane Radiance Fields for Efficient Free-Viewpoint Video: Minye Wu,

Zehao Wang,

Georgios Kouros,

Tinne Tuytelaars; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Minye and Wang, Zehao and Kouros, Georgios and Tuytelaars, Tinne},
  title     = {{TeTriRF}: Temporal Tri-Plane Radiance Fields for Efficient Free-Viewpoint Video},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6487--6496}
}
Event-assisted Low-Light Video Object Segmentation: Hebei Li,

Jin Wang,

Jiahui Yuan,

Yue Li,

Wenming Weng,

Yansong Peng,

Yueyi Zhang,

Zhiwei Xiong,

Xiaoyan Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Hebei and Wang, Jin and Yuan, Jiahui and Li, Yue and Weng, Wenming and Peng, Yansong and Zhang, Yueyi and Xiong, Zhiwei and Sun, Xiaoyan},
  title     = {Event-assisted Low-Light Video Object Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3250--3259}
}
VidToMe: Video Token Merging for Zero-Shot Video Editing: Xirui Li,

Chao Ma,

Xiaokang Yang,

Ming-Hsuan Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Xirui and Ma, Chao and Yang, Xiaokang and Yang, Ming-Hsuan},
  title     = {{VidToMe}: Video Token Merging for Zero-Shot Video Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7486--7495}
}
FaceChain-SuDe: Building Derived Class to Inherit Category Attributes for One-shot Subject-Driven Generation: Pengchong Qiao,

Lei Shang,

Chang Liu,

Baigui Sun,

Xiangyang Ji,

Jie Chen; [pdf] [supp]
[bibtex]
@InProceedings{Qiao_2024_CVPR,
  author    = {Qiao, Pengchong and Shang, Lei and Liu, Chang and Sun, Baigui and Ji, Xiangyang and Chen, Jie},
  title     = {{FaceChain-SuDe}: Building Derived Class to Inherit Category Attributes for One-shot Subject-Driven Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7215--7224}
}
StableVITON: Learning Semantic Correspondence with Latent Diffusion Model for Virtual Try-On: Jeongho Kim,

Guojung Gu,

Minho Park,

Sunghyun Park,

Jaegul Choo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2024_CVPR,
  author    = {Kim, Jeongho and Gu, Guojung and Park, Minho and Park, Sunghyun and Choo, Jaegul},
  title     = {{StableVITON}: Learning Semantic Correspondence with Latent Diffusion Model for Virtual Try-On},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8176--8185}
}
Make-Your-Anchor: A Diffusion-based 2D Avatar Generation Framework: Ziyao Huang,

Fan Tang,

Yong Zhang,

Xiaodong Cun,

Juan Cao,

Jintao Li,

Tong-Yee Lee; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Ziyao and Tang, Fan and Zhang, Yong and Cun, Xiaodong and Cao, Juan and Li, Jintao and Lee, Tong-Yee},
  title     = {{Make-Your-Anchor}: A Diffusion-based {2D} Avatar Generation Framework},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6997--7006}
}
Learning Dynamic Tetrahedra for High-Quality Talking Head Synthesis: Zicheng Zhang,

Ruobing Zheng,

Bonan Li,

Congying Han,

Tianqi Li,

Meng Wang,

Tiande Guo,

Jingdong Chen,

Ziwen Liu,

Ming Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Zicheng and Zheng, Ruobing and Li, Bonan and Han, Congying and Li, Tianqi and Wang, Meng and Guo, Tiande and Chen, Jingdong and Liu, Ziwen and Yang, Ming},
  title     = {Learning Dynamic Tetrahedra for High-Quality Talking Head Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5209--5219}
}
3D Geometry-Aware Deformable Gaussian Splatting for Dynamic View Synthesis: Zhicheng Lu,

Xiang Guo,

Le Hui,

Tianrui Chen,

Min Yang,

Xiao Tang,

Feng Zhu,

Yuchao Dai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2024_CVPR,
  author    = {Lu, Zhicheng and Guo, Xiang and Hui, Le and Chen, Tianrui and Yang, Min and Tang, Xiao and Zhu, Feng and Dai, Yuchao},
  title     = {{3D} Geometry-Aware Deformable {Gaussian} Splatting for Dynamic View Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8900--8910}
}
Person-in-WiFi 3D: End-to-End Multi-Person 3D Pose Estimation with Wi-Fi: Kangwei Yan,

Fei Wang,

Bo Qian,

Han Ding,

Jinsong Han,

Xing Wei; [pdf]
[bibtex]
@InProceedings{Yan_2024_CVPR,
  author    = {Yan, Kangwei and Wang, Fei and Qian, Bo and Ding, Han and Han, Jinsong and Wei, Xing},
  title     = {{Person-in-WiFi} {3D}: End-to-End Multi-Person {3D} Pose Estimation with {Wi-Fi}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {969--978}
}
Fairy: Fast Parallelized Instruction-Guided Video-to-Video Synthesis: Bichen Wu,

Ching-Yao Chuang,

Xiaoyan Wang,

Yichen Jia,

Kapil Krishnakumar,

Tong Xiao,

Feng Liang,

Licheng Yu,

Peter Vajda; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Bichen and Chuang, Ching-Yao and Wang, Xiaoyan and Jia, Yichen and Krishnakumar, Kapil and Xiao, Tong and Liang, Feng and Yu, Licheng and Vajda, Peter},
  title     = {Fairy: Fast Parallelized Instruction-Guided Video-to-Video Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8261--8270}
}
SmartEdit: Exploring Complex Instruction-based Image Editing with Multimodal Large Language Models: Yuzhou Huang,

Liangbin Xie,

Xintao Wang,

Ziyang Yuan,

Xiaodong Cun,

Yixiao Ge,

Jiantao Zhou,

Chao Dong,

Rui Huang,

Ruimao Zhang,

Ying Shan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Yuzhou and Xie, Liangbin and Wang, Xintao and Yuan, Ziyang and Cun, Xiaodong and Ge, Yixiao and Zhou, Jiantao and Dong, Chao and Huang, Rui and Zhang, Ruimao and Shan, Ying},
  title     = {{SmartEdit}: Exploring Complex Instruction-based Image Editing with Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8362--8371}
}
It's All About Your Sketch: Democratising Sketch Control in Diffusion Models: Subhadeep Koley,

Ayan Kumar Bhunia,

Deeptanshu Sekhri,

Aneeshan Sain,

Pinaki Nath Chowdhury,

Tao Xiang,

Yi-Zhe Song; [pdf] [supp]
[bibtex]
@InProceedings{Koley_2024_CVPR,
  author    = {Koley, Subhadeep and Bhunia, Ayan Kumar and Sekhri, Deeptanshu and Sain, Aneeshan and Chowdhury, Pinaki Nath and Xiang, Tao and Song, Yi-Zhe},
  title     = {It's All About Your Sketch: Democratising Sketch Control in Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7204--7214}
}
When StyleGAN Meets Stable Diffusion: a W+ Adapter for Personalized Image Generation: Xiaoming Li,

Xinyu Hou,

Chen Change Loy; [pdf] [supp]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Xiaoming and Hou, Xinyu and Loy, Chen Change},
  title     = {When {StyleGAN} Meets {Stable Diffusion}: a {W+} Adapter for Personalized Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2187--2196}
}
CAM Back Again: Large Kernel CNNs from a Weakly Supervised Object Localization Perspective: Shunsuke Yasuki,

Masato Taki; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yasuki_2024_CVPR,
  author    = {Yasuki, Shunsuke and Taki, Masato},
  title     = {{CAM} Back Again: Large Kernel {CNNs} from a Weakly Supervised Object Localization Perspective},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {341--351}
}
Putting the Object Back into Video Object Segmentation: Ho Kei Cheng,

Seoung Wug Oh,

Brian Price,

Joon-Young Lee,

Alexander Schwing; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2024_CVPR,
  author    = {Cheng, Ho Kei and Oh, Seoung Wug and Price, Brian and Lee, Joon-Young and Schwing, Alexander},
  title     = {Putting the Object Back into Video Object Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3151--3161}
}
Concept Weaver: Enabling Multi-Concept Fusion in Text-to-Image Models: Gihyun Kwon,

Simon Jenni,

Dingzeyu Li,

Joon-Young Lee,

Jong Chul Ye,

Fabian Caba Heilbron; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kwon_2024_CVPR,
  author    = {Kwon, Gihyun and Jenni, Simon and Li, Dingzeyu and Lee, Joon-Young and Ye, Jong Chul and Caba Heilbron, Fabian},
  title     = {Concept Weaver: Enabling Multi-Concept Fusion in Text-to-Image Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8880--8889}
}
Cross-Domain Few-Shot Segmentation via Iterative Support-Query Correspondence Mining: Jiahao Nie,

Yun Xing,

Gongjie Zhang,

Pei Yan,

Aoran Xiao,

Yap-Peng Tan,

Alex C. Kot,

Shijian Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nie_2024_CVPR,
  author    = {Nie, Jiahao and Xing, Yun and Zhang, Gongjie and Yan, Pei and Xiao, Aoran and Tan, Yap-Peng and Kot, Alex C. and Lu, Shijian},
  title     = {Cross-Domain Few-Shot Segmentation via Iterative Support-Query Correspondence Mining},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3380--3390}
}
DiffSHEG: A Diffusion-Based Approach for Real-Time Speech-driven Holistic 3D Expression and Gesture Generation: Junming Chen,

Yunfei Liu,

Jianan Wang,

Ailing Zeng,

Yu Li,

Qifeng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Junming and Liu, Yunfei and Wang, Jianan and Zeng, Ailing and Li, Yu and Chen, Qifeng},
  title     = {{DiffSHEG}: A Diffusion-Based Approach for Real-Time Speech-driven Holistic {3D} Expression and Gesture Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7352--7361}
}
Animating General Image with Large Visual Motion Model: Dengsheng Chen,

Xiaoming Wei,

Xiaolin Wei; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Dengsheng and Wei, Xiaoming and Wei, Xiaolin},
  title     = {Animating General Image with Large Visual Motion Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7131--7140}
}
DIRECT-3D: Learning Direct Text-to-3D Generation on Massive Noisy 3D Data: Qihao Liu,

Yi Zhang,

Song Bai,

Adam Kortylewski,

Alan Yuille; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Qihao and Zhang, Yi and Bai, Song and Kortylewski, Adam and Yuille, Alan},
  title     = {{DIRECT-3D}: Learning Direct {Text-to-3D} Generation on Massive Noisy {3D} Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6881--6891}
}
OHTA: One-shot Hand Avatar via Data-driven Implicit Priors: Xiaozheng Zheng,

Chao Wen,

Zhuo Su,

Zeran Xu,

Zhaohu Li,

Yang Zhao,

Zhou Xue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2024_CVPR,
  author    = {Zheng, Xiaozheng and Wen, Chao and Su, Zhuo and Xu, Zeran and Li, Zhaohu and Zhao, Yang and Xue, Zhou},
  title     = {{OHTA}: One-shot Hand Avatar via Data-driven Implicit Priors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {799--810}
}
Human Motion Prediction Under Unexpected Perturbation: Jiangbei Yue,

Baiyi Li,

Julien Pettré,

Armin Seyfried,

He Wang; [pdf] [supp]
[bibtex]
@InProceedings{Yue_2024_CVPR,
  author    = {Yue, Jiangbei and Li, Baiyi and Pettr{\'e}, Julien and Seyfried, Armin and Wang, He},
  title     = {Human Motion Prediction Under Unexpected Perturbation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1501--1511}
}
Text-to-3D Generation with Bidirectional Diffusion using both 2D and 3D priors: Lihe Ding,

Shaocong Dong,

Zhanpeng Huang,

Zibin Wang,

Yiyuan Zhang,

Kaixiong Gong,

Dan Xu,

Tianfan Xue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ding_2024_CVPR,
  author    = {Ding, Lihe and Dong, Shaocong and Huang, Zhanpeng and Wang, Zibin and Zhang, Yiyuan and Gong, Kaixiong and Xu, Dan and Xue, Tianfan},
  title     = {{Text-to-3D} Generation with Bidirectional Diffusion using both {2D} and {3D} priors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5115--5124}
}
Make-It-Vivid: Dressing Your Animatable Biped Cartoon Characters from Text: Junshu Tang,

Yanhong Zeng,

Ke Fan,

Xuheng Wang,

Bo Dai,

Kai Chen,

Lizhuang Ma; [pdf]
[bibtex]
@InProceedings{Tang_2024_CVPR,
  author    = {Tang, Junshu and Zeng, Yanhong and Fan, Ke and Wang, Xuheng and Dai, Bo and Chen, Kai and Ma, Lizhuang},
  title     = {{Make-It-Vivid}: Dressing Your Animatable Biped Cartoon Characters from Text},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6243--6253}
}
Neural Sign Actors: A Diffusion Model for 3D Sign Language Production from Text: Vasileios Baltatzis,

Rolandos Alexandros Potamias,

Evangelos Ververas,

Guanxiong Sun,

Jiankang Deng,

Stefanos Zafeiriou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Baltatzis_2024_CVPR,
  author    = {Baltatzis, Vasileios and Potamias, Rolandos Alexandros and Ververas, Evangelos and Sun, Guanxiong and Deng, Jiankang and Zafeiriou, Stefanos},
  title     = {Neural Sign Actors: A Diffusion Model for {3D} Sign Language Production from Text},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1985--1995}
}
On the Diversity and Realism of Distilled Dataset: An Efficient Dataset Distillation Paradigm: Peng Sun,

Bei Shi,

Daiwei Yu,

Tao Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2024_CVPR,
  author    = {Sun, Peng and Shi, Bei and Yu, Daiwei and Lin, Tao},
  title     = {On the Diversity and Realism of Distilled Dataset: An Efficient Dataset Distillation Paradigm},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9390--9399}
}
Semantics-aware Motion Retargeting with Vision-Language Models: Haodong Zhang,

Zhike Chen,

Haocheng Xu,

Lei Hao,

Xiaofei Wu,

Songcen Xu,

Zhensong Zhang,

Yue Wang,

Rong Xiong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Haodong and Chen, Zhike and Xu, Haocheng and Hao, Lei and Wu, Xiaofei and Xu, Songcen and Zhang, Zhensong and Wang, Yue and Xiong, Rong},
  title     = {Semantics-aware Motion Retargeting with Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2155--2164}
}
Unsupervised Semantic Segmentation Through Depth-Guided Feature Correlation and Sampling: Leon Sick,

Dominik Engel,

Pedro Hermosilla,

Timo Ropinski; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sick_2024_CVPR,
  author    = {Sick, Leon and Engel, Dominik and Hermosilla, Pedro and Ropinski, Timo},
  title     = {Unsupervised Semantic Segmentation Through Depth-Guided Feature Correlation and Sampling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3637--3646}
}
RAVE: Randomized Noise Shuffling for Fast and Consistent Video Editing with Diffusion Models: Ozgur Kara,

Bariscan Kurtkaya,

Hidir Yesiltepe,

James M. Rehg,

Pinar Yanardag; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kara_2024_CVPR,
  author    = {Kara, Ozgur and Kurtkaya, Bariscan and Yesiltepe, Hidir and Rehg, James M. and Yanardag, Pinar},
  title     = {{RAVE}: Randomized Noise Shuffling for Fast and Consistent Video Editing with Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6507--6516}
}
Video-Based Human Pose Regression via Decoupled Space-Time Aggregation: Jijie He,

Wenwu Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2024_CVPR,
  author    = {He, Jijie and Yang, Wenwu},
  title     = {Video-Based Human Pose Regression via Decoupled Space-Time Aggregation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1022--1031}
}
L-MAGIC: Language Model Assisted Generation of Images with Coherence: Zhipeng Cai,

Matthias Mueller,

Reiner Birkl,

Diana Wofk,

Shao-Yen Tseng,

Junda Cheng,

Gabriela Ben-Melech Stan,

Vasudev Lai,

Michael Paulitsch; [pdf] [supp]
[bibtex]
@InProceedings{Cai_2024_CVPR,
  author    = {Cai, Zhipeng and Mueller, Matthias and Birkl, Reiner and Wofk, Diana and Tseng, Shao-Yen and Cheng, Junda and Stan, Gabriela Ben-Melech and Lai, Vasudev and Paulitsch, Michael},
  title     = {{L-MAGIC}: Language Model Assisted Generation of Images with Coherence},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7049--7058}
}
3D Face Tracking from 2D Video through Iterative Dense UV to Image Flow: Felix Taubner,

Prashant Raina,

Mathieu Tuli,

Eu Wern Teh,

Chul Lee,

Jinmiao Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Taubner_2024_CVPR,
  author    = {Taubner, Felix and Raina, Prashant and Tuli, Mathieu and Teh, Eu Wern and Lee, Chul and Huang, Jinmiao},
  title     = {{3D} Face Tracking from {2D} Video through Iterative Dense {UV} to Image Flow},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1227--1237}
}
Carve3D: Improving Multi-view Reconstruction Consistency for Diffusion Models with RL Finetuning: Desai Xie,

Jiahao Li,

Hao Tan,

Xin Sun,

Zhixin Shu,

Yi Zhou,

Sai Bi,

Sören Pirk,

Arie E. Kaufman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2024_CVPR,
  author    = {Xie, Desai and Li, Jiahao and Tan, Hao and Sun, Xin and Shu, Zhixin and Zhou, Yi and Bi, Sai and Pirk, S{\"o}ren and Kaufman, Arie E.},
  title     = {{Carve3D}: Improving Multi-view Reconstruction Consistency for Diffusion Models with {RL} Finetuning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6369--6379}
}
Shadow Generation for Composite Image Using Diffusion Model: Qingyang Liu,

Junqi You,

Jianting Wang,

Xinhao Tao,

Bo Zhang,

Li Niu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Qingyang and You, Junqi and Wang, Jianting and Tao, Xinhao and Zhang, Bo and Niu, Li},
  title     = {Shadow Generation for Composite Image Using Diffusion Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8121--8130}
}
DisCo: Disentangled Control for Realistic Human Dance Generation: Tan Wang,

Linjie Li,

Kevin Lin,

Yuanhao Zhai,

Chung-Ching Lin,

Zhengyuan Yang,

Hanwang Zhang,

Zicheng Liu,

Lijuan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Tan and Li, Linjie and Lin, Kevin and Zhai, Yuanhao and Lin, Chung-Ching and Yang, Zhengyuan and Zhang, Hanwang and Liu, Zicheng and Wang, Lijuan},
  title     = {{DisCo}: Disentangled Control for Realistic Human Dance Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9326--9336}
}
GaussianShader: 3D Gaussian Splatting with Shading Functions for Reflective Surfaces: Yingwenqi Jiang,

Jiadong Tu,

Yuan Liu,

Xifeng Gao,

Xiaoxiao Long,

Wenping Wang,

Yuexin Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2024_CVPR,
  author    = {Jiang, Yingwenqi and Tu, Jiadong and Liu, Yuan and Gao, Xifeng and Long, Xiaoxiao and Wang, Wenping and Ma, Yuexin},
  title     = {{GaussianShader}: {3D} {Gaussian} Splatting with Shading Functions for Reflective Surfaces},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5322--5332}
}
pix2gestalt: Amodal Segmentation by Synthesizing Wholes: Ege Ozguroglu,

Ruoshi Liu,

Dídac Surís,

Dian Chen,

Achal Dave,

Pavel Tokmakov,

Carl Vondrick; [pdf]
[bibtex]
@InProceedings{Ozguroglu_2024_CVPR,
  author    = {Ozguroglu, Ege and Liu, Ruoshi and Sur{\'\i}s, D{\'\i}dac and Chen, Dian and Dave, Achal and Tokmakov, Pavel and Vondrick, Carl},
  title     = {pix2gestalt: Amodal Segmentation by Synthesizing Wholes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3931--3940}
}
Weakly Supervised Point Cloud Semantic Segmentation via Artificial Oracle: Hyeokjun Kweon,

Jihun Kim,

Kuk-Jin Yoon; [pdf] [supp]
[bibtex]
@InProceedings{Kweon_2024_CVPR,
  author    = {Kweon, Hyeokjun and Kim, Jihun and Yoon, Kuk-Jin},
  title     = {Weakly Supervised Point Cloud Semantic Segmentation via Artificial Oracle},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3721--3731}
}
Forecasting of 3D Whole-body Human Poses with Grasping Objects: Haitao Yan,

Qiongjie Cui,

Jiexin Xie,

Shijie Guo; [pdf]
[bibtex]
@InProceedings{Yan_2024_CVPR,
  author    = {Yan, Haitao and Cui, Qiongjie and Xie, Jiexin and Guo, Shijie},
  title     = {Forecasting of {3D} Whole-body Human Poses with Grasping Objects},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1726--1736}
}
Accelerating Diffusion Sampling with Optimized Time Steps: Shuchen Xue,

Zhaoqiang Liu,

Fei Chen,

Shifeng Zhang,

Tianyang Hu,

Enze Xie,

Zhenguo Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xue_2024_CVPR,
  author    = {Xue, Shuchen and Liu, Zhaoqiang and Chen, Fei and Zhang, Shifeng and Hu, Tianyang and Xie, Enze and Li, Zhenguo},
  title     = {Accelerating Diffusion Sampling with Optimized Time Steps},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8292--8301}
}
Unsupervised Template-assisted Point Cloud Shape Correspondence Network: Jiacheng Deng,

Jiahao Lu,

Tianzhu Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Deng_2024_CVPR,
  author    = {Deng, Jiacheng and Lu, Jiahao and Zhang, Tianzhu},
  title     = {Unsupervised Template-assisted Point Cloud Shape Correspondence Network},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5250--5259}
}
Finsler-Laplace-Beltrami Operators with Application to Shape Analysis: Simon Weber,

Thomas Dagès,

Maolin Gao,

Daniel Cremers; [pdf] [supp]
[bibtex]
@InProceedings{Weber_2024_CVPR,
  author    = {Weber, Simon and Dag{\`e}s, Thomas and Gao, Maolin and Cremers, Daniel},
  title     = {{Finsler-Laplace-Beltrami} Operators with Application to Shape Analysis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3131--3140}
}
Minimal Perspective Autocalibration: Andrea Porfiri Dal Cin,

Timothy Duff,

Luca Magri,

Tomas Pajdla; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cin_2024_CVPR,
  author    = {Porfiri Dal Cin, Andrea and Duff, Timothy and Magri, Luca and Pajdla, Tomas},
  title     = {Minimal Perspective Autocalibration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5064--5073}
}
Time- Memory- and Parameter-Efficient Visual Adaptation: Otniel-Bogdan Mercea,

Alexey Gritsenko,

Cordelia Schmid,

Anurag Arnab; [pdf] [supp]
[bibtex]
@InProceedings{Mercea_2024_CVPR,
  author    = {Mercea, Otniel-Bogdan and Gritsenko, Alexey and Schmid, Cordelia and Arnab, Anurag},
  title     = {Time- Memory- and Parameter-Efficient Visual Adaptation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5536--5545}
}
Suppress and Rebalance: Towards Generalized Multi-Modal Face Anti-Spoofing: Xun Lin,

Shuai Wang,

Rizhao Cai,

Yizhong Liu,

Ying Fu,

Wenzhong Tang,

Zitong Yu,

Alex Kot; [pdf] [arXiv]
[bibtex]
@InProceedings{Lin_2024_CVPR,
  author    = {Lin, Xun and Wang, Shuai and Cai, Rizhao and Liu, Yizhong and Fu, Ying and Tang, Wenzhong and Yu, Zitong and Kot, Alex},
  title     = {Suppress and Rebalance: Towards Generalized Multi-Modal Face Anti-Spoofing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {211--221}
}
Universal Segmentation at Arbitrary Granularity with Language Instruction: Yong Liu,

Cairong Zhang,

Yitong Wang,

Jiahao Wang,

Yujiu Yang,

Yansong Tang; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Yong and Zhang, Cairong and Wang, Yitong and Wang, Jiahao and Yang, Yujiu and Tang, Yansong},
  title     = {Universal Segmentation at Arbitrary Granularity with Language Instruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3459--3469}
}
Layout-Agnostic Scene Text Image Synthesis with Diffusion Models: Qilong Zhangli,

Jindong Jiang,

Di Liu,

Licheng Yu,

Xiaoliang Dai,

Ankit Ramchandani,

Guan Pang,

Dimitris N. Metaxas,

Praveen Krishnan; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhangli_2024_CVPR,
  author    = {Zhangli, Qilong and Jiang, Jindong and Liu, Di and Yu, Licheng and Dai, Xiaoliang and Ramchandani, Ankit and Pang, Guan and Metaxas, Dimitris N. and Krishnan, Praveen},
  title     = {Layout-Agnostic Scene Text Image Synthesis with Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7496--7506}
}
SmartMask: Context Aware High-Fidelity Mask Generation for Fine-grained Object Insertion and Layout Control: Jaskirat Singh,

Jianming Zhang,

Qing Liu,

Cameron Smith,

Zhe Lin,

Liang Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Singh_2024_CVPR,
  author    = {Singh, Jaskirat and Zhang, Jianming and Liu, Qing and Smith, Cameron and Lin, Zhe and Zheng, Liang},
  title     = {{SmartMask}: Context Aware High-Fidelity Mask Generation for Fine-grained Object Insertion and Layout Control},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6497--6506}
}
Customization Assistant for Text-to-Image Generation: Yufan Zhou,

Ruiyi Zhang,

Jiuxiang Gu,

Tong Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Yufan and Zhang, Ruiyi and Gu, Jiuxiang and Sun, Tong},
  title     = {Customization Assistant for Text-to-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9182--9191}
}
GenHowTo: Learning to Generate Actions and State Transformations from Instructional Videos: Tomáš Souček,

Dima Damen,

Michael Wray,

Ivan Laptev,

Josef Sivic; [pdf] [supp]
[bibtex]
@InProceedings{Soucek_2024_CVPR,
  author    = {Sou{\v{c}}ek, Tom{\'a}{\v{s}} and Damen, Dima and Wray, Michael and Laptev, Ivan and Sivic, Josef},
  title     = {{GenHowTo}: Learning to Generate Actions and State Transformations from Instructional Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6561--6571}
}
Paint-it: Text-to-Texture Synthesis via Deep Convolutional Texture Map Optimization and Physically-Based Rendering: Kim Youwang,

Tae-Hyun Oh,

Gerard Pons-Moll; [pdf] [supp]
[bibtex]
@InProceedings{Youwang_2024_CVPR,
  author    = {Youwang, Kim and Oh, Tae-Hyun and Pons-Moll, Gerard},
  title     = {Paint-it: Text-to-Texture Synthesis via Deep Convolutional Texture Map Optimization and Physically-Based Rendering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4347--4356}
}
Physics-Aware Hand-Object Interaction Denoising: Haowen Luo,

Yunze Liu,

Li Yi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2024_CVPR,
  author    = {Luo, Haowen and Liu, Yunze and Yi, Li},
  title     = {Physics-Aware Hand-Object Interaction Denoising},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2341--2350},
}
VastGaussian: Vast 3D Gaussians for Large Scene Reconstruction: Jiaqi Lin,

Zhihao Li,

Xiao Tang,

Jianzhuang Liu,

Shiyong Liu,

Jiayue Liu,

Yangdi Lu,

Xiaofei Wu,

Songcen Xu,

Youliang Yan,

Wenming Yang; [pdf] [arXiv]
[bibtex]
@InProceedings{Lin_2024_CVPR,
  author    = {Lin, Jiaqi and Li, Zhihao and Tang, Xiao and Liu, Jianzhuang and Liu, Shiyong and Liu, Jiayue and Lu, Yangdi and Wu, Xiaofei and Xu, Songcen and Yan, Youliang and Yang, Wenming},
  title     = {{VastGaussian}: Vast {3D} {Gaussians} for Large Scene Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5166--5175},
}
Edit One for All: Interactive Batch Image Editing: Thao Nguyen,

Utkarsh Ojha,

Yuheng Li,

Haotian Liu,

Yong Jae Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2024_CVPR,
  author    = {Nguyen, Thao and Ojha, Utkarsh and Li, Yuheng and Liu, Haotian and Lee, Yong Jae},
  title     = {Edit One for All: Interactive Batch Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8271--8280},
}
Deformable One-shot Face Stylization via DINO Semantic Guidance: Yang Zhou,

Zichong Chen,

Hui Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Yang and Chen, Zichong and Huang, Hui},
  title     = {Deformable One-shot Face Stylization via {DINO} Semantic Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7787--7796},
}
Coarse-to-Fine Latent Diffusion for Pose-Guided Person Image Synthesis: Yanzuo Lu,

Manlin Zhang,

Andy J Ma,

Xiaohua Xie,

Jianhuang Lai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2024_CVPR,
  author    = {Lu, Yanzuo and Zhang, Manlin and Ma, Andy J and Xie, Xiaohua and Lai, Jianhuang},
  title     = {Coarse-to-Fine Latent Diffusion for Pose-Guided Person Image Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6420--6429},
}
OMG: Towards Open-vocabulary Motion Generation via Mixture of Controllers: Han Liang,

Jiacheng Bao,

Ruichi Zhang,

Sihan Ren,

Yuecheng Xu,

Sibei Yang,

Xin Chen,

Jingyi Yu,

Lan Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2024_CVPR,
  author    = {Liang, Han and Bao, Jiacheng and Zhang, Ruichi and Ren, Sihan and Xu, Yuecheng and Yang, Sibei and Chen, Xin and Yu, Jingyi and Xu, Lan},
  title     = {{OMG}: Towards Open-vocabulary Motion Generation via Mixture of Controllers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {482--493},
}
Align Your Gaussians: Text-to-4D with Dynamic 3D Gaussians and Composed Diffusion Models: Huan Ling,

Seung Wook Kim,

Antonio Torralba,

Sanja Fidler,

Karsten Kreis; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ling_2024_CVPR,
  author    = {Ling, Huan and Kim, Seung Wook and Torralba, Antonio and Fidler, Sanja and Kreis, Karsten},
  title     = {Align Your {Gaussians}: {Text-to-4D} with Dynamic {3D} {Gaussians} and Composed Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8576--8588},
}
PDF: A Probability-Driven Framework for Open World 3D Point Cloud Semantic Segmentation: Jinfeng Xu,

Siyuan Yang,

Xianzhi Li,

Yuan Tang,

Yixue Hao,

Long Hu,

Min Chen; [pdf] [arXiv]
[bibtex]
@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Jinfeng and Yang, Siyuan and Li, Xianzhi and Tang, Yuan and Hao, Yixue and Hu, Long and Chen, Min},
  title     = {{PDF}: A Probability-Driven Framework for Open World {3D} Point Cloud Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5977--5986},
}
Test-Time Domain Generalization for Face Anti-Spoofing: Qianyu Zhou,

Ke-Yue Zhang,

Taiping Yao,

Xuequan Lu,

Shouhong Ding,

Lizhuang Ma; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Qianyu and Zhang, Ke-Yue and Yao, Taiping and Lu, Xuequan and Ding, Shouhong and Ma, Lizhuang},
  title     = {Test-Time Domain Generalization for Face Anti-Spoofing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {175--187},
}
Real-time 3D-aware Portrait Video Relighting: Ziqi Cai,

Kaiwen Jiang,

Shu-Yu Chen,

Yu-Kun Lai,

Hongbo Fu,

Boxin Shi,

Lin Gao; [pdf] [supp]
[bibtex]
@InProceedings{Cai_2024_CVPR,
  author    = {Cai, Ziqi and Jiang, Kaiwen and Chen, Shu-Yu and Lai, Yu-Kun and Fu, Hongbo and Shi, Boxin and Gao, Lin},
  title     = {Real-time {3D-aware} Portrait Video Relighting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6221--6231},
}
3DGS-Avatar: Animatable Avatars via Deformable 3D Gaussian Splatting: Zhiyin Qian,

Shaofei Wang,

Marko Mihajlovic,

Andreas Geiger,

Siyu Tang; [pdf] [supp]
[bibtex]
@InProceedings{Qian_2024_CVPR,
  author    = {Qian, Zhiyin and Wang, Shaofei and Mihajlovic, Marko and Geiger, Andreas and Tang, Siyu},
  title     = {{3DGS-Avatar}: Animatable Avatars via Deformable {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5020--5030},
}
Style Aligned Image Generation via Shared Attention: Amir Hertz,

Andrey Voynov,

Shlomi Fruchter,

Daniel Cohen-Or; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hertz_2024_CVPR,
  author    = {Hertz, Amir and Voynov, Andrey and Fruchter, Shlomi and Cohen-Or, Daniel},
  title     = {Style Aligned Image Generation via Shared Attention},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4775--4785},
}
Back to 3D: Few-Shot 3D Keypoint Detection with Back-Projected 2D Features: Thomas Wimmer,

Peter Wonka,

Maks Ovsjanikov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wimmer_2024_CVPR,
  author    = {Wimmer, Thomas and Wonka, Peter and Ovsjanikov, Maks},
  title     = {Back to {3D}: Few-Shot {3D} Keypoint Detection with Back-Projected {2D} Features},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4154--4164},
}
Neural Markov Random Field for Stereo Matching: Tongfan Guan,

Chen Wang,

Yun-Hui Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guan_2024_CVPR,
  author    = {Guan, Tongfan and Wang, Chen and Liu, Yun-Hui},
  title     = {Neural {Markov} Random Field for Stereo Matching},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5459--5469},
}
PoseIRM: Enhance 3D Human Pose Estimation on Unseen Camera Settings via Invariant Risk Minimization: Yanlu Cai,

Weizhong Zhang,

Yuan Wu,

Cheng Jin; [pdf] [supp]
[bibtex]
@InProceedings{Cai_2024_CVPR,
  author    = {Cai, Yanlu and Zhang, Weizhong and Wu, Yuan and Jin, Cheng},
  title     = {{PoseIRM}: Enhance {3D} Human Pose Estimation on Unseen Camera Settings via Invariant Risk Minimization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2124--2133},
}
CCEdit: Creative and Controllable Video Editing via Diffusion Models: Ruoyu Feng,

Wenming Weng,

Yanhui Wang,

Yuhui Yuan,

Jianmin Bao,

Chong Luo,

Zhibo Chen,

Baining Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2024_CVPR,
  author    = {Feng, Ruoyu and Weng, Wenming and Wang, Yanhui and Yuan, Yuhui and Bao, Jianmin and Luo, Chong and Chen, Zhibo and Guo, Baining},
  title     = {{CCEdit}: Creative and Controllable Video Editing via Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6712--6722},
}
HAVE-FUN: Human Avatar Reconstruction from Few-Shot Unconstrained Images: Xihe Yang,

Xingyu Chen,

Daiheng Gao,

Shaohui Wang,

Xiaoguang Han,

Baoyuan Wang; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Xihe and Chen, Xingyu and Gao, Daiheng and Wang, Shaohui and Han, Xiaoguang and Wang, Baoyuan},
  title     = {{HAVE-FUN}: Human Avatar Reconstruction from Few-Shot Unconstrained Images},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {742--752},
}
DiffMorpher: Unleashing the Capability of Diffusion Models for Image Morphing: Kaiwen Zhang,

Yifan Zhou,

Xudong Xu,

Bo Dai,

Xingang Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Kaiwen and Zhou, Yifan and Xu, Xudong and Dai, Bo and Pan, Xingang},
  title     = {{DiffMorpher}: Unleashing the Capability of Diffusion Models for Image Morphing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7912--7921},
}
Towards Real-World HDR Video Reconstruction: A Large-Scale Benchmark Dataset and A Two-Stage Alignment Network: Yong Shu,

Liquan Shen,

Xiangyu Hu,

Mengyao Li,

Zihao Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shu_2024_CVPR,
  author    = {Shu, Yong and Shen, Liquan and Hu, Xiangyu and Li, Mengyao and Zhou, Zihao},
  title     = {Towards Real-World {HDR} Video Reconstruction: A Large-Scale Benchmark Dataset and A Two-Stage Alignment Network},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2879--2888},
}
Efficient 3D Implicit Head Avatar with Mesh-anchored Hash Table Blendshapes: Ziqian Bai,

Feitong Tan,

Sean Fanello,

Rohit Pandey,

Mingsong Dou,

Shichen Liu,

Ping Tan,

Yinda Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bai_2024_CVPR,
  author    = {Bai, Ziqian and Tan, Feitong and Fanello, Sean and Pandey, Rohit and Dou, Mingsong and Liu, Shichen and Tan, Ping and Zhang, Yinda},
  title     = {Efficient {3D} Implicit Head Avatar with Mesh-anchored Hash Table Blendshapes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1975--1984},
}
No Time to Train: Empowering Non-Parametric Networks for Few-shot 3D Scene Segmentation: Xiangyang Zhu,

Renrui Zhang,

Bowei He,

Ziyu Guo,

Jiaming Liu,

Han Xiao,

Chaoyou Fu,

Hao Dong,

Peng Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2024_CVPR,
  author    = {Zhu, Xiangyang and Zhang, Renrui and He, Bowei and Guo, Ziyu and Liu, Jiaming and Xiao, Han and Fu, Chaoyou and Dong, Hao and Gao, Peng},
  title     = {No Time to Train: Empowering Non-Parametric Networks for Few-shot {3D} Scene Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3838--3847},
}
PhysGaussian: Physics-Integrated 3D Gaussians for Generative Dynamics: Tianyi Xie,

Zeshun Zong,

Yuxing Qiu,

Xuan Li,

Yutao Feng,

Yin Yang,

Chenfanfu Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2024_CVPR,
  author    = {Xie, Tianyi and Zong, Zeshun and Qiu, Yuxing and Li, Xuan and Feng, Yutao and Yang, Yin and Jiang, Chenfanfu},
  title     = {{PhysGaussian}: Physics-Integrated {3D} {Gaussians} for Generative Dynamics},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4389--4398},
}
Spatio-Temporal Turbulence Mitigation: A Translational Perspective: Xingguang Zhang,

Nicholas Chimitt,

Yiheng Chi,

Zhiyuan Mao,

Stanley H. Chan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Xingguang and Chimitt, Nicholas and Chi, Yiheng and Mao, Zhiyuan and Chan, Stanley H.},
  title     = {Spatio-Temporal Turbulence Mitigation: A Translational Perspective},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2889--2899},
}
Grounded Text-to-Image Synthesis with Attention Refocusing: Quynh Phung,

Songwei Ge,

Jia-Bin Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Phung_2024_CVPR,
  author    = {Phung, Quynh and Ge, Songwei and Huang, Jia-Bin},
  title     = {Grounded Text-to-Image Synthesis with Attention Refocusing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7932--7942},
}
IReNe: Instant Recoloring of Neural Radiance Fields: Alessio Mazzucchelli,

Adrian Garcia-Garcia,

Elena Garces,

Fernando Rivas-Manzaneque,

Francesc Moreno-Noguer,

Adrian Penate-Sanchez; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mazzucchelli_2024_CVPR,
  author    = {Mazzucchelli, Alessio and Garcia-Garcia, Adrian and Garces, Elena and Rivas-Manzaneque, Fernando and Moreno-Noguer, Francesc and Penate-Sanchez, Adrian},
  title     = {{IReNe}: Instant Recoloring of Neural Radiance Fields},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5937--5946},
}
Class Tokens Infusion for Weakly Supervised Semantic Segmentation: Sung-Hoon Yoon,

Hoyong Kwon,

Hyeonseong Kim,

Kuk-Jin Yoon; [pdf] [supp]
[bibtex]
@InProceedings{Yoon_2024_CVPR,
  author    = {Yoon, Sung-Hoon and Kwon, Hoyong and Kim, Hyeonseong and Yoon, Kuk-Jin},
  title     = {Class Tokens Infusion for Weakly Supervised Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3595--3605},
}
FedHCA2: Towards Hetero-Client Federated Multi-Task Learning: Yuxiang Lu,

Suizhi Huang,

Yuwen Yang,

Shalayiding Sirejiding,

Yue Ding,

Hongtao Lu; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2024_CVPR,
  author    = {Lu, Yuxiang and Huang, Suizhi and Yang, Yuwen and Sirejiding, Shalayiding and Ding, Yue and Lu, Hongtao},
  title     = {{FedHCA2}: Towards Hetero-Client Federated Multi-Task Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5599--5609},
}
Motion Diversification Networks: Hee Jae Kim,

Eshed Ohn-Bar; [pdf]
[bibtex]
@InProceedings{Kim_2024_CVPR,
  author    = {Kim, Hee Jae and Ohn-Bar, Eshed},
  title     = {Motion Diversification Networks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1650--1660},
}
Telling Left from Right: Identifying Geometry-Aware Semantic Correspondence: Junyi Zhang,

Charles Herrmann,

Junhwa Hur,

Eric Chen,

Varun Jampani,

Deqing Sun,

Ming-Hsuan Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Junyi and Herrmann, Charles and Hur, Junhwa and Chen, Eric and Jampani, Varun and Sun, Deqing and Yang, Ming-Hsuan},
  title     = {Telling Left from Right: Identifying Geometry-Aware Semantic Correspondence},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3076--3085},
}
PAIR Diffusion: A Comprehensive Multimodal Object-Level Image Editor: Vidit Goel,

Elia Peruzzo,

Yifan Jiang,

Dejia Xu,

Xingqian Xu,

Nicu Sebe,

Trevor Darrell,

Zhangyang Wang,

Humphrey Shi; [pdf] [supp]
[bibtex]
@InProceedings{Goel_2024_CVPR,
  author    = {Goel, Vidit and Peruzzo, Elia and Jiang, Yifan and Xu, Dejia and Xu, Xingqian and Sebe, Nicu and Darrell, Trevor and Wang, Zhangyang and Shi, Humphrey},
  title     = {{PAIR} Diffusion: A Comprehensive Multimodal Object-Level Image Editor},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8609--8618},
}
TokenCompose: Text-to-Image Diffusion with Token-level Supervision: Zirui Wang,

Zhizhou Sha,

Zheng Ding,

Yilin Wang,

Zhuowen Tu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Zirui and Sha, Zhizhou and Ding, Zheng and Wang, Yilin and Tu, Zhuowen},
  title     = {{TokenCompose}: Text-to-Image Diffusion with Token-level Supervision},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8553--8564},
}
FINER: Flexible Spectral-bias Tuning in Implicit NEural Representation by Variable-periodic Activation Functions: Zhen Liu,

Hao Zhu,

Qi Zhang,

Jingde Fu,

Weibing Deng,

Zhan Ma,

Yanwen Guo,

Xun Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Zhen and Zhu, Hao and Zhang, Qi and Fu, Jingde and Deng, Weibing and Ma, Zhan and Guo, Yanwen and Cao, Xun},
  title     = {{FINER}: Flexible Spectral-bias Tuning in Implicit {NEural} Representation by Variable-periodic Activation Functions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2713--2722},
}
TextCraftor: Your Text Encoder Can be Image Quality Controller: Yanyu Li,

Xian Liu,

Anil Kag,

Ju Hu,

Yerlan Idelbayev,

Dhritiman Sagar,

Yanzhi Wang,

Sergey Tulyakov,

Jian Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Yanyu and Liu, Xian and Kag, Anil and Hu, Ju and Idelbayev, Yerlan and Sagar, Dhritiman and Wang, Yanzhi and Tulyakov, Sergey and Ren, Jian},
  title     = {{TextCraftor}: Your Text Encoder Can be Image Quality Controller},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7985--7995},
}
IMPRINT: Generative Object Compositing by Learning Identity-Preserving Representation: Yizhi Song,

Zhifei Zhang,

Zhe Lin,

Scott Cohen,

Brian Price,

Jianming Zhang,

Soo Ye Kim,

He Zhang,

Wei Xiong,

Daniel Aliaga; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2024_CVPR,
  author    = {Song, Yizhi and Zhang, Zhifei and Lin, Zhe and Cohen, Scott and Price, Brian and Zhang, Jianming and Kim, Soo Ye and Zhang, He and Xiong, Wei and Aliaga, Daniel},
  title     = {{IMPRINT}: Generative Object Compositing by Learning Identity-Preserving Representation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8048--8058},
}
Portrait4D: Learning One-Shot 4D Head Avatar Synthesis using Synthetic Data: Yu Deng,

Duomin Wang,

Xiaohang Ren,

Xingyu Chen,

Baoyuan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deng_2024_CVPR,
  author    = {Deng, Yu and Wang, Duomin and Ren, Xiaohang and Chen, Xingyu and Wang, Baoyuan},
  title     = {{Portrait4D}: Learning One-Shot {4D} Head Avatar Synthesis using Synthetic Data},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7119--7130},
}
ConvoFusion: Multi-Modal Conversational Diffusion for Co-Speech Gesture Synthesis: Muhammad Hamza Mughal,

Rishabh Dabral,

Ikhsanul Habibie,

Lucia Donatelli,

Marc Habermann,

Christian Theobalt; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mughal_2024_CVPR,
  author    = {Mughal, Muhammad Hamza and Dabral, Rishabh and Habibie, Ikhsanul and Donatelli, Lucia and Habermann, Marc and Theobalt, Christian},
  title     = {{ConvoFusion}: Multi-Modal Conversational Diffusion for Co-Speech Gesture Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1388--1398},
}
Boosting Neural Representations for Videos with a Conditional Decoder: Xinjie Zhang,

Ren Yang,

Dailan He,

Xingtong Ge,

Tongda Xu,

Yan Wang,

Hongwei Qin,

Jun Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Xinjie and Yang, Ren and He, Dailan and Ge, Xingtong and Xu, Tongda and Wang, Yan and Qin, Hongwei and Zhang, Jun},
  title     = {Boosting Neural Representations for Videos with a Conditional Decoder},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2556--2566},
}
From Audio to Photoreal Embodiment: Synthesizing Humans in Conversations: Evonne Ng,

Javier Romero,

Timur Bagautdinov,

Shaojie Bai,

Trevor Darrell,

Angjoo Kanazawa,

Alexander Richard; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ng_2024_CVPR,
  author    = {Ng, Evonne and Romero, Javier and Bagautdinov, Timur and Bai, Shaojie and Darrell, Trevor and Kanazawa, Angjoo and Richard, Alexander},
  title     = {From Audio to Photoreal Embodiment: Synthesizing Humans in Conversations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1001--1010},
}
Single-View Scene Point Cloud Human Grasp Generation: Yan-Kang Wang,

Chengyi Xing,

Yi-Lin Wei,

Xiao-Ming Wu,

Wei-Shi Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Yan-Kang and Xing, Chengyi and Wei, Yi-Lin and Wu, Xiao-Ming and Zheng, Wei-Shi},
  title     = {Single-View Scene Point Cloud Human Grasp Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {831--841},
}
One-step Diffusion with Distribution Matching Distillation: Tianwei Yin,

Michaël Gharbi,

Richard Zhang,

Eli Shechtman,

Frédo Durand,

William T. Freeman,

Taesung Park; [pdf] [arXiv]
[bibtex]
@InProceedings{Yin_2024_CVPR,
  author    = {Yin, Tianwei and Gharbi, Micha{\"e}l and Zhang, Richard and Shechtman, Eli and Durand, Fr{\'e}do and Freeman, William T. and Park, Taesung},
  title     = {One-step Diffusion with Distribution Matching Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6613--6623},
}
Rethinking Human Motion Prediction with Symplectic Integral: Haipeng Chen,

Kedi Lyu,

Zhenguang Liu,

Yifang Yin,

Xun Yang,

Yingda Lyu; [pdf]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Haipeng and Lyu, Kedi and Liu, Zhenguang and Yin, Yifang and Yang, Xun and Lyu, Yingda},
  title     = {Rethinking Human Motion Prediction with Symplectic Integral},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2134--2143},
}
CPGA: Coding Priors-Guided Aggregation Network for Compressed Video Quality Enhancement: Qiang Zhu,

Jinhua Hao,

Yukang Ding,

Yu Liu,

Qiao Mo,

Ming Sun,

Chao Zhou,

Shuyuan Zhu; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhu_2024_CVPR,
  author    = {Zhu, Qiang and Hao, Jinhua and Ding, Yukang and Liu, Yu and Mo, Qiao and Sun, Ming and Zhou, Chao and Zhu, Shuyuan},
  title     = {{CPGA}: Coding Priors-Guided Aggregation Network for Compressed Video Quality Enhancement},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2964--2974},
}
MicroCinema: A Divide-and-Conquer Approach for Text-to-Video Generation: Yanhui Wang,

Jianmin Bao,

Wenming Weng,

Ruoyu Feng,

Dacheng Yin,

Tao Yang,

Jingxu Zhang,

Qi Dai,

Zhiyuan Zhao,

Chunyu Wang,

Kai Qiu,

Yuhui Yuan,

Xiaoyan Sun,

Chong Luo,

Baining Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Yanhui and Bao, Jianmin and Weng, Wenming and Feng, Ruoyu and Yin, Dacheng and Yang, Tao and Zhang, Jingxu and Dai, Qi and Zhao, Zhiyuan and Wang, Chunyu and Qiu, Kai and Yuan, Yuhui and Sun, Xiaoyan and Luo, Chong and Guo, Baining},
  title     = {{MicroCinema}: A Divide-and-Conquer Approach for Text-to-Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8414--8424},
}
Structure Matters: Tackling the Semantic Discrepancy in Diffusion Models for Image Inpainting: Haipeng Liu,

Yang Wang,

Biao Qian,

Meng Wang,

Yong Rui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Haipeng and Wang, Yang and Qian, Biao and Wang, Meng and Rui, Yong},
  title     = {Structure Matters: Tackling the Semantic Discrepancy in Diffusion Models for Image Inpainting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8038--8047},
}
Makeup Prior Models for 3D Facial Makeup Estimation and Applications: Xingchao Yang,

Takafumi Taketomi,

Yuki Endo,

Yoshihiro Kanamori; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Xingchao and Taketomi, Takafumi and Endo, Yuki and Kanamori, Yoshihiro},
  title     = {Makeup Prior Models for {3D} Facial Makeup Estimation and Applications},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2165--2176},
}
I'M HOI: Inertia-aware Monocular Capture of 3D Human-Object Interactions: Chengfeng Zhao,

Juze Zhang,

Jiashen Du,

Ziwei Shan,

Junye Wang,

Jingyi Yu,

Jingya Wang,

Lan Xu; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Chengfeng and Zhang, Juze and Du, Jiashen and Shan, Ziwei and Wang, Junye and Yu, Jingyi and Wang, Jingya and Xu, Lan},
  title     = {{I'M HOI}: Inertia-aware Monocular Capture of {3D} Human-Object Interactions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {729--741},
}
Dynamic Policy-Driven Adaptive Multi-Instance Learning for Whole Slide Image Classification: Tingting Zheng,

Kui Jiang,

Hongxun Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2024_CVPR,
  author    = {Zheng, Tingting and Jiang, Kui and Yao, Hongxun},
  title     = {Dynamic Policy-Driven Adaptive Multi-Instance Learning for Whole Slide Image Classification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8028--8037},
}
LiDAR4D: Dynamic Neural Fields for Novel Space-time View LiDAR Synthesis: Zehan Zheng,

Fan Lu,

Weiyi Xue,

Guang Chen,

Changjun Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2024_CVPR,
  author    = {Zheng, Zehan and Lu, Fan and Xue, Weiyi and Chen, Guang and Jiang, Changjun},
  title     = {{LiDAR4D}: Dynamic Neural Fields for Novel Space-time View {LiDAR} Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5145--5154},
}
Exploiting Diffusion Prior for Generalizable Dense Prediction: Hsin-Ying Lee,

Hung-Yu Tseng,

Hsin-Ying Lee,

Ming-Hsuan Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2024_CVPR,
  author    = {Lee, Hsin-Ying and Tseng, Hung-Yu and Lee, Hsin-Ying and Yang, Ming-Hsuan},
  title     = {Exploiting Diffusion Prior for Generalizable Dense Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7861--7871},
}
Orthogonal Adaptation for Modular Customization of Diffusion Models: Ryan Po,

Guandao Yang,

Kfir Aberman,

Gordon Wetzstein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Po_2024_CVPR,
  author    = {Po, Ryan and Yang, Guandao and Aberman, Kfir and Wetzstein, Gordon},
  title     = {Orthogonal Adaptation for Modular Customization of Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7964--7973},
}
Optimizing Diffusion Noise Can Serve As Universal Motion Priors: Korrawe Karunratanakul,

Konpat Preechakul,

Emre Aksan,

Thabo Beeler,

Supasorn Suwajanakorn,

Siyu Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Karunratanakul_2024_CVPR,
  author    = {Karunratanakul, Korrawe and Preechakul, Konpat and Aksan, Emre and Beeler, Thabo and Suwajanakorn, Supasorn and Tang, Siyu},
  title     = {Optimizing Diffusion Noise Can Serve As Universal Motion Priors},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1334--1345},
}
OVFoodSeg: Elevating Open-Vocabulary Food Image Segmentation via Image-Informed Textual Representation: Xiongwei Wu,

Sicheng Yu,

Ee-Peng Lim,

Chong-Wah Ngo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Xiongwei and Yu, Sicheng and Lim, Ee-Peng and Ngo, Chong-Wah},
  title     = {{OVFoodSeg}: Elevating Open-Vocabulary Food Image Segmentation via Image-Informed Textual Representation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4144--4153},
}
XFeat: Accelerated Features for Lightweight Image Matching: Guilherme Potje,

Felipe Cadar,

André Araujo,

Renato Martins,

Erickson R. Nascimento; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Potje_2024_CVPR,
  author    = {Potje, Guilherme and Cadar, Felipe and Araujo, Andr{\'e} and Martins, Renato and Nascimento, Erickson R.},
  title     = {{XFeat}: Accelerated Features for Lightweight Image Matching},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2682--2691},
}
VideoRF: Rendering Dynamic Radiance Fields as 2D Feature Video Streams: Liao Wang,

Kaixin Yao,

Chengcheng Guo,

Zhirui Zhang,

Qiang Hu,

Jingyi Yu,

Lan Xu,

Minye Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Liao and Yao, Kaixin and Guo, Chengcheng and Zhang, Zhirui and Hu, Qiang and Yu, Jingyi and Xu, Lan and Wu, Minye},
  title     = {{VideoRF}: Rendering Dynamic Radiance Fields as {2D} Feature Video Streams},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {470--481},
}
DPHMs: Diffusion Parametric Head Models for Depth-based Tracking: Jiapeng Tang,

Angela Dai,

Yinyu Nie,

Lev Markhasin,

Justus Thies,

Matthias Nießner; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2024_CVPR,
  author    = {Tang, Jiapeng and Dai, Angela and Nie, Yinyu and Markhasin, Lev and Thies, Justus and Nie{\ss}ner, Matthias},
  title     = {{DPHMs}: Diffusion Parametric Head Models for Depth-based Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1111--1122},
}
DetDiffusion: Synergizing Generative and Perceptive Models for Enhanced Data Generation and Perception: Yibo Wang,

Ruiyuan Gao,

Kai Chen,

Kaiqiang Zhou,

Yingjie Cai,

Lanqing Hong,

Zhenguo Li,

Lihui Jiang,

Dit-Yan Yeung,

Qiang Xu,

Kai Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Yibo and Gao, Ruiyuan and Chen, Kai and Zhou, Kaiqiang and Cai, Yingjie and Hong, Lanqing and Li, Zhenguo and Jiang, Lihui and Yeung, Dit-Yan and Xu, Qiang and Zhang, Kai},
  title     = {{DetDiffusion}: Synergizing Generative and Perceptive Models for Enhanced Data Generation and Perception},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7246--7255},
}
Perception-Oriented Video Frame Interpolation via Asymmetric Blending: Guangyang Wu,

Xin Tao,

Changlin Li,

Wenyi Wang,

Xiaohong Liu,

Qingqing Zheng; [pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2024_CVPR,
    author    = {Wu, Guangyang and Tao, Xin and Li, Changlin and Wang, Wenyi and Liu, Xiaohong and Zheng, Qingqing},
    title     = {Perception-Oriented Video Frame Interpolation via Asymmetric Blending},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {2753--2762}
}
DUDF: Differentiable Unsigned Distance Fields with Hyperbolic Scaling: Miguel Fainstein,

Viviana Siless,

Emmanuel Iarussi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fainstein_2024_CVPR,
    author    = {Fainstein, Miguel and Siless, Viviana and Iarussi, Emmanuel},
    title     = {{DUDF}: Differentiable Unsigned Distance Fields with Hyperbolic Scaling},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {4484--4493}
}
2S-UDF: A Novel Two-stage UDF Learning Method for Robust Non-watertight Model Reconstruction from Multi-view Images: Junkai Deng,

Fei Hou,

Xuhui Chen,

Wencheng Wang,

Ying He; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2024_CVPR,
    author    = {Deng, Junkai and Hou, Fei and Chen, Xuhui and Wang, Wencheng and He, Ying},
    title     = {{2S-UDF}: A Novel Two-stage {UDF} Learning Method for Robust Non-watertight Model Reconstruction from Multi-view Images},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {5084--5093}
}
UniVS: Unified and Universal Video Segmentation with Prompts as Queries: Minghan Li,

Shuai Li,

Xindong Zhang,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
    author    = {Li, Minghan and Li, Shuai and Zhang, Xindong and Zhang, Lei},
    title     = {{UniVS}: Unified and Universal Video Segmentation with Prompts as Queries},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {3227--3238}
}
Efficiently Assemble Normalization Layers and Regularization for Federated Domain Generalization: Khiem Le,

Long Ho,

Cuong Do,

Danh Le-Phuoc,

Kok-Seng Wong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Le_2024_CVPR,
    author    = {Le, Khiem and Ho, Long and Do, Cuong and Le-Phuoc, Danh and Wong, Kok-Seng},
    title     = {Efficiently Assemble Normalization Layers and Regularization for Federated Domain Generalization},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {6027--6036}
}
Depth Information Assisted Collaborative Mutual Promotion Network for Single Image Dehazing: Yafei Zhang,

Shen Zhou,

Huafeng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
    author    = {Zhang, Yafei and Zhou, Shen and Li, Huafeng},
    title     = {Depth Information Assisted Collaborative Mutual Promotion Network for Single Image Dehazing},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {2846--2855}
}
Unlocking the Potential of Pre-trained Vision Transformers for Few-Shot Semantic Segmentation through Relationship Descriptors: Ziqin Zhou,

Hai-Ming Xu,

Yangyang Shu,

Lingqiao Liu; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2024_CVPR,
    author    = {Zhou, Ziqin and Xu, Hai-Ming and Shu, Yangyang and Liu, Lingqiao},
    title     = {Unlocking the Potential of Pre-trained Vision Transformers for Few-Shot Semantic Segmentation through Relationship Descriptors},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {3817--3827}
}
CustomListener: Text-guided Responsive Interaction for User-friendly Listening Head Generation: Xi Liu,

Ying Guo,

Cheng Zhen,

Tong Li,

Yingying Ao,

Pengfei Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR,
    author    = {Liu, Xi and Guo, Ying and Zhen, Cheng and Li, Tong and Ao, Yingying and Yan, Pengfei},
    title     = {{CustomListener}: Text-guided Responsive Interaction for User-friendly Listening Head Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {2415--2424}
}
Fun with Flags: Robust Principal Directions via Flag Manifolds: Nathan Mankovich,

Gustau Camps-Valls,

Tolga Birdal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mankovich_2024_CVPR,
    author    = {Mankovich, Nathan and Camps-Valls, Gustau and Birdal, Tolga},
    title     = {Fun with Flags: Robust Principal Directions via Flag Manifolds},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {330--340}
}
Generating Non-Stationary Textures using Self-Rectification: Yang Zhou,

Rongjun Xiao,

Dani Lischinski,

Daniel Cohen-Or,

Hui Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2024_CVPR,
    author    = {Zhou, Yang and Xiao, Rongjun and Lischinski, Dani and Cohen-Or, Daniel and Huang, Hui},
    title     = {Generating Non-Stationary Textures using Self-Rectification},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {7767--7776}
}
SPU-PMD: Self-Supervised Point Cloud Upsampling via Progressive Mesh Deformation: Yanzhe Liu,

Rong Chen,

Yushi Li,

Yixi Li,

Xuehou Tan; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2024_CVPR,
    author    = {Liu, Yanzhe and Chen, Rong and Li, Yushi and Li, Yixi and Tan, Xuehou},
    title     = {{SPU-PMD}: Self-Supervised Point Cloud Upsampling via Progressive Mesh Deformation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {5188--5197}
}
Snap Video: Scaled Spatiotemporal Transformers for Text-to-Video Synthesis: Willi Menapace,

Aliaksandr Siarohin,

Ivan Skorokhodov,

Ekaterina Deyneka,

Tsai-Shien Chen,

Anil Kag,

Yuwei Fang,

Aleksei Stoliar,

Elisa Ricci,

Jian Ren,

Sergey Tulyakov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Menapace_2024_CVPR,
    author    = {Menapace, Willi and Siarohin, Aliaksandr and Skorokhodov, Ivan and Deyneka, Ekaterina and Chen, Tsai-Shien and Kag, Anil and Fang, Yuwei and Stoliar, Aleksei and Ricci, Elisa and Ren, Jian and Tulyakov, Sergey},
    title     = {{Snap Video}: Scaled Spatiotemporal Transformers for Text-to-Video Synthesis},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {7038--7048}
}
JointSQ: Joint Sparsification-Quantization for Distributed Learning: Weiying Xie,

Haowei Li,

Jitao Ma,

Yunsong Li,

Jie Lei,

Donglai Liu,

Leyuan Fang; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2024_CVPR,
    author    = {Xie, Weiying and Li, Haowei and Ma, Jitao and Li, Yunsong and Lei, Jie and Liu, Donglai and Fang, Leyuan},
    title     = {{JointSQ}: Joint Sparsification-Quantization for Distributed Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {5778--5787}
}
A Unified Framework for Human-centric Point Cloud Video Understanding: Yiteng Xu,

Kecheng Ye,

Xiao Han,

Yiming Ren,

Xinge Zhu,

Yuexin Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2024_CVPR,
    author    = {Xu, Yiteng and Ye, Kecheng and Han, Xiao and Ren, Yiming and Zhu, Xinge and Ma, Yuexin},
    title     = {A Unified Framework for Human-centric Point Cloud Video Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {1155--1164}
}
Shadow-Enlightened Image Outpainting: Hang Yu,

Ruilin Li,

Shaorong Xie,

Jiayan Qiu; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2024_CVPR,
    author    = {Yu, Hang and Li, Ruilin and Xie, Shaorong and Qiu, Jiayan},
    title     = {Shadow-Enlightened Image Outpainting},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {7850--7860}
}
BOTH2Hands: Inferring 3D Hands from Both Text Prompts and Body Dynamics: Wenqian Zhang,

Molin Huang,

Yuxuan Zhou,

Juze Zhang,

Jingyi Yu,

Jingya Wang,

Lan Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
    author    = {Zhang, Wenqian and Huang, Molin and Zhou, Yuxuan and Zhang, Juze and Yu, Jingyi and Wang, Jingya and Xu, Lan},
    title     = {{BOTH2Hands}: Inferring {3D} Hands from Both Text Prompts and Body Dynamics},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {2393--2404}
}
DreamAvatar: Text-and-Shape Guided 3D Human Avatar Generation via Diffusion Models: Yukang Cao,

Yan-Pei Cao,

Kai Han,

Ying Shan,

Kwan-Yee K. Wong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2024_CVPR,
    author    = {Cao, Yukang and Cao, Yan-Pei and Han, Kai and Shan, Ying and Wong, Kwan-Yee K.},
    title     = {{DreamAvatar}: Text-and-Shape Guided {3D} Human Avatar Generation via Diffusion Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {958--968}
}
Bidirectional Autoregressive Diffusion Model for Dance Generation: Canyu Zhang,

Youbao Tang,

Ning Zhang,

Ruei-Sung Lin,

Mei Han,

Jing Xiao,

Song Wang; [pdf]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
    author    = {Zhang, Canyu and Tang, Youbao and Zhang, Ning and Lin, Ruei-Sung and Han, Mei and Xiao, Jing and Wang, Song},
    title     = {Bidirectional Autoregressive Diffusion Model for Dance Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {687--696}
}
FRESCO: Spatial-Temporal Correspondence for Zero-Shot Video Translation: Shuai Yang,

Yifan Zhou,

Ziwei Liu,

Chen Change Loy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2024_CVPR,
    author    = {Yang, Shuai and Zhou, Yifan and Liu, Ziwei and Loy, Chen Change},
    title     = {{FRESCO}: Spatial-Temporal Correspondence for Zero-Shot Video Translation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {8703--8712}
}
SplattingAvatar: Realistic Real-Time Human Avatars with Mesh-Embedded Gaussian Splatting: Zhijing Shao,

Zhaolong Wang,

Zhuang Li,

Duotun Wang,

Xiangru Lin,

Yu Zhang,

Mingming Fan,

Zeyu Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shao_2024_CVPR,
    author    = {Shao, Zhijing and Wang, Zhaolong and Li, Zhuang and Wang, Duotun and Lin, Xiangru and Zhang, Yu and Fan, Mingming and Wang, Zeyu},
    title     = {{SplattingAvatar}: Realistic Real-Time Human Avatars with Mesh-Embedded {Gaussian} Splatting},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {1606--1616}
}
MoSAR: Monocular Semi-Supervised Model for Avatar Reconstruction using Differentiable Shading: Abdallah Dib,

Luiz Gustavo Hafemann,

Emeline Got,

Trevor Anderson,

Amin Fadaeinejad,

Rafael M. O. Cruz,

Marc-André Carbonneau; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dib_2024_CVPR,
    author    = {Dib, Abdallah and Hafemann, Luiz Gustavo and Got, Emeline and Anderson, Trevor and Fadaeinejad, Amin and Cruz, Rafael M. O. and Carbonneau, Marc-Andr{\'e}},
    title     = {{MoSAR}: Monocular Semi-Supervised Model for Avatar Reconstruction using Differentiable Shading},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {1770--1780}
}
RankED: Addressing Imbalance and Uncertainty in Edge Detection Using Ranking-based Losses: Bedrettin Cetinkaya,

Sinan Kalkan,

Emre Akbas; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cetinkaya_2024_CVPR,
    author    = {Cetinkaya, Bedrettin and Kalkan, Sinan and Akbas, Emre},
    title     = {{RankED}: Addressing Imbalance and Uncertainty in Edge Detection Using Ranking-based Losses},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {3239--3249}
}
DiffHuman: Probabilistic Photorealistic 3D Reconstruction of Humans: Akash Sengupta,

Thiemo Alldieck,

Nikos Kolotouros,

Enric Corona,

Andrei Zanfir,

Cristian Sminchisescu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sengupta_2024_CVPR,
    author    = {Sengupta, Akash and Alldieck, Thiemo and Kolotouros, Nikos and Corona, Enric and Zanfir, Andrei and Sminchisescu, Cristian},
    title     = {{DiffHuman}: Probabilistic Photorealistic {3D} Reconstruction of Humans},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {1439--1449}
}
Permutation Equivariance of Transformers and Its Applications: Hengyuan Xu,

Liyao Xiang,

Hangyu Ye,

Dixi Yao,

Pengzhi Chu,

Baochun Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2024_CVPR,
    author    = {Xu, Hengyuan and Xiang, Liyao and Ye, Hangyu and Yao, Dixi and Chu, Pengzhi and Li, Baochun},
    title     = {Permutation Equivariance of Transformers and Its Applications},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {5987--5996}
}
SVDTree: Semantic Voxel Diffusion for Single Image Tree Reconstruction: Yuan Li,

Zhihao Liu,

Bedrich Benes,

Xiaopeng Zhang,

Jianwei Guo; [pdf] [supp]
[bibtex]
@InProceedings{Li_2024_CVPR,
    author    = {Li, Yuan and Liu, Zhihao and Benes, Bedrich and Zhang, Xiaopeng and Guo, Jianwei},
    title     = {{SVDTree}: Semantic Voxel Diffusion for Single Image Tree Reconstruction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {4692--4702}
}
Rethinking FID: Towards a Better Evaluation Metric for Image Generation: Sadeep Jayasumana,

Srikumar Ramalingam,

Andreas Veit,

Daniel Glasner,

Ayan Chakrabarti,

Sanjiv Kumar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jayasumana_2024_CVPR,
    author    = {Jayasumana, Sadeep and Ramalingam, Srikumar and Veit, Andreas and Glasner, Daniel and Chakrabarti, Ayan and Kumar, Sanjiv},
    title     = {Rethinking {FID}: Towards a Better Evaluation Metric for Image Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {9307--9315}
}
SuperPrimitive: Scene Reconstruction at a Primitive Level: Kirill Mazur,

Gwangbin Bae,

Andrew J. Davison; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mazur_2024_CVPR,
    author    = {Mazur, Kirill and Bae, Gwangbin and Davison, Andrew J.},
    title     = {{SuperPrimitive}: Scene Reconstruction at a Primitive Level},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {4979--4989}
}
TFMQ-DM: Temporal Feature Maintenance Quantization for Diffusion Models: Yushi Huang,

Ruihao Gong,

Jing Liu,

Tianlong Chen,

Xianglong Liu; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2024_CVPR,
    author    = {Huang, Yushi and Gong, Ruihao and Liu, Jing and Chen, Tianlong and Liu, Xianglong},
    title     = {{TFMQ-DM}: Temporal Feature Maintenance Quantization for Diffusion Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {7362--7371}
}
CONFORM: Contrast is All You Need for High-Fidelity Text-to-Image Diffusion Models: Tuna Han Salih Meral,

Enis Simsar,

Federico Tombari,

Pinar Yanardag; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Meral_2024_CVPR,
    author    = {Meral, Tuna Han Salih and Simsar, Enis and Tombari, Federico and Yanardag, Pinar},
    title     = {{CONFORM}: Contrast is All You Need for High-Fidelity Text-to-Image Diffusion Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {9005--9014}
}
Self-Supervised Facial Representation Learning with Facial Region Awareness: Zheng Gao,

Ioannis Patras; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2024_CVPR,
    author    = {Gao, Zheng and Patras, Ioannis},
    title     = {Self-Supervised Facial Representation Learning with Facial Region Awareness},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {2081--2092}
}
GaussianDreamer: Fast Generation from Text to 3D Gaussians by Bridging 2D and 3D Diffusion Models: Taoran Yi,

Jiemin Fang,

Junjie Wang,

Guanjun Wu,

Lingxi Xie,

Xiaopeng Zhang,

Wenyu Liu,

Qi Tian,

Xinggang Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yi_2024_CVPR,
    author    = {Yi, Taoran and Fang, Jiemin and Wang, Junjie and Wu, Guanjun and Xie, Lingxi and Zhang, Xiaopeng and Liu, Wenyu and Tian, Qi and Wang, Xinggang},
    title     = {{GaussianDreamer}: Fast Generation from Text to {3D} {Gaussians} by Bridging {2D} and {3D} Diffusion Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {6796--6807}
}
Open-Vocabulary Attention Maps with Token Optimization for Semantic Segmentation in Diffusion Models: Pablo Marcos-Manchón,

Roberto Alcover-Couso,

Juan C. SanMiguel,

José M. Martínez; [pdf] [supp]
[bibtex]
@InProceedings{Marcos-Manchon_2024_CVPR,
    author    = {Marcos-Manch{\'o}n, Pablo and Alcover-Couso, Roberto and SanMiguel, Juan C. and Mart{\'\i}nez, Jos{\'e} M.},
    title     = {Open-Vocabulary Attention Maps with Token Optimization for Semantic Segmentation in Diffusion Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {9242--9252}
}
DreamComposer: Controllable 3D Object Generation via Multi-View Conditions: Yunhan Yang,

Yukun Huang,

Xiaoyang Wu,

Yuan-Chen Guo,

Song-Hai Zhang,

Hengshuang Zhao,

Tong He,

Xihui Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2024_CVPR,
    author    = {Yang, Yunhan and Huang, Yukun and Wu, Xiaoyang and Guo, Yuan-Chen and Zhang, Song-Hai and Zhao, Hengshuang and He, Tong and Liu, Xihui},
    title     = {{DreamComposer}: Controllable {3D} Object Generation via Multi-View Conditions},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {8111--8120}
}
Self-Calibrating Vicinal Risk Minimisation for Model Calibration: Jiawei Liu,

Changkun Ye,

Ruikai Cui,

Nick Barnes; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2024_CVPR,
    author    = {Liu, Jiawei and Ye, Changkun and Cui, Ruikai and Barnes, Nick},
    title     = {Self-Calibrating Vicinal Risk Minimisation for Model Calibration},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {3335--3345}
}
LPSNet: End-to-End Human Pose and Shape Estimation with Lensless Imaging: Haoyang Ge,

Qiao Feng,

Hailong Jia,

Xiongzheng Li,

Xiangjun Yin,

You Zhou,

Jingyu Yang,

Kun Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ge_2024_CVPR,
    author    = {Ge, Haoyang and Feng, Qiao and Jia, Hailong and Li, Xiongzheng and Yin, Xiangjun and Zhou, You and Yang, Jingyu and Li, Kun},
    title     = {{LPSNet}: End-to-End Human Pose and Shape Estimation with Lensless Imaging},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {1471--1480}
}
Towards a Simultaneous and Granular Identity-Expression Control in Personalized Face Generation: Renshuai Liu,

Bowen Ma,

Wei Zhang,

Zhipeng Hu,

Changjie Fan,

Tangjie Lv,

Yu Ding,

Xuan Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR,
    author    = {Liu, Renshuai and Ma, Bowen and Zhang, Wei and Hu, Zhipeng and Fan, Changjie and Lv, Tangjie and Ding, Yu and Cheng, Xuan},
    title     = {Towards a Simultaneous and Granular Identity-Expression Control in Personalized Face Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {2114--2123}
}
PEEKABOO: Interactive Video Generation via Masked-Diffusion: Yash Jain,

Anshul Nasery,

Vibhav Vineet,

Harkirat Behl; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jain_2024_CVPR,
    author    = {Jain, Yash and Nasery, Anshul and Vineet, Vibhav and Behl, Harkirat},
    title     = {{PEEKABOO}: Interactive Video Generation via Masked-Diffusion},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {8079--8088}
}
High-fidelity Person-centric Subject-to-Image Synthesis: Yibin Wang,

Weizhong Zhang,

Jianwei Zheng,

Cheng Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
    author    = {Wang, Yibin and Zhang, Weizhong and Zheng, Jianwei and Jin, Cheng},
    title     = {High-fidelity Person-centric Subject-to-Image Synthesis},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {7675--7684}
}
JeDi: Joint-Image Diffusion Models for Finetuning-Free Personalized Text-to-Image Generation: Yu Zeng,

Vishal M. Patel,

Haochen Wang,

Xun Huang,

Ting-Chun Wang,

Ming-Yu Liu,

Yogesh Balaji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2024_CVPR,
    author    = {Zeng, Yu and Patel, Vishal M. and Wang, Haochen and Huang, Xun and Wang, Ting-Chun and Liu, Ming-Yu and Balaji, Yogesh},
    title     = {{JeDi}: Joint-Image Diffusion Models for Finetuning-Free Personalized Text-to-Image Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {6786--6795}
}
HandDiff: 3D Hand Pose Estimation with Diffusion on Image-Point Cloud: Wencan Cheng,

Hao Tang,

Luc Van Gool,

Jong Hwan Ko; [pdf] [arXiv]
[bibtex]
@InProceedings{Cheng_2024_CVPR,
    author    = {Cheng, Wencan and Tang, Hao and Van Gool, Luc and Ko, Jong Hwan},
    title     = {{HandDiff}: {3D} Hand Pose Estimation with Diffusion on Image-Point Cloud},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {2274--2284}
}
VP3D: Unleashing 2D Visual Prompt for Text-to-3D Generation: Yang Chen,

Yingwei Pan,

Haibo Yang,

Ting Yao,

Tao Mei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR,
    author    = {Chen, Yang and Pan, Yingwei and Yang, Haibo and Yao, Ting and Mei, Tao},
    title     = {{VP3D}: Unleashing {2D} Visual Prompt for Text-to-{3D} Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {4896--4905}
}
Content-Style Decoupling for Unsupervised Makeup Transfer without Generating Pseudo Ground Truth: Zhaoyang Sun,

Shengwu Xiong,

Yaxiong Chen,

Yi Rong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2024_CVPR,
    author    = {Sun, Zhaoyang and Xiong, Shengwu and Chen, Yaxiong and Rong, Yi},
    title     = {Content-Style Decoupling for Unsupervised Makeup Transfer without Generating Pseudo Ground Truth},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {7601--7610}
}
You Only Need Less Attention at Each Stage in Vision Transformers: Shuoxi Zhang,

Hanpeng Liu,

Stephen Lin,

Kun He; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
    author    = {Zhang, Shuoxi and Liu, Hanpeng and Lin, Stephen and He, Kun},
    title     = {You Only Need Less Attention at Each Stage in Vision Transformers},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {6057--6066}
}
Generalizable Novel-View Synthesis using a Stereo Camera: Haechan Lee,

Wonjoon Jin,

Seung-Hwan Baek,

Sunghyun Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2024_CVPR,
    author    = {Lee, Haechan and Jin, Wonjoon and Baek, Seung-Hwan and Cho, Sunghyun},
    title     = {Generalizable Novel-View Synthesis using a Stereo Camera},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {4939--4948}
}
Digital Life Project: Autonomous 3D Characters with Social Intelligence: Zhongang Cai,

Jianping Jiang,

Zhongfei Qing,

Xinying Guo,

Mingyuan Zhang,

Zhengyu Lin,

Haiyi Mei,

Chen Wei,

Ruisi Wang,

Wanqi Yin,

Liang Pan,

Xiangyu Fan,

Han Du,

Peng Gao,

Zhitao Yang,

Yang Gao,

Jiaqi Li,

Tianxiang Ren,

Yukun Wei,

Xiaogang Wang,

Chen Change Loy,

Lei Yang,

Ziwei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2024_CVPR,
    author    = {Cai, Zhongang and Jiang, Jianping and Qing, Zhongfei and Guo, Xinying and Zhang, Mingyuan and Lin, Zhengyu and Mei, Haiyi and Wei, Chen and Wang, Ruisi and Yin, Wanqi and Pan, Liang and Fan, Xiangyu and Du, Han and Gao, Peng and Yang, Zhitao and Gao, Yang and Li, Jiaqi and Ren, Tianxiang and Wei, Yukun and Wang, Xiaogang and Loy, Chen Change and Yang, Lei and Liu, Ziwei},
    title     = {{Digital Life Project}: Autonomous {3D} Characters with Social Intelligence},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {582--592}
}
Rethinking Prior Information Generation with CLIP for Few-Shot Segmentation: Jin Wang,

Bingfeng Zhang,

Jian Pang,

Honglong Chen,

Weifeng Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
    author    = {Wang, Jin and Zhang, Bingfeng and Pang, Jian and Chen, Honglong and Liu, Weifeng},
    title     = {Rethinking Prior Information Generation with {CLIP} for Few-Shot Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {3941--3951}
}
Generative Rendering: Controllable 4D-Guided Video Generation with 2D Diffusion Models: Shengqu Cai,

Duygu Ceylan,

Matheus Gadelha,

Chun-Hao Paul Huang,

Tuanfeng Yang Wang,

Gordon Wetzstein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2024_CVPR,
    author    = {Cai, Shengqu and Ceylan, Duygu and Gadelha, Matheus and Huang, Chun-Hao Paul and Wang, Tuanfeng Yang and Wetzstein, Gordon},
    title     = {Generative Rendering: Controllable {4D}-Guided Video Generation with {2D} Diffusion Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {7611--7620}
}
Relightable Gaussian Codec Avatars: Shunsuke Saito,

Gabriel Schwartz,

Tomas Simon,

Junxuan Li,

Giljoo Nam; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Saito_2024_CVPR,
    author    = {Saito, Shunsuke and Schwartz, Gabriel and Simon, Tomas and Li, Junxuan and Nam, Giljoo},
    title     = {Relightable {Gaussian} Codec Avatars},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {130--141}
}
Single-to-Dual-View Adaptation for Egocentric 3D Hand Pose Estimation: Ruicong Liu,

Takehiko Ohkawa,

Mingfang Zhang,

Yoichi Sato; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR,
    author    = {Liu, Ruicong and Ohkawa, Takehiko and Zhang, Mingfang and Sato, Yoichi},
    title     = {Single-to-Dual-View Adaptation for Egocentric {3D} Hand Pose Estimation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {677--686}
}
Animate Anyone: Consistent and Controllable Image-to-Video Synthesis for Character Animation: Li Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2024_CVPR,
    author    = {Hu, Li},
    title     = {{Animate Anyone}: Consistent and Controllable Image-to-Video Synthesis for Character Animation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {8153--8163}
}
FreeCustom: Tuning-Free Customized Image Generation for Multi-Concept Composition: Ganggui Ding,

Canyu Zhao,

Wen Wang,

Zhen Yang,

Zide Liu,

Hao Chen,

Chunhua Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ding_2024_CVPR,
    author    = {Ding, Ganggui and Zhao, Canyu and Wang, Wen and Yang, Zhen and Liu, Zide and Chen, Hao and Shen, Chunhua},
    title     = {{FreeCustom}: Tuning-Free Customized Image Generation for Multi-Concept Composition},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {9089--9098}
}
MaskINT: Video Editing via Interpolative Non-autoregressive Masked Transformers: Haoyu Ma,

Shahin Mahdizadehaghdam,

Bichen Wu,

Zhipeng Fan,

Yuchao Gu,

Wenliang Zhao,

Lior Shapira,

Xiaohui Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2024_CVPR,
    author    = {Ma, Haoyu and Mahdizadehaghdam, Shahin and Wu, Bichen and Fan, Zhipeng and Gu, Yuchao and Zhao, Wenliang and Shapira, Lior and Xie, Xiaohui},
    title     = {{MaskINT}: Video Editing via Interpolative Non-autoregressive Masked Transformers},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {7403--7412}
}
Learning Multi-Dimensional Human Preference for Text-to-Image Generation: Sixian Zhang,

Bohan Wang,

Junqiang Wu,

Yan Li,

Tingting Gao,

Di Zhang,

Zhongyuan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
    author    = {Zhang, Sixian and Wang, Bohan and Wu, Junqiang and Li, Yan and Gao, Tingting and Zhang, Di and Wang, Zhongyuan},
    title     = {Learning Multi-Dimensional Human Preference for Text-to-Image Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {8018--8027}
}
ViVid-1-to-3: Novel View Synthesis with Video Diffusion Models: Jeong-gi Kwak,

Erqun Dong,

Yuhe Jin,

Hanseok Ko,

Shweta Mahajan,

Kwang Moo Yi; [pdf] [supp]
[bibtex]
@InProceedings{Kwak_2024_CVPR,
    author    = {Kwak, Jeong-gi and Dong, Erqun and Jin, Yuhe and Ko, Hanseok and Mahajan, Shweta and Yi, Kwang Moo},
    title     = {{ViVid-1-to-3}: Novel View Synthesis with Video Diffusion Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {6775--6785}
}
Generating Human Motion in 3D Scenes from Text Descriptions: Zhi Cen,

Huaijin Pi,

Sida Peng,

Zehong Shen,

Minghui Yang,

Shuai Zhu,

Hujun Bao,

Xiaowei Zhou; [pdf] [arXiv]
[bibtex]
@InProceedings{Cen_2024_CVPR,
    author    = {Cen, Zhi and Pi, Huaijin and Peng, Sida and Shen, Zehong and Yang, Minghui and Zhu, Shuai and Bao, Hujun and Zhou, Xiaowei},
    title     = {Generating Human Motion in {3D} Scenes from Text Descriptions},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {1855--1866}
}
QDFormer: Towards Robust Audiovisual Segmentation in Complex Environments with Quantization-based Semantic Decomposition: Xiang Li,

Jinglu Wang,

Xiaohao Xu,

Xiulian Peng,

Rita Singh,

Yan Lu,

Bhiksha Raj; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
    author    = {Li, Xiang and Wang, Jinglu and Xu, Xiaohao and Peng, Xiulian and Singh, Rita and Lu, Yan and Raj, Bhiksha},
    title     = {{QDFormer}: Towards Robust Audiovisual Segmentation in Complex Environments with Quantization-based Semantic Decomposition},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {3402--3413}
}
Fast Adaptation for Human Pose Estimation via Meta-Optimization: Shengxiang Hu,

Huaijiang Sun,

Bin Li,

Dong Wei,

Weiqing Li,

Jianfeng Lu; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2024_CVPR, author = {Hu, Shengxiang and Sun, Huaijiang and Li, Bin and Wei, Dong and Li, Weiqing and Lu, Jianfeng}, title = {Fast Adaptation for Human Pose Estimation via Meta-Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1792-1801} }
WOUAF: Weight Modulation for User Attribution and Fingerprinting in Text-to-Image Diffusion Models: Changhoon Kim,

Kyle Min,

Maitreya Patel,

Sheng Cheng,

Yezhou Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2024_CVPR, author = {Kim, Changhoon and Min, Kyle and Patel, Maitreya and Cheng, Sheng and Yang, Yezhou}, title = {WOUAF: Weight Modulation for User Attribution and Fingerprinting in Text-to-Image Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8974-8983} }
Text-Conditioned Generative Model of 3D Strand-based Human Hairstyles: Vanessa Sklyarova,

Egor Zakharov,

Otmar Hilliges,

Michael J. Black,

Justus Thies; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sklyarova_2024_CVPR, author = {Sklyarova, Vanessa and Zakharov, Egor and Hilliges, Otmar and Black, Michael J. and Thies, Justus}, title = {Text-Conditioned Generative Model of 3D Strand-based Human Hairstyles}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4703-4712} }
Skeleton-in-Context: Unified Skeleton Sequence Modeling with In-Context Learning: Xinshun Wang,

Zhongbin Fang,

Xia Li,

Xiangtai Li,

Chen Chen,

Mengyuan Liu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2024_CVPR, author = {Wang, Xinshun and Fang, Zhongbin and Li, Xia and Li, Xiangtai and Chen, Chen and Liu, Mengyuan}, title = {Skeleton-in-Context: Unified Skeleton Sequence Modeling with In-Context Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2436-2446} }
DemoFusion: Democratising High-Resolution Image Generation With No $$$: Ruoyi Du,

Dongliang Chang,

Timothy Hospedales,

Yi-Zhe Song,

Zhanyu Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Du_2024_CVPR, author = {Du, Ruoyi and Chang, Dongliang and Hospedales, Timothy and Song, Yi-Zhe and Ma, Zhanyu}, title = {DemoFusion: Democratising High-Resolution Image Generation With No \$\$\$}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6159-6168} }
Total Selfie: Generating Full-Body Selfies: Bowei Chen,

Brian Curless,

Ira Kemelmacher-Shlizerman,

Steven M. Seitz; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR, author = {Chen, Bowei and Curless, Brian and Kemelmacher-Shlizerman, Ira and Seitz, Steven M.}, title = {Total Selfie: Generating Full-Body Selfies}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6701-6711} }
Learning Structure-from-Motion with Graph Attention Networks: Lucas Brynte,

José Pedro Iglesias,

Carl Olsson,

Fredrik Kahl; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Brynte_2024_CVPR, author = {Brynte, Lucas and Iglesias, Jos\'e Pedro and Olsson, Carl and Kahl, Fredrik}, title = {Learning Structure-from-Motion with Graph Attention Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4808-4817} }
Geometry Transfer for Stylizing Radiance Fields: Hyunyoung Jung,

Seonghyeon Nam,

Nikolaos Sarafianos,

Sungjoo Yoo,

Alexander Sorkine-Hornung,

Rakesh Ranjan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jung_2024_CVPR, author = {Jung, Hyunyoung and Nam, Seonghyeon and Sarafianos, Nikolaos and Yoo, Sungjoo and Sorkine-Hornung, Alexander and Ranjan, Rakesh}, title = {Geometry Transfer for Stylizing Radiance Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8565-8575} }
Holoported Characters: Real-time Free-viewpoint Rendering of Humans from Sparse RGB Cameras: Ashwath Shetty,

Marc Habermann,

Guoxing Sun,

Diogo Luvizon,

Vladislav Golyanik,

Christian Theobalt; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shetty_2024_CVPR, author = {Shetty, Ashwath and Habermann, Marc and Sun, Guoxing and Luvizon, Diogo and Golyanik, Vladislav and Theobalt, Christian}, title = {Holoported Characters: Real-time Free-viewpoint Rendering of Humans from Sparse RGB Cameras}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1206-1215} }
SEAS: ShapE-Aligned Supervision for Person Re-Identification: Haidong Zhu,

Pranav Budhwant,

Zhaoheng Zheng,

Ram Nevatia; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2024_CVPR, author = {Zhu, Haidong and Budhwant, Pranav and Zheng, Zhaoheng and Nevatia, Ram}, title = {SEAS: ShapE-Aligned Supervision for Person Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {164-174} }
Making Vision Transformers Truly Shift-Equivariant: Renan A. Rojas-Gomez,

Teck-Yian Lim,

Minh N. Do,

Raymond A. Yeh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rojas-Gomez_2024_CVPR, author = {Rojas-Gomez, Renan A. and Lim, Teck-Yian and Do, Minh N. and Yeh, Raymond A.}, title = {Making Vision Transformers Truly Shift-Equivariant}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5568-5577} }
SpikeNeRF: Learning Neural Radiance Fields from Continuous Spike Stream: Lin Zhu,

Kangmin Jia,

Yifan Zhao,

Yunshan Qi,

Lizhi Wang,

Hua Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2024_CVPR, author = {Zhu, Lin and Jia, Kangmin and Zhao, Yifan and Qi, Yunshan and Wang, Lizhi and Huang, Hua}, title = {SpikeNeRF: Learning Neural Radiance Fields from Continuous Spike Stream}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6285-6295} }
A Semi-supervised Nighttime Dehazing Baseline with Spatial-Frequency Aware and Realistic Brightness Constraint: Xiaofeng Cong,

Jie Gui,

Jing Zhang,

Junming Hou,

Hao Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cong_2024_CVPR, author = {Cong, Xiaofeng and Gui, Jie and Zhang, Jing and Hou, Junming and Shen, Hao}, title = {A Semi-supervised Nighttime Dehazing Baseline with Spatial-Frequency Aware and Realistic Brightness Constraint}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2631-2640} }
Deep Equilibrium Diffusion Restoration with Parallel Sampling: Jiezhang Cao,

Yue Shi,

Kai Zhang,

Yulun Zhang,

Radu Timofte,

Luc Van Gool; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2024_CVPR, author = {Cao, Jiezhang and Shi, Yue and Zhang, Kai and Zhang, Yulun and Timofte, Radu and Van Gool, Luc}, title = {Deep Equilibrium Diffusion Restoration with Parallel Sampling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2824-2834} }
Gaussian Shell Maps for Efficient 3D Human Generation: Rameen Abdal,

Wang Yifan,

Zifan Shi,

Yinghao Xu,

Ryan Po,

Zhengfei Kuang,

Qifeng Chen,

Dit-Yan Yeung,

Gordon Wetzstein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Abdal_2024_CVPR, author = {Abdal, Rameen and Yifan, Wang and Shi, Zifan and Xu, Yinghao and Po, Ryan and Kuang, Zhengfei and Chen, Qifeng and Yeung, Dit-Yan and Wetzstein, Gordon}, title = {Gaussian Shell Maps for Efficient 3D Human Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9441-9451} }
MoST: Motion Style Transformer Between Diverse Action Contents: Boeun Kim,

Jungho Kim,

Hyung Jin Chang,

Jin Young Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2024_CVPR, author = {Kim, Boeun and Kim, Jungho and Chang, Hyung Jin and Choi, Jin Young}, title = {MoST: Motion Style Transformer Between Diverse Action Contents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1705-1714} }
Prompting Hard or Hardly Prompting: Prompt Inversion for Text-to-Image Diffusion Models: Shweta Mahajan,

Tanzila Rahman,

Kwang Moo Yi,

Leonid Sigal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mahajan_2024_CVPR, author = {Mahajan, Shweta and Rahman, Tanzila and Yi, Kwang Moo and Sigal, Leonid}, title = {Prompting Hard or Hardly Prompting: Prompt Inversion for Text-to-Image Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6808-6817} }
Unmixing Before Fusion: A Generalized Paradigm for Multi-Source-based Hyperspectral Image Synthesis: Yang Yu,

Erting Pan,

Xinya Wang,

Yuheng Wu,

Xiaoguang Mei,

Jiayi Ma; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2024_CVPR, author = {Yu, Yang and Pan, Erting and Wang, Xinya and Wu, Yuheng and Mei, Xiaoguang and Ma, Jiayi}, title = {Unmixing Before Fusion: A Generalized Paradigm for Multi-Source-based Hyperspectral Image Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9297-9306} }
CoDi: Conditional Diffusion Distillation for Higher-Fidelity and Faster Image Generation: Kangfu Mei,

Mauricio Delbracio,

Hossein Talebi,

Zhengzhong Tu,

Vishal M. Patel,

Peyman Milanfar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mei_2024_CVPR, author = {Mei, Kangfu and Delbracio, Mauricio and Talebi, Hossein and Tu, Zhengzhong and Patel, Vishal M. and Milanfar, Peyman}, title = {CoDi: Conditional Diffusion Distillation for Higher-Fidelity and Faster Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9048-9058} }
X-Adapter: Adding Universal Compatibility of Plugins for Upgraded Diffusion Model: Lingmin Ran,

Xiaodong Cun,

Jia-Wei Liu,

Rui Zhao,

Song Zijie,

Xintao Wang,

Jussi Keppo,

Mike Zheng Shou; [pdf] [supp]
[bibtex]
@InProceedings{Ran_2024_CVPR, author = {Ran, Lingmin and Cun, Xiaodong and Liu, Jia-Wei and Zhao, Rui and Zijie, Song and Wang, Xintao and Keppo, Jussi and Shou, Mike Zheng}, title = {X-Adapter: Adding Universal Compatibility of Plugins for Upgraded Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8775-8784} }
CADTalk: An Algorithm and Benchmark for Semantic Commenting of CAD Programs: Haocheng Yuan,

Jing Xu,

Hao Pan,

Adrien Bousseau,

Niloy J. Mitra,

Changjian Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2024_CVPR, author = {Yuan, Haocheng and Xu, Jing and Pan, Hao and Bousseau, Adrien and Mitra, Niloy J. and Li, Changjian}, title = {CADTalk: An Algorithm and Benchmark for Semantic Commenting of CAD Programs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3753-3762} }
Inversion-Free Image Editing with Language-Guided Diffusion Models: Sihan Xu,

Yidong Huang,

Jiayi Pan,

Ziqiao Ma,

Joyce Chai; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2024_CVPR, author = {Xu, Sihan and Huang, Yidong and Pan, Jiayi and Ma, Ziqiao and Chai, Joyce}, title = {Inversion-Free Image Editing with Language-Guided Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9452-9461} }
HumMUSS: Human Motion Understanding using State Space Models: Arnab Mondal,

Stefano Alletto,

Denis Tome; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mondal_2024_CVPR, author = {Mondal, Arnab and Alletto, Stefano and Tome, Denis}, title = {HumMUSS: Human Motion Understanding using State Space Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2318-2330} }
Drag Your Noise: Interactive Point-based Editing via Diffusion Semantic Propagation: Haofeng Liu,

Chenshu Xu,

Yifei Yang,

Lihua Zeng,

Shengfeng He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR, author = {Liu, Haofeng and Xu, Chenshu and Yang, Yifei and Zeng, Lihua and He, Shengfeng}, title = {Drag Your Noise: Interactive Point-based Editing via Diffusion Semantic Propagation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6743-6752} }
ContextSeg: Sketch Semantic Segmentation by Querying the Context with Attention: Jiawei Wang,

Changjian Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR, author = {Wang, Jiawei and Li, Changjian}, title = {ContextSeg: Sketch Semantic Segmentation by Querying the Context with Attention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3679-3688} }
Taming the Tail in Class-Conditional GANs: Knowledge Sharing via Unconditional Training at Lower Resolutions: Saeed Khorram,

Mingqi Jiang,

Mohamad Shahbazi,

Mohamad H. Danesh,

Li Fuxin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Khorram_2024_CVPR, author = {Khorram, Saeed and Jiang, Mingqi and Shahbazi, Mohamad and Danesh, Mohamad H. and Fuxin, Li}, title = {Taming the Tail in Class-Conditional GANs: Knowledge Sharing via Unconditional Training at Lower Resolutions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7580-7590} }
VideoSwap: Customized Video Subject Swapping with Interactive Semantic Point Correspondence: Yuchao Gu,

Yipin Zhou,

Bichen Wu,

Licheng Yu,

Jia-Wei Liu,

Rui Zhao,

Jay Zhangjie Wu,

David Junhao Zhang,

Mike Zheng Shou,

Kevin Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2024_CVPR, author = {Gu, Yuchao and Zhou, Yipin and Wu, Bichen and Yu, Licheng and Liu, Jia-Wei and Zhao, Rui and Wu, Jay Zhangjie and Zhang, David Junhao and Shou, Mike Zheng and Tang, Kevin}, title = {VideoSwap: Customized Video Subject Swapping with Interactive Semantic Point Correspondence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7621-7630} }
Hierarchical Histogram Threshold Segmentation - Auto-terminating High-detail Oversegmentation: Thomas V. Chang,

Simon Seibt,

Bartosz von Rymon Lipinski; [pdf] [supp]
[bibtex]
@InProceedings{Chang_2024_CVPR, author = {Chang, Thomas V. and Seibt, Simon and von Rymon Lipinski, Bartosz}, title = {Hierarchical Histogram Threshold Segmentation - Auto-terminating High-detail Oversegmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3195-3204} }
Once for Both: Single Stage of Importance and Sparsity Search for Vision Transformer Compression: Hancheng Ye,

Chong Yu,

Peng Ye,

Renqiu Xia,

Yansong Tang,

Jiwen Lu,

Tao Chen,

Bo Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2024_CVPR, author = {Ye, Hancheng and Yu, Chong and Ye, Peng and Xia, Renqiu and Tang, Yansong and Lu, Jiwen and Chen, Tao and Zhang, Bo}, title = {Once for Both: Single Stage of Importance and Sparsity Search for Vision Transformer Compression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5578-5588} }
As-Plausible-As-Possible: Plausibility-Aware Mesh Deformation Using 2D Diffusion Priors: Seungwoo Yoo,

Kunho Kim,

Vladimir G. Kim,

Minhyuk Sung; [pdf] [supp]
[bibtex]
@InProceedings{Yoo_2024_CVPR, author = {Yoo, Seungwoo and Kim, Kunho and Kim, Vladimir G. and Sung, Minhyuk}, title = {As-Plausible-As-Possible: Plausibility-Aware Mesh Deformation Using 2D Diffusion Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4315-4324} }
ECLIPSE: Efficient Continual Learning in Panoptic Segmentation with Visual Prompt Tuning: Beomyoung Kim,

Joonsang Yu,

Sung Ju Hwang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2024_CVPR, author = {Kim, Beomyoung and Yu, Joonsang and Hwang, Sung Ju}, title = {ECLIPSE: Efficient Continual Learning in Panoptic Segmentation with Visual Prompt Tuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3346-3356} }
MaGGIe: Masked Guided Gradual Human Instance Matting: Chuong Huynh,

Seoung Wug Oh,

Abhinav Shrivastava,

Joon-Young Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huynh_2024_CVPR, author = {Huynh, Chuong and Oh, Seoung Wug and Shrivastava, Abhinav and Lee, Joon-Young}, title = {MaGGIe: Masked Guided Gradual Human Instance Matting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3870-3879} }
Towards the Uncharted: Density-Descending Feature Perturbation for Semi-supervised Semantic Segmentation: Xiaoyang Wang,

Huihui Bai,

Limin Yu,

Yao Zhao,

Jimin Xiao; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR, author = {Wang, Xiaoyang and Bai, Huihui and Yu, Limin and Zhao, Yao and Xiao, Jimin}, title = {Towards the Uncharted: Density-Descending Feature Perturbation for Semi-supervised Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3303-3312} }
RTMO: Towards High-Performance One-Stage Real-Time Multi-Person Pose Estimation: Peng Lu,

Tao Jiang,

Yining Li,

Xiangtai Li,

Kai Chen,

Wenming Yang; [pdf] [arXiv]
[bibtex]
@InProceedings{Lu_2024_CVPR, author = {Lu, Peng and Jiang, Tao and Li, Yining and Li, Xiangtai and Chen, Kai and Yang, Wenming}, title = {RTMO: Towards High-Performance One-Stage Real-Time Multi-Person Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1491-1500} }
WaveFace: Authentic Face Restoration with Efficient Frequency Recovery: Yunqi Miao,

Jiankang Deng,

Jungong Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Miao_2024_CVPR, author = {Miao, Yunqi and Deng, Jiankang and Han, Jungong}, title = {WaveFace: Authentic Face Restoration with Efficient Frequency Recovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6583-6592} }
UltrAvatar: A Realistic Animatable 3D Avatar Diffusion Model with Authenticity Guided Textures: Mingyuan Zhou,

Rakib Hyder,

Ziwei Xuan,

Guojun Qi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2024_CVPR, author = {Zhou, Mingyuan and Hyder, Rakib and Xuan, Ziwei and Qi, Guojun}, title = {UltrAvatar: A Realistic Animatable 3D Avatar Diffusion Model with Authenticity Guided Textures}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1238-1248} }
Attention-Propagation Network for Egocentric Heatmap to 3D Pose Lifting: Taeho Kang,

Youngki Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2024_CVPR, author = {Kang, Taeho and Lee, Youngki}, title = {Attention-Propagation Network for Egocentric Heatmap to 3D Pose Lifting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {842-851} }
OmniMotionGPT: Animal Motion Generation with Limited Data: Zhangsihao Yang,

Mingyuan Zhou,

Mengyi Shan,

Bingbing Wen,

Ziwei Xuan,

Mitch Hill,

Junjie Bai,

Guo-Jun Qi,

Yalin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2024_CVPR, author = {Yang, Zhangsihao and Zhou, Mingyuan and Shan, Mengyi and Wen, Bingbing and Xuan, Ziwei and Hill, Mitch and Bai, Junjie and Qi, Guo-Jun and Wang, Yalin}, title = {OmniMotionGPT: Animal Motion Generation with Limited Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1249-1259} }
InstanceDiffusion: Instance-level Control for Image Generation: Xudong Wang,

Trevor Darrell,

Sai Saketh Rambhatla,

Rohit Girdhar,

Ishan Misra; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR, author = {Wang, Xudong and Darrell, Trevor and Rambhatla, Sai Saketh and Girdhar, Rohit and Misra, Ishan}, title = {InstanceDiffusion: Instance-level Control for Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6232-6242} }
Unifying Top-down and Bottom-up Scanpath Prediction Using Transformers: Zhibo Yang,

Sounak Mondal,

Seoyoung Ahn,

Ruoyu Xue,

Gregory Zelinsky,

Minh Hoai,

Dimitris Samaras; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2024_CVPR, author = {Yang, Zhibo and Mondal, Sounak and Ahn, Seoyoung and Xue, Ruoyu and Zelinsky, Gregory and Hoai, Minh and Samaras, Dimitris}, title = {Unifying Top-down and Bottom-up Scanpath Prediction Using Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1683-1693} }
3D-Aware Face Editing via Warping-Guided Latent Direction Learning: Yuhao Cheng,

Zhuo Chen,

Xingyu Ren,

Wenhan Zhu,

Zhengqin Xu,

Di Xu,

Changpeng Yang,

Yichao Yan; [pdf]
[bibtex]
@InProceedings{Cheng_2024_CVPR, author = {Cheng, Yuhao and Chen, Zhuo and Ren, Xingyu and Zhu, Wenhan and Xu, Zhengqin and Xu, Di and Yang, Changpeng and Yan, Yichao}, title = {3D-Aware Face Editing via Warping-Guided Latent Direction Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {916-926} }
CAT-Seg: Cost Aggregation for Open-Vocabulary Semantic Segmentation: Seokju Cho,

Heeseong Shin,

Sunghwan Hong,

Anurag Arnab,

Paul Hongsuck Seo,

Seungryong Kim; [pdf] [supp]
[bibtex]
@InProceedings{Cho_2024_CVPR, author = {Cho, Seokju and Shin, Heeseong and Hong, Sunghwan and Arnab, Anurag and Seo, Paul Hongsuck and Kim, Seungryong}, title = {CAT-Seg: Cost Aggregation for Open-Vocabulary Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4113-4123} }
Focus on Your Instruction: Fine-grained and Multi-instruction Image Editing by Attention Modulation: Qin Guo,

Tianwei Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2024_CVPR, author = {Guo, Qin and Lin, Tianwei}, title = {Focus on Your Instruction: Fine-grained and Multi-instruction Image Editing by Attention Modulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6986-6996} }
AvatarGPT: All-in-One Framework for Motion Understanding, Planning, Generation and Beyond: Zixiang Zhou,

Yu Wan,

Baoyuan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2024_CVPR, author = {Zhou, Zixiang and Wan, Yu and Wang, Baoyuan}, title = {AvatarGPT: All-in-One Framework for Motion Understanding, Planning, Generation and Beyond}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1357-1366} }
Co-Speech Gesture Video Generation via Motion-Decoupled Diffusion Model: Xu He,

Qiaochu Huang,

Zhensong Zhang,

Zhiwei Lin,

Zhiyong Wu,

Sicheng Yang,

Minglei Li,

Zhiyi Chen,

Songcen Xu,

Xiaofei Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2024_CVPR, author = {He, Xu and Huang, Qiaochu and Zhang, Zhensong and Lin, Zhiwei and Wu, Zhiyong and Yang, Sicheng and Li, Minglei and Chen, Zhiyi and Xu, Songcen and Wu, Xiaofei}, title = {Co-Speech Gesture Video Generation via Motion-Decoupled Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2263-2273} }
CDFormer: When Degradation Prediction Embraces Diffusion Model for Blind Image Super-Resolution: Qingguo Liu,

Chenyi Zhuang,

Pan Gao,

Jie Qin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR, author = {Liu, Qingguo and Zhuang, Chenyi and Gao, Pan and Qin, Jie}, title = {CDFormer: When Degradation Prediction Embraces Diffusion Model for Blind Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7455-7464} }
HumanRef: Single Image to 3D Human Generation via Reference-Guided Diffusion: Jingbo Zhang,

Xiaoyu Li,

Qi Zhang,

Yanpei Cao,

Ying Shan,

Jing Liao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR, author = {Zhang, Jingbo and Li, Xiaoyu and Zhang, Qi and Cao, Yanpei and Shan, Ying and Liao, Jing}, title = {HumanRef: Single Image to 3D Human Generation via Reference-Guided Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {1844-1854} }
Rethinking Interactive Image Segmentation with Low Latency, High Quality, and Diverse Prompts: Qin Liu,

Jaemin Cho,

Mohit Bansal,

Marc Niethammer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR, author = {Liu, Qin and Cho, Jaemin and Bansal, Mohit and Niethammer, Marc}, title = {Rethinking Interactive Image Segmentation with Low Latency, High Quality, and Diverse Prompts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3773-3782} }
DITTO: Dual and Integrated Latent Topologies for Implicit 3D Reconstruction: Jaehyeok Shim,

Kyungdon Joo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shim_2024_CVPR, author = {Shim, Jaehyeok and Joo, Kyungdon}, title = {DITTO: Dual and Integrated Latent Topologies for Implicit 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {5396-5405} }
HIT: Estimating Internal Human Implicit Tissues from the Body Surface: Marilyn Keller,

Vaibhav Arora,

Abdelmouttaleb Dakri,

Shivam Chandhok,

Jürgen Machann,

Andreas Fritsche,

Michael J. Black,

Sergi Pujades; [pdf] [supp]
[bibtex]
@InProceedings{Keller_2024_CVPR, author = {Keller, Marilyn and Arora, Vaibhav and Dakri, Abdelmouttaleb and Chandhok, Shivam and Machann, J\"urgen and Fritsche, Andreas and Black, Michael J. and Pujades, Sergi}, title = {HIT: Estimating Internal Human Implicit Tissues from the Body Surface}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {3480-3490} }
DanceCamera3D: 3D Camera Movement Synthesis with Music and Dance: Zixuan Wang,

Jia Jia,

Shikun Sun,

Haozhe Wu,

Rong Han,

Zhenyu Li,

Di Tang,

Jiaqing Zhou,

Jiebo Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR, author = {Wang, Zixuan and Jia, Jia and Sun, Shikun and Wu, Haozhe and Han, Rong and Li, Zhenyu and Tang, Di and Zhou, Jiaqing and Luo, Jiebo}, title = {DanceCamera3D: 3D Camera Movement Synthesis with Music and Dance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7892-7901} }
Cross Initialization for Face Personalization of Text-to-Image Models: Lianyu Pang,

Jian Yin,

Haoran Xie,

Qiping Wang,

Qing Li,

Xudong Mao; [pdf] [supp]
[bibtex]
@InProceedings{Pang_2024_CVPR, author = {Pang, Lianyu and Yin, Jian and Xie, Haoran and Wang, Qiping and Li, Qing and Mao, Xudong}, title = {Cross Initialization for Face Personalization of Text-to-Image Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8393-8403} }
LEDITS++: Limitless Image Editing using Text-to-Image Models: Manuel Brack,

Felix Friedrich,

Katharina Kornmeier,

Linoy Tsaban,

Patrick Schramowski,

Kristian Kersting,

Apolinario Passos; [pdf] [supp]
[bibtex]
@InProceedings{Brack_2024_CVPR, author = {Brack, Manuel and Friedrich, Felix and Kornmeier, Katharina and Tsaban, Linoy and Schramowski, Patrick and Kersting, Kristian and Passos, Apolinario}, title = {LEDITS++: Limitless Image Editing using Text-to-Image Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {8861-8870} }
Video Interpolation with Diffusion Models: Siddhant Jain,

Daniel Watson,

Eric Tabellion,

Aleksander Hołyński,

Ben Poole,

Janne Kontkanen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jain_2024_CVPR, author = {Jain, Siddhant and Watson, Daniel and Tabellion, Eric and Ho{\l}y{\'n}ski, Aleksander and Poole, Ben and Kontkanen, Janne}, title = {Video Interpolation with Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7341-7351} }
Learning Adaptive Spatial Coherent Correlations for Speech-Preserving Facial Expression Manipulation: Tianshui Chen,

Jianman Lin,

Zhijing Yang,

Chunmei Qing,

Liang Lin; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2024_CVPR, author = {Chen, Tianshui and Lin, Jianman and Yang, Zhijing and Qing, Chunmei and Lin, Liang}, title = {Learning Adaptive Spatial Coherent Correlations for Speech-Preserving Facial Expression Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {7267-7276} }
WHAM: Reconstructing World-grounded Humans with Accurate 3D Motion: Soyong Shin,

Juyong Kim,

Eni Halilaj,

Michael J. Black; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shin_2024_CVPR, author = {Shin, Soyong and Kim, Juyong and Halilaj, Eni and Black, Michael J.}, title = {WHAM: Reconstructing World-grounded Humans with Accurate 3D Motion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {2070-2080} }
DiffPerformer: Iterative Learning of Consistent Latent Guidance for Diffusion-based Human Video Generation: Chenyang Wang,

Zerong Zheng,

Tao Yu,

Xiaoqian Lv,

Bineng Zhong,

Shengping Zhang,

Liqiang Nie; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2024_CVPR, author = {Wang, Chenyang and Zheng, Zerong and Yu, Tao and Lv, Xiaoqian and Zhong, Bineng and Zhang, Shengping and Nie, Liqiang}, title = {DiffPerformer: Iterative Learning of Consistent Latent Guidance for Diffusion-based Human Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {6169-6179} }
Category-Level Multi-Part Multi-Joint 3D Shape Assembly: Yichen Li,

Kaichun Mo,

Yueqi Duan,

He Wang,

Jiequan Zhang,

Lin Shao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Yichen and Mo, Kaichun and Duan, Yueqi and Wang, He and Zhang, Jiequan and Shao, Lin},
  title     = {Category-Level Multi-Part Multi-Joint {3D} Shape Assembly},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3281--3291},
}
One-Shot Open Affordance Learning with Foundation Models: Gen Li,

Deqing Sun,

Laura Sevilla-Lara,

Varun Jampani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Gen and Sun, Deqing and Sevilla-Lara, Laura and Jampani, Varun},
  title     = {One-Shot Open Affordance Learning with Foundation Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3086--3096},
}
Don't Look into the Dark: Latent Codes for Pluralistic Image Inpainting: Haiwei Chen,

Yajie Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Haiwei and Zhao, Yajie},
  title     = {Don't Look into the Dark: Latent Codes for Pluralistic Image Inpainting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7591--7600},
}
DiffEditor: Boosting Accuracy and Flexibility on Diffusion-based Image Editing: Chong Mou,

Xintao Wang,

Jiechong Song,

Ying Shan,

Jian Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mou_2024_CVPR,
  author    = {Mou, Chong and Wang, Xintao and Song, Jiechong and Shan, Ying and Zhang, Jian},
  title     = {{DiffEditor}: Boosting Accuracy and Flexibility on Diffusion-based Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8488--8497},
}
InstructVideo: Instructing Video Diffusion Models with Human Feedback: Hangjie Yuan,

Shiwei Zhang,

Xiang Wang,

Yujie Wei,

Tao Feng,

Yining Pan,

Yingya Zhang,

Ziwei Liu,

Samuel Albanie,

Dong Ni; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2024_CVPR,
  author    = {Yuan, Hangjie and Zhang, Shiwei and Wang, Xiang and Wei, Yujie and Feng, Tao and Pan, Yining and Zhang, Yingya and Liu, Ziwei and Albanie, Samuel and Ni, Dong},
  title     = {{InstructVideo}: Instructing Video Diffusion Models with Human Feedback},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6463--6474},
}
On the Content Bias in Frechet Video Distance: Songwei Ge,

Aniruddha Mahapatra,

Gaurav Parmar,

Jun-Yan Zhu,

Jia-Bin Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ge_2024_CVPR,
  author    = {Ge, Songwei and Mahapatra, Aniruddha and Parmar, Gaurav and Zhu, Jun-Yan and Huang, Jia-Bin},
  title     = {On the Content Bias in {Frechet} Video Distance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7277--7288},
}
Image Neural Field Diffusion Models: Yinbo Chen,

Oliver Wang,

Richard Zhang,

Eli Shechtman,

Xiaolong Wang,

Michael Gharbi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Yinbo and Wang, Oliver and Zhang, Richard and Shechtman, Eli and Wang, Xiaolong and Gharbi, Michael},
  title     = {Image Neural Field Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8007--8017},
}
Discriminative Probing and Tuning for Text-to-Image Generation: Leigang Qu,

Wenjie Wang,

Yongqi Li,

Hanwang Zhang,

Liqiang Nie,

Tat-Seng Chua; [pdf] [arXiv]
[bibtex]
@InProceedings{Qu_2024_CVPR,
  author    = {Qu, Leigang and Wang, Wenjie and Li, Yongqi and Zhang, Hanwang and Nie, Liqiang and Chua, Tat-Seng},
  title     = {Discriminative Probing and Tuning for Text-to-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7434--7444},
}
Towards More Accurate Diffusion Model Acceleration with A Timestep Tuner: Mengfei Xia,

Yujun Shen,

Changsong Lei,

Yu Zhou,

Deli Zhao,

Ran Yi,

Wenping Wang,

Yong-Jin Liu; [pdf] [supp]
[bibtex]
@InProceedings{Xia_2024_CVPR,
  author    = {Xia, Mengfei and Shen, Yujun and Lei, Changsong and Zhou, Yu and Zhao, Deli and Yi, Ran and Wang, Wenping and Liu, Yong-Jin},
  title     = {Towards More Accurate Diffusion Model Acceleration with A Timestep Tuner},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5736--5745},
}
Rethinking Generalizable Face Anti-spoofing via Hierarchical Prototype-guided Distribution Refinement in Hyperbolic Space: Chengyang Hu,

Ke-Yue Zhang,

Taiping Yao,

Shouhong Ding,

Lizhuang Ma; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2024_CVPR,
  author    = {Hu, Chengyang and Zhang, Ke-Yue and Yao, Taiping and Ding, Shouhong and Ma, Lizhuang},
  title     = {Rethinking Generalizable Face Anti-spoofing via Hierarchical Prototype-guided Distribution Refinement in Hyperbolic Space},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1032--1041},
}
GenesisTex: Adapting Image Denoising Diffusion to Texture Space: Chenjian Gao,

Boyan Jiang,

Xinghui Li,

Yingpeng Zhang,

Qian Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2024_CVPR,
  author    = {Gao, Chenjian and Jiang, Boyan and Li, Xinghui and Zhang, Yingpeng and Yu, Qian},
  title     = {{GenesisTex}: Adapting Image Denoising Diffusion to Texture Space},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4620--4629},
}
Image-to-Image Matching via Foundation Models: A New Perspective for Open-Vocabulary Semantic Segmentation: Yuan Wang,

Rui Sun,

Naisong Luo,

Yuwen Pan,

Tianzhu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Yuan and Sun, Rui and Luo, Naisong and Pan, Yuwen and Zhang, Tianzhu},
  title     = {Image-to-Image Matching via Foundation Models: A New Perspective for Open-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3952--3963},
}
BigGait: Learning Gait Representation You Want by Large Vision Models: Dingqiang Ye,

Chao Fan,

Jingzhe Ma,

Xiaoming Liu,

Shiqi Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2024_CVPR,
  author    = {Ye, Dingqiang and Fan, Chao and Ma, Jingzhe and Liu, Xiaoming and Yu, Shiqi},
  title     = {{BigGait}: Learning Gait Representation You Want by Large Vision Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {200--210},
}
HOIST-Former: Hand-held Objects Identification Segmentation and Tracking in the Wild: Supreeth Narasimhaswamy,

Huy Anh Nguyen,

Lihan Huang,

Minh Hoai; [pdf]
[bibtex]
@InProceedings{Narasimhaswamy_2024_CVPR,
  author    = {Narasimhaswamy, Supreeth and Nguyen, Huy Anh and Huang, Lihan and Hoai, Minh},
  title     = {{HOIST-Former}: Hand-held Objects Identification Segmentation and Tracking in the Wild},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2351--2361},
}
Contextrast: Contextual Contrastive Learning for Semantic Segmentation: Changki Sung,

Wanhee Kim,

Jungho An,

Wooju Lee,

Hyungtae Lim,

Hyun Myung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sung_2024_CVPR,
  author    = {Sung, Changki and Kim, Wanhee and An, Jungho and Lee, Wooju and Lim, Hyungtae and Myung, Hyun},
  title     = {Contextrast: Contextual Contrastive Learning for Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3732--3742},
}
AUEditNet: Dual-Branch Facial Action Unit Intensity Manipulation with Implicit Disentanglement: Shiwei Jin,

Zhen Wang,

Lei Wang,

Peng Liu,

Ning Bi,

Truong Nguyen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2024_CVPR,
  author    = {Jin, Shiwei and Wang, Zhen and Wang, Lei and Liu, Peng and Bi, Ning and Nguyen, Truong},
  title     = {{AUEditNet}: Dual-Branch Facial Action Unit Intensity Manipulation with Implicit Disentanglement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2104--2113},
}
BodyMAP - Jointly Predicting Body Mesh and 3D Applied Pressure Map for People in Bed: Abhishek Tandon,

Anujraaj Goyal,

Henry M. Clever,

Zackory Erickson; [pdf] [supp]
[bibtex]
@InProceedings{Tandon_2024_CVPR,
  author    = {Tandon, Abhishek and Goyal, Anujraaj and Clever, Henry M. and Erickson, Zackory},
  title     = {{BodyMAP} - Jointly Predicting Body Mesh and {3D} Applied Pressure Map for People in Bed},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2480--2489},
}
KPConvX: Modernizing Kernel Point Convolution with Kernel Attention: Hugues Thomas,

Yao-Hung Hubert Tsai,

Timothy D. Barfoot,

Jian Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Thomas_2024_CVPR,
  author    = {Thomas, Hugues and Tsai, Yao-Hung Hubert and Barfoot, Timothy D. and Zhang, Jian},
  title     = {{KPConvX}: Modernizing Kernel Point Convolution with Kernel Attention},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5525--5535},
}
Clockwork Diffusion: Efficient Generation With Model-Step Distillation: Amirhossein Habibian,

Amir Ghodrati,

Noor Fathima,

Guillaume Sautiere,

Risheek Garrepalli,

Fatih Porikli,

Jens Petersen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Habibian_2024_CVPR,
  author    = {Habibian, Amirhossein and Ghodrati, Amir and Fathima, Noor and Sautiere, Guillaume and Garrepalli, Risheek and Porikli, Fatih and Petersen, Jens},
  title     = {Clockwork Diffusion: Efficient Generation With Model-Step Distillation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8352--8361},
}
Pick-or-Mix: Dynamic Channel Sampling for ConvNets: Ashish Kumar,

Daneul Kim,

Jaesik Park,

Laxmidhar Behera; [pdf] [supp]
[bibtex]
@InProceedings{Kumar_2024_CVPR,
  author    = {Kumar, Ashish and Kim, Daneul and Park, Jaesik and Behera, Laxmidhar},
  title     = {Pick-or-Mix: Dynamic Channel Sampling for {ConvNets}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5873--5882},
}
DyBluRF: Dynamic Neural Radiance Fields from Blurry Monocular Video: Huiqiang Sun,

Xingyi Li,

Liao Shen,

Xinyi Ye,

Ke Xian,

Zhiguo Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2024_CVPR,
  author    = {Sun, Huiqiang and Li, Xingyi and Shen, Liao and Ye, Xinyi and Xian, Ke and Cao, Zhiguo},
  title     = {{DyBluRF}: Dynamic Neural Radiance Fields from Blurry Monocular Video},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7517--7527},
}
AAMDM: Accelerated Auto-regressive Motion Diffusion Model: Tianyu Li,

Calvin Qiao,

Guanqiao Ren,

KangKang Yin,

Sehoon Ha; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Tianyu and Qiao, Calvin and Ren, Guanqiao and Yin, KangKang and Ha, Sehoon},
  title     = {{AAMDM}: Accelerated Auto-regressive Motion Diffusion Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1813--1823},
}
Towards Understanding Cross and Self-Attention in Stable Diffusion for Text-Guided Image Editing: Bingyan Liu,

Chengyu Wang,

Tingfeng Cao,

Kui Jia,

Jun Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Bingyan and Wang, Chengyu and Cao, Tingfeng and Jia, Kui and Huang, Jun},
  title     = {Towards Understanding Cross and Self-Attention in {Stable Diffusion} for Text-Guided Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7817--7826},
}
DiverGen: Improving Instance Segmentation by Learning Wider Data Distribution with More Diverse Generative Data: Chengxiang Fan,

Muzhi Zhu,

Hao Chen,

Yang Liu,

Weijia Wu,

Huaqi Zhang,

Chunhua Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2024_CVPR,
  author    = {Fan, Chengxiang and Zhu, Muzhi and Chen, Hao and Liu, Yang and Wu, Weijia and Zhang, Huaqi and Shen, Chunhua},
  title     = {{DiverGen}: Improving Instance Segmentation by Learning Wider Data Distribution with More Diverse Generative Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3986--3995},
}
Learning Disentangled Identifiers for Action-Customized Text-to-Image Generation: Siteng Huang,

Biao Gong,

Yutong Feng,

Xi Chen,

Yuqian Fu,

Yu Liu,

Donglin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Siteng and Gong, Biao and Feng, Yutong and Chen, Xi and Fu, Yuqian and Liu, Yu and Wang, Donglin},
  title     = {Learning Disentangled Identifiers for Action-Customized Text-to-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7797--7806},
}
Automatic Controllable Colorization via Imagination: Xiaoyan Cong,

Yue Wu,

Qifeng Chen,

Chenyang Lei; [pdf] [arXiv]
[bibtex]
@InProceedings{Cong_2024_CVPR,
  author    = {Cong, Xiaoyan and Wu, Yue and Chen, Qifeng and Lei, Chenyang},
  title     = {Automatic Controllable Colorization via Imagination},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2609--2619},
}
EMOPortraits: Emotion-enhanced Multimodal One-shot Head Avatars: Nikita Drobyshev,

Antoni Bigata Casademunt,

Konstantinos Vougioukas,

Zoe Landgraf,

Stavros Petridis,

Maja Pantic; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Drobyshev_2024_CVPR,
  author    = {Drobyshev, Nikita and Casademunt, Antoni Bigata and Vougioukas, Konstantinos and Landgraf, Zoe and Petridis, Stavros and Pantic, Maja},
  title     = {{EMOPortraits}: Emotion-enhanced Multimodal One-shot Head Avatars},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8498--8507},
}
Open3DIS: Open-Vocabulary 3D Instance Segmentation with 2D Mask Guidance: Phuc Nguyen,

Tuan Duc Ngo,

Evangelos Kalogerakis,

Chuang Gan,

Anh Tran,

Cuong Pham,

Khoi Nguyen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2024_CVPR,
  author    = {Nguyen, Phuc and Ngo, Tuan Duc and Kalogerakis, Evangelos and Gan, Chuang and Tran, Anh and Pham, Cuong and Nguyen, Khoi},
  title     = {{Open3DIS}: Open-Vocabulary {3D} Instance Segmentation with {2D} Mask Guidance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4018--4028},
}
ControlRoom3D: Room Generation using Semantic Proxy Rooms: Jonas Schult,

Sam Tsai,

Lukas Höllein,

Bichen Wu,

Jialiang Wang,

Chih-Yao Ma,

Kunpeng Li,

Xiaofang Wang,

Felix Wimbauer,

Zijian He,

Peizhao Zhang,

Bastian Leibe,

Peter Vajda,

Ji Hou; [pdf] [supp]
[bibtex]
@InProceedings{Schult_2024_CVPR,
  author    = {Schult, Jonas and Tsai, Sam and H{\"o}llein, Lukas and Wu, Bichen and Wang, Jialiang and Ma, Chih-Yao and Li, Kunpeng and Wang, Xiaofang and Wimbauer, Felix and He, Zijian and Zhang, Peizhao and Leibe, Bastian and Vajda, Peter and Hou, Ji},
  title     = {{ControlRoom3D}: Room Generation using Semantic Proxy Rooms},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6201--6210},
}
UniPTS: A Unified Framework for Proficient Post-Training Sparsity: Jingjing Xie,

Yuxin Zhang,

Mingbao Lin,

Liujuan Cao,

Rongrong Ji; [pdf] [arXiv]
[bibtex]
@InProceedings{Xie_2024_CVPR,
  author    = {Xie, Jingjing and Zhang, Yuxin and Lin, Mingbao and Cao, Liujuan and Ji, Rongrong},
  title     = {{UniPTS}: A Unified Framework for Proficient Post-Training Sparsity},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5746--5755},
}
HumanNorm: Learning Normal Diffusion Model for High-quality and Realistic 3D Human Generation: Xin Huang,

Ruizhi Shao,

Qi Zhang,

Hongwen Zhang,

Ying Feng,

Yebin Liu,

Qing Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Xin and Shao, Ruizhi and Zhang, Qi and Zhang, Hongwen and Feng, Ying and Liu, Yebin and Wang, Qing},
  title     = {{HumanNorm}: Learning Normal Diffusion Model for High-quality and Realistic {3D} Human Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4568--4577},
}
Cross-view and Cross-pose Completion for 3D Human Understanding: Matthieu Armando,

Salma Galaaoui,

Fabien Baradel,

Thomas Lucas,

Vincent Leroy,

Romain Brégier,

Philippe Weinzaepfel,

Grégory Rogez; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Armando_2024_CVPR,
  author    = {Armando, Matthieu and Galaaoui, Salma and Baradel, Fabien and Lucas, Thomas and Leroy, Vincent and Br{\'e}gier, Romain and Weinzaepfel, Philippe and Rogez, Gr{\'e}gory},
  title     = {Cross-view and Cross-pose Completion for {3D} Human Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1512--1523},
}
Efficient Scene Recovery Using Luminous Flux Prior: Zhongyu Li,

Lei Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Zhongyu and Zhang, Lei},
  title     = {Efficient Scene Recovery Using Luminous Flux Prior},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2743--2752},
}
Customize your NeRF: Adaptive Source Driven 3D Scene Editing via Local-Global Iterative Training: Runze He,

Shaofei Huang,

Xuecheng Nie,

Tianrui Hui,

Luoqi Liu,

Jiao Dai,

Jizhong Han,

Guanbin Li,

Si Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2024_CVPR,
  author    = {He, Runze and Huang, Shaofei and Nie, Xuecheng and Hui, Tianrui and Liu, Luoqi and Dai, Jiao and Han, Jizhong and Li, Guanbin and Liu, Si},
  title     = {Customize your {NeRF}: Adaptive Source Driven {3D} Scene Editing via Local-Global Iterative Training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6966--6975},
}
Spherical Mask: Coarse-to-Fine 3D Point Cloud Instance Segmentation with Spherical Representation: Sangyun Shin,

Kaichen Zhou,

Madhu Vankadari,

Andrew Markham,

Niki Trigoni; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shin_2024_CVPR,
  author    = {Shin, Sangyun and Zhou, Kaichen and Vankadari, Madhu and Markham, Andrew and Trigoni, Niki},
  title     = {Spherical Mask: Coarse-to-Fine {3D} Point Cloud Instance Segmentation with Spherical Representation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4060--4069},
}
FSRT: Facial Scene Representation Transformer for Face Reenactment from Factorized Appearance Head-pose and Facial Expression Features: Andre Rochow,

Max Schwarz,

Sven Behnke; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rochow_2024_CVPR,
  author    = {Rochow, Andre and Schwarz, Max and Behnke, Sven},
  title     = {{FSRT}: Facial Scene Representation Transformer for Face Reenactment from Factorized Appearance Head-pose and Facial Expression Features},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7716--7726},
}
TetraSphere: A Neural Descriptor for O(3)-Invariant Point Cloud Analysis: Pavlo Melnyk,

Andreas Robinson,

Michael Felsberg,

Mårten Wadenbäck; [pdf] [supp]
[bibtex]
@InProceedings{Melnyk_2024_CVPR,
  author    = {Melnyk, Pavlo and Robinson, Andreas and Felsberg, Michael and Wadenb{\"a}ck, M{\aa}rten},
  title     = {{TetraSphere}: A Neural Descriptor for {O(3)}-Invariant Point Cloud Analysis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5620--5630},
}
WANDR: Intention-guided Human Motion Generation: Markos Diomataris,

Nikos Athanasiou,

Omid Taheri,

Xi Wang,

Otmar Hilliges,

Michael J. Black; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Diomataris_2024_CVPR,
  author    = {Diomataris, Markos and Athanasiou, Nikos and Taheri, Omid and Wang, Xi and Hilliges, Otmar and Black, Michael J.},
  title     = {{WANDR}: Intention-guided Human Motion Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {927--936},
}
GroupContrast: Semantic-aware Self-supervised Representation Learning for 3D Understanding: Chengyao Wang,

Li Jiang,

Xiaoyang Wu,

Zhuotao Tian,

Bohao Peng,

Hengshuang Zhao,

Jiaya Jia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Chengyao and Jiang, Li and Wu, Xiaoyang and Tian, Zhuotao and Peng, Bohao and Zhao, Hengshuang and Jia, Jiaya},
  title     = {{GroupContrast}: Semantic-aware Self-supervised Representation Learning for {3D} Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4917--4928},
}
Privacy-Preserving Face Recognition Using Trainable Feature Subtraction: Yuxi Mi,

Zhizhou Zhong,

Yuge Huang,

Jiazhen Ji,

Jianqing Xu,

Jun Wang,

Shaoming Wang,

Shouhong Ding,

Shuigeng Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mi_2024_CVPR,
  author    = {Mi, Yuxi and Zhong, Zhizhou and Huang, Yuge and Ji, Jiazhen and Xu, Jianqing and Wang, Jun and Wang, Shaoming and Ding, Shouhong and Zhou, Shuigeng},
  title     = {Privacy-Preserving Face Recognition Using Trainable Feature Subtraction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {297--307},
}
Learning Visual Prompt for Gait Recognition: Kang Ma,

Ying Fu,

Chunshui Cao,

Saihui Hou,

Yongzhen Huang,

Dezhi Zheng; [pdf]
[bibtex]
@InProceedings{Ma_2024_CVPR,
  author    = {Ma, Kang and Fu, Ying and Cao, Chunshui and Hou, Saihui and Huang, Yongzhen and Zheng, Dezhi},
  title     = {Learning Visual Prompt for Gait Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {593--603},
}
SC-GS: Sparse-Controlled Gaussian Splatting for Editable Dynamic Scenes: Yi-Hua Huang,

Yang-Tian Sun,

Ziyi Yang,

Xiaoyang Lyu,

Yan-Pei Cao,

Xiaojuan Qi; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Yi-Hua and Sun, Yang-Tian and Yang, Ziyi and Lyu, Xiaoyang and Cao, Yan-Pei and Qi, Xiaojuan},
  title     = {{SC-GS}: Sparse-Controlled {Gaussian} Splatting for Editable Dynamic Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4220--4230},
}
Tri-Modal Motion Retrieval by Learning a Joint Embedding Space: Kangning Yin,

Shihao Zou,

Yuxuan Ge,

Zheng Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2024_CVPR,
  author    = {Yin, Kangning and Zou, Shihao and Ge, Yuxuan and Tian, Zheng},
  title     = {Tri-Modal Motion Retrieval by Learning a Joint Embedding Space},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1596--1605},
}
Geometry-aware Reconstruction and Fusion-refined Rendering for Generalizable Neural Radiance Fields: Tianqi Liu,

Xinyi Ye,

Min Shi,

Zihao Huang,

Zhiyu Pan,

Zhan Peng,

Zhiguo Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Tianqi and Ye, Xinyi and Shi, Min and Huang, Zihao and Pan, Zhiyu and Peng, Zhan and Cao, Zhiguo},
  title     = {Geometry-aware Reconstruction and Fusion-refined Rendering for Generalizable Neural Radiance Fields},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7654--7663},
}
VideoBooth: Diffusion-based Video Generation with Image Prompts: Yuming Jiang,

Tianxing Wu,

Shuai Yang,

Chenyang Si,

Dahua Lin,

Yu Qiao,

Chen Change Loy,

Ziwei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2024_CVPR,
  author    = {Jiang, Yuming and Wu, Tianxing and Yang, Shuai and Si, Chenyang and Lin, Dahua and Qiao, Yu and Loy, Chen Change and Liu, Ziwei},
  title     = {{VideoBooth}: Diffusion-based Video Generation with Image Prompts},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6689--6700},
}
SCULPT: Shape-Conditioned Unpaired Learning of Pose-dependent Clothed and Textured Human Meshes: Soubhik Sanyal,

Partha Ghosh,

Jinlong Yang,

Michael J. Black,

Justus Thies,

Timo Bolkart; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sanyal_2024_CVPR,
  author    = {Sanyal, Soubhik and Ghosh, Partha and Yang, Jinlong and Black, Michael J. and Thies, Justus and Bolkart, Timo},
  title     = {{SCULPT}: Shape-Conditioned Unpaired Learning of Pose-dependent Clothed and Textured Human Meshes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2362--2371},
}
EasyDrag: Efficient Point-based Manipulation on Diffusion Models: Xingzhong Hou,

Boxiao Liu,

Yi Zhang,

Jihao Liu,

Yu Liu,

Haihang You; [pdf] [supp]
[bibtex]
@InProceedings{Hou_2024_CVPR,
  author    = {Hou, Xingzhong and Liu, Boxiao and Zhang, Yi and Liu, Jihao and Liu, Yu and You, Haihang},
  title     = {{EasyDrag}: Efficient Point-based Manipulation on Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8404--8413},
}
InterHandGen: Two-Hand Interaction Generation via Cascaded Reverse Diffusion: Jihyun Lee,

Shunsuke Saito,

Giljoo Nam,

Minhyuk Sung,

Tae-Kyun Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2024_CVPR,
  author    = {Lee, Jihyun and Saito, Shunsuke and Nam, Giljoo and Sung, Minhyuk and Kim, Tae-Kyun},
  title     = {{InterHandGen}: Two-Hand Interaction Generation via Cascaded Reverse Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {527--537},
}
Video2Game: Real-time Interactive Realistic and Browser-Compatible Environment from a Single Video: Hongchi Xia,

Zhi-Hao Lin,

Wei-Chiu Ma,

Shenlong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2024_CVPR,
  author    = {Xia, Hongchi and Lin, Zhi-Hao and Ma, Wei-Chiu and Wang, Shenlong},
  title     = {{Video2Game}: Real-time Interactive Realistic and Browser-Compatible Environment from a Single Video},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4578--4588},
}
Tackling the Singularities at the Endpoints of Time Intervals in Diffusion Models: Pengze Zhang,

Hubery Yin,

Chen Li,

Xiaohua Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Pengze and Yin, Hubery and Li, Chen and Xie, Xiaohua},
  title     = {Tackling the Singularities at the Endpoints of Time Intervals in Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6945--6954},
}
CHAIN: Enhancing Generalization in Data-Efficient GANs via lipsCHitz continuity constrAIned Normalization: Yao Ni,

Piotr Koniusz; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ni_2024_CVPR,
  author    = {Ni, Yao and Koniusz, Piotr},
  title     = {{CHAIN}: Enhancing Generalization in Data-Efficient {GANs} via {lipsCHitz} continuity {constrAIned} Normalization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6763--6774},
}
High-Quality Facial Geometry and Appearance Capture at Home: Yuxuan Han,

Junfeng Lyu,

Feng Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2024_CVPR,
  author    = {Han, Yuxuan and Lyu, Junfeng and Xu, Feng},
  title     = {High-Quality Facial Geometry and Appearance Capture at Home},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {697--707},
}
Your Image is My Video: Reshaping the Receptive Field via Image-To-Video Differentiable AutoAugmentation and Fusion: Sofia Casarin,

Cynthia I. Ugwu,

Sergio Escalera,

Oswald Lanz; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Casarin_2024_CVPR,
  author    = {Casarin, Sofia and Ugwu, Cynthia I. and Escalera, Sergio and Lanz, Oswald},
  title     = {Your Image is My Video: Reshaping the Receptive Field via Image-To-Video Differentiable AutoAugmentation and Fusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5829--5839},
}
SpikingResformer: Bridging ResNet and Vision Transformer in Spiking Neural Networks: Xinyu Shi,

Zecheng Hao,

Zhaofei Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2024_CVPR,
  author    = {Shi, Xinyu and Hao, Zecheng and Yu, Zhaofei},
  title     = {{SpikingResformer}: Bridging {ResNet} and Vision Transformer in Spiking Neural Networks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5610--5619},
}
Self-Supervised Dual Contouring: Ramana Sundararaman,

Roman Klokov,

Maks Ovsjanikov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sundararaman_2024_CVPR,
  author    = {Sundararaman, Ramana and Klokov, Roman and Ovsjanikov, Maks},
  title     = {Self-Supervised Dual Contouring},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4681--4691},
}
GSVA: Generalized Segmentation via Multimodal Large Language Models: Zhuofan Xia,

Dongchen Han,

Yizeng Han,

Xuran Pan,

Shiji Song,

Gao Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2024_CVPR,
  author    = {Xia, Zhuofan and Han, Dongchen and Han, Yizeng and Pan, Xuran and Song, Shiji and Huang, Gao},
  title     = {{GSVA}: Generalized Segmentation via Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3858--3869},
}
AdaBM: On-the-Fly Adaptive Bit Mapping for Image Super-Resolution: Cheeun Hong,

Kyoung Mu Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hong_2024_CVPR,
  author    = {Hong, Cheeun and Lee, Kyoung Mu},
  title     = {{AdaBM}: On-the-Fly Adaptive Bit Mapping for Image Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2641--2650},
}
SVGDreamer: Text Guided SVG Generation with Diffusion Model: Ximing Xing,

Haitao Zhou,

Chuang Wang,

Jing Zhang,

Dong Xu,

Qian Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xing_2024_CVPR,
  author    = {Xing, Ximing and Zhou, Haitao and Wang, Chuang and Zhang, Jing and Xu, Dong and Yu, Qian},
  title     = {{SVGDreamer}: Text Guided {SVG} Generation with Diffusion Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4546--4555},
}
BlockGCN: Redefine Topology Awareness for Skeleton-Based Action Recognition: Yuxuan Zhou,

Xudong Yan,

Zhi-Qi Cheng,

Yan Yan,

Qi Dai,

Xian-Sheng Hua; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Yuxuan and Yan, Xudong and Cheng, Zhi-Qi and Yan, Yan and Dai, Qi and Hua, Xian-Sheng},
  title     = {{BlockGCN}: Redefine Topology Awareness for Skeleton-Based Action Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2049--2058},
}
Structure-Guided Adversarial Training of Diffusion Models: Ling Yang,

Haotian Qian,

Zhilong Zhang,

Jingwei Liu,

Bin Cui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Ling and Qian, Haotian and Zhang, Zhilong and Liu, Jingwei and Cui, Bin},
  title     = {Structure-Guided Adversarial Training of Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7256--7266},
}
NIFTY: Neural Object Interaction Fields for Guided Human Motion Synthesis: Nilesh Kulkarni,

Davis Rempe,

Kyle Genova,

Abhijit Kundu,

Justin Johnson,

David Fouhey,

Leonidas Guibas; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kulkarni_2024_CVPR,
  author    = {Kulkarni, Nilesh and Rempe, Davis and Genova, Kyle and Kundu, Abhijit and Johnson, Justin and Fouhey, David and Guibas, Leonidas},
  title     = {{NIFTY}: Neural Object Interaction Fields for Guided Human Motion Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {947--957},
}
Can Language Beat Numerical Regression? Language-Based Multimodal Trajectory Prediction: Inhwan Bae,

Junoh Lee,

Hae-Gon Jeon; [pdf] [arXiv]
[bibtex]
@InProceedings{Bae_2024_CVPR,
  author    = {Bae, Inhwan and Lee, Junoh and Jeon, Hae-Gon},
  title     = {Can Language Beat Numerical Regression? {Language}-Based Multimodal Trajectory Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {753--766},
}
Building Optimal Neural Architectures using Interpretable Knowledge: Keith G. Mills,

Fred X. Han,

Mohammad Salameh,

Shengyao Lu,

Chunhua Zhou,

Jiao He,

Fengyu Sun,

Di Niu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mills_2024_CVPR,
  author    = {Mills, Keith G. and Han, Fred X. and Salameh, Mohammad and Lu, Shengyao and Zhou, Chunhua and He, Jiao and Sun, Fengyu and Niu, Di},
  title     = {Building Optimal Neural Architectures using Interpretable Knowledge},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5726--5735},
}
Holo-Relighting: Controllable Volumetric Portrait Relighting from a Single Image: Yiqun Mei,

Yu Zeng,

He Zhang,

Zhixin Shu,

Xuaner Zhang,

Sai Bi,

Jianming Zhang,

HyunJoon Jung,

Vishal M. Patel; [pdf] [supp]
[bibtex]
@InProceedings{Mei_2024_CVPR,
  author    = {Mei, Yiqun and Zeng, Yu and Zhang, He and Shu, Zhixin and Zhang, Xuaner and Bi, Sai and Zhang, Jianming and Jung, HyunJoon and Patel, Vishal M.},
  title     = {{Holo-Relighting}: Controllable Volumetric Portrait Relighting from a Single Image},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4263--4273},
}
Noisy One-point Homographies are Surprisingly Good: Yaqing Ding,

Jonathan Astermark,

Magnus Oskarsson,

Viktor Larsson; [pdf] [supp]
[bibtex]
@InProceedings{Ding_2024_CVPR,
  author    = {Ding, Yaqing and Astermark, Jonathan and Oskarsson, Magnus and Larsson, Viktor},
  title     = {Noisy One-point Homographies are Surprisingly Good},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5125--5134},
}
Panacea: Panoramic and Controllable Video Generation for Autonomous Driving: Yuqing Wen,

Yucheng Zhao,

Yingfei Liu,

Fan Jia,

Yanhui Wang,

Chong Luo,

Chi Zhang,

Tiancai Wang,

Xiaoyan Sun,

Xiangyu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wen_2024_CVPR,
  author    = {Wen, Yuqing and Zhao, Yucheng and Liu, Yingfei and Jia, Fan and Wang, Yanhui and Luo, Chong and Zhang, Chi and Wang, Tiancai and Sun, Xiaoyan and Zhang, Xiangyu},
  title     = {Panacea: Panoramic and Controllable Video Generation for Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6902--6912},
}
DreamMatcher: Appearance Matching Self-Attention for Semantically-Consistent Text-to-Image Personalization: Jisu Nam,

Heesu Kim,

DongJae Lee,

Siyoon Jin,

Seungryong Kim,

Seunggyu Chang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nam_2024_CVPR,
  author    = {Nam, Jisu and Kim, Heesu and Lee, DongJae and Jin, Siyoon and Kim, Seungryong and Chang, Seunggyu},
  title     = {{DreamMatcher}: Appearance Matching Self-Attention for Semantically-Consistent Text-to-Image Personalization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8100--8110},
}
PolarMatte: Fully Computational Ground-Truth-Quality Alpha Matte Extraction for Images and Video using Polarized Screen Matting: Kenji Enomoto,

TJ Rhodes,

Brian Price,

Gavin Miller; [pdf] [supp]
[bibtex]
@InProceedings{Enomoto_2024_CVPR,
  author    = {Enomoto, Kenji and Rhodes, TJ and Price, Brian and Miller, Gavin},
  title     = {{PolarMatte}: Fully Computational Ground-Truth-Quality Alpha Matte Extraction for Images and Video using Polarized Screen Matting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3901--3909},
}
HOIDiffusion: Generating Realistic 3D Hand-Object Interaction Data: Mengqi Zhang,

Yang Fu,

Zheng Ding,

Sifei Liu,

Zhuowen Tu,

Xiaolong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Mengqi and Fu, Yang and Ding, Zheng and Liu, Sifei and Tu, Zhuowen and Wang, Xiaolong},
  title     = {{HOIDiffusion}: Generating Realistic {3D} Hand-Object Interaction Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8521--8531},
}
VecFusion: Vector Font Generation with Diffusion: Vikas Thamizharasan,

Difan Liu,

Shantanu Agarwal,

Matthew Fisher,

Michael Gharbi,

Oliver Wang,

Alec Jacobson,

Evangelos Kalogerakis; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Thamizharasan_2024_CVPR,
  author    = {Thamizharasan, Vikas and Liu, Difan and Agarwal, Shantanu and Fisher, Matthew and Gharbi, Michael and Wang, Oliver and Jacobson, Alec and Kalogerakis, Evangelos},
  title     = {{VecFusion}: Vector Font Generation with Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7943--7952},
}
Towards Text-guided 3D Scene Composition: Qihang Zhang,

Chaoyang Wang,

Aliaksandr Siarohin,

Peiye Zhuang,

Yinghao Xu,

Ceyuan Yang,

Dahua Lin,

Bolei Zhou,

Sergey Tulyakov,

Hsin-Ying Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Qihang and Wang, Chaoyang and Siarohin, Aliaksandr and Zhuang, Peiye and Xu, Yinghao and Yang, Ceyuan and Lin, Dahua and Zhou, Bolei and Tulyakov, Sergey and Lee, Hsin-Ying},
  title     = {Towards Text-guided {3D} Scene Composition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6829--6838},
}
EMAGE: Towards Unified Holistic Co-Speech Gesture Generation via Expressive Masked Audio Gesture Modeling: Haiyang Liu,

Zihao Zhu,

Giorgio Becherini,

Yichen Peng,

Mingyang Su,

You Zhou,

Xuefei Zhe,

Naoya Iwamoto,

Bo Zheng,

Michael J. Black; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Haiyang and Zhu, Zihao and Becherini, Giorgio and Peng, Yichen and Su, Mingyang and Zhou, You and Zhe, Xuefei and Iwamoto, Naoya and Zheng, Bo and Black, Michael J.},
  title     = {{EMAGE}: Towards Unified Holistic Co-Speech Gesture Generation via Expressive Masked Audio Gesture Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1144--1154},
}
Adversarial Text to Continuous Image Generation: Kilichbek Haydarov,

Aashiq Muhamed,

Xiaoqian Shen,

Jovana Lazarevic,

Ivan Skorokhodov,

Chamuditha Jayanga Galappaththige,

Mohamed Elhoseiny; [pdf] [supp]
[bibtex]
@InProceedings{Haydarov_2024_CVPR,
  author    = {Haydarov, Kilichbek and Muhamed, Aashiq and Shen, Xiaoqian and Lazarevic, Jovana and Skorokhodov, Ivan and Galappaththige, Chamuditha Jayanga and Elhoseiny, Mohamed},
  title     = {Adversarial Text to Continuous Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6316--6326},
}
HumanNeRF-SE: A Simple yet Effective Approach to Animate HumanNeRF with Diverse Poses: Caoyuan Ma,

Yu-Lun Liu,

Zhixiang Wang,

Wu Liu,

Xinchen Liu,

Zheng Wang; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2024_CVPR,
  author    = {Ma, Caoyuan and Liu, Yu-Lun and Wang, Zhixiang and Liu, Wu and Liu, Xinchen and Wang, Zheng},
  title     = {{HumanNeRF-SE}: A Simple yet Effective Approach to Animate {HumanNeRF} with Diverse Poses},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1460--1470},
}
HOLD: Category-agnostic 3D Reconstruction of Interacting Hands and Objects from Video: Zicong Fan,

Maria Parelli,

Maria Eleni Kadoglou,

Xu Chen,

Muhammed Kocabas,

Michael J. Black,

Otmar Hilliges; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2024_CVPR,
  author    = {Fan, Zicong and Parelli, Maria and Kadoglou, Maria Eleni and Chen, Xu and Kocabas, Muhammed and Black, Michael J. and Hilliges, Otmar},
  title     = {{HOLD}: Category-agnostic {3D} Reconstruction of Interacting Hands and Objects from Video},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {494--504},
}
Continual Segmentation with Disentangled Objectness Learning and Class Recognition: Yizheng Gong,

Siyue Yu,

Xiaoyang Wang,

Jimin Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gong_2024_CVPR,
  author    = {Gong, Yizheng and Yu, Siyue and Wang, Xiaoyang and Xiao, Jimin},
  title     = {Continual Segmentation with Disentangled Objectness Learning and Class Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3848--3857},
}
ASAM: Boosting Segment Anything Model with Adversarial Tuning: Bo Li,

Haoke Xiao,

Lv Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Bo and Xiao, Haoke and Tang, Lv},
  title     = {{ASAM}: Boosting {Segment Anything Model} with Adversarial Tuning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3699--3710},
}
Dynamic Support Information Mining for Category-Agnostic Pose Estimation: Pengfei Ren,

Yuanyuan Gao,

Haifeng Sun,

Qi Qi,

Jingyu Wang,

Jianxin Liao; [pdf] [supp]
[bibtex]
@InProceedings{Ren_2024_CVPR,
  author    = {Ren, Pengfei and Gao, Yuanyuan and Sun, Haifeng and Qi, Qi and Wang, Jingyu and Liao, Jianxin},
  title     = {Dynamic Support Information Mining for Category-Agnostic Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1921--1930},
}
Taming Mode Collapse in Score Distillation for Text-to-3D Generation: Peihao Wang,

Dejia Xu,

Zhiwen Fan,

Dilin Wang,

Sreyas Mohan,

Forrest Iandola,

Rakesh Ranjan,

Yilei Li,

Qiang Liu,

Zhangyang Wang,

Vikas Chandra; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Peihao and Xu, Dejia and Fan, Zhiwen and Wang, Dilin and Mohan, Sreyas and Iandola, Forrest and Ranjan, Rakesh and Li, Yilei and Liu, Qiang and Wang, Zhangyang and Chandra, Vikas},
  title     = {Taming Mode Collapse in Score Distillation for Text-to-{3D} Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9037--9047},
}
MagicAnimate: Temporally Consistent Human Image Animation using Diffusion Model: Zhongcong Xu,

Jianfeng Zhang,

Jun Hao Liew,

Hanshu Yan,

Jia-Wei Liu,

Chenxu Zhang,

Jiashi Feng,

Mike Zheng Shou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Zhongcong and Zhang, Jianfeng and Liew, Jun Hao and Yan, Hanshu and Liu, Jia-Wei and Zhang, Chenxu and Feng, Jiashi and Shou, Mike Zheng},
  title     = {{MagicAnimate}: Temporally Consistent Human Image Animation using Diffusion Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1481--1490},
}
From Correspondences to Pose: Non-minimal Certifiably Optimal Relative Pose without Disambiguation: Javier Tirado-Garín,

Javier Civera; [pdf] [supp]
[bibtex]
@InProceedings{Tirado-Garin_2024_CVPR,
  author    = {Tirado-Gar{\'\i}n, Javier and Civera, Javier},
  title     = {From Correspondences to Pose: Non-minimal Certifiably Optimal Relative Pose without Disambiguation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {403--412},
}
Loose Inertial Poser: Motion Capture with IMU-attached Loose-Wear Jacket: Chengxu Zuo,

Yiming Wang,

Lishuang Zhan,

Shihui Guo,

Xinyu Yi,

Feng Xu,

Yipeng Qin; [pdf] [supp]
[bibtex]
@InProceedings{Zuo_2024_CVPR,
  author    = {Zuo, Chengxu and Wang, Yiming and Zhan, Lishuang and Guo, Shihui and Yi, Xinyu and Xu, Feng and Qin, Yipeng},
  title     = {Loose Inertial Poser: Motion Capture with {IMU}-attached Loose-Wear Jacket},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2209--2219},
}
Training-Free Pretrained Model Merging: Zhengqi Xu,

Ke Yuan,

Huiqiong Wang,

Yong Wang,

Mingli Song,

Jie Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Zhengqi and Yuan, Ke and Wang, Huiqiong and Wang, Yong and Song, Mingli and Song, Jie},
  title     = {Training-Free Pretrained Model Merging},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5915--5925},
}
NC-SDF: Enhancing Indoor Scene Reconstruction Using Neural SDFs with View-Dependent Normal Compensation: Ziyi Chen,

Xiaolong Wu,

Yu Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Ziyi and Wu, Xiaolong and Zhang, Yu},
  title     = {{NC-SDF}: Enhancing Indoor Scene Reconstruction Using Neural {SDFs} with View-Dependent Normal Compensation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5155--5165},
}
Person in Place: Generating Associative Skeleton-Guidance Maps for Human-Object Interaction Image Editing: ChangHee Yang,

ChanHee Kang,

Kyeongbo Kong,

Hanni Oh,

Suk-Ju Kang; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, ChangHee and Kang, ChanHee and Kong, Kyeongbo and Oh, Hanni and Kang, Suk-Ju},
  title     = {Person in Place: Generating Associative Skeleton-Guidance Maps for Human-Object Interaction Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8164--8175},
}
ChatPose: Chatting about 3D Human Pose: Yao Feng,

Jing Lin,

Sai Kumar Dwivedi,

Yu Sun,

Priyanka Patel,

Michael J. Black; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2024_CVPR,
  author    = {Feng, Yao and Lin, Jing and Dwivedi, Sai Kumar and Sun, Yu and Patel, Priyanka and Black, Michael J.},
  title     = {{ChatPose}: Chatting about {3D} Human Pose},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2093--2103},
}
Distilling ODE Solvers of Diffusion Models into Smaller Steps: Sanghwan Kim,

Hao Tang,

Fisher Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2024_CVPR,
  author    = {Kim, Sanghwan and Tang, Hao and Yu, Fisher},
  title     = {Distilling {ODE} Solvers of Diffusion Models into Smaller Steps},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9410--9419},
}
LightIt: Illumination Modeling and Control for Diffusion Models: Peter Kocsis,

Julien Philip,

Kalyan Sunkavalli,

Matthias Nießner,

Yannick Hold-Geoffroy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kocsis_2024_CVPR,
  author    = {Kocsis, Peter and Philip, Julien and Sunkavalli, Kalyan and Nie{\ss}ner, Matthias and Hold-Geoffroy, Yannick},
  title     = {{LightIt}: Illumination Modeling and Control for Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9359--9369},
}
Neural Lineage: Runpeng Yu,

Xinchao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2024_CVPR,
  author    = {Yu, Runpeng and Wang, Xinchao},
  title     = {Neural Lineage},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4797--4807},
}
Visual Layout Composer: Image-Vector Dual Diffusion Model for Design Layout Generation: Mohammad Amin Shabani,

Zhaowen Wang,

Difan Liu,

Nanxuan Zhao,

Jimei Yang,

Yasutaka Furukawa; [pdf] [supp]
[bibtex]
@InProceedings{Shabani_2024_CVPR,
  author    = {Shabani, Mohammad Amin and Wang, Zhaowen and Liu, Difan and Zhao, Nanxuan and Yang, Jimei and Furukawa, Yasutaka},
  title     = {Visual Layout Composer: Image-Vector Dual Diffusion Model for Design Layout Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9222--9231},
}
3D Multi-frame Fusion for Video Stabilization: Zhan Peng,

Xinyi Ye,

Weiyue Zhao,

Tianqi Liu,

Huiqiang Sun,

Baopu Li,

Zhiguo Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2024_CVPR,
  author    = {Peng, Zhan and Ye, Xinyi and Zhao, Weiyue and Liu, Tianqi and Sun, Huiqiang and Li, Baopu and Cao, Zhiguo},
  title     = {{3D} Multi-frame Fusion for Video Stabilization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7507--7516},
}
Local-consistent Transformation Learning for Rotation-invariant Point Cloud Analysis: Yiyang Chen,

Lunhao Duan,

Shanshan Zhao,

Changxing Ding,

Dacheng Tao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Yiyang and Duan, Lunhao and Zhao, Shanshan and Ding, Changxing and Tao, Dacheng},
  title     = {Local-consistent Transformation Learning for Rotation-invariant Point Cloud Analysis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5418--5427},
}
Tailored Visions: Enhancing Text-to-Image Generation with Personalized Prompt Rewriting: Zijie Chen,

Lichao Zhang,

Fangsheng Weng,

Lili Pan,

Zhenzhong Lan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Zijie and Zhang, Lichao and Weng, Fangsheng and Pan, Lili and Lan, Zhenzhong},
  title     = {Tailored Visions: Enhancing Text-to-Image Generation with Personalized Prompt Rewriting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7727--7736},
}
Efficient Deformable ConvNets: Rethinking Dynamic and Sparse Operator for Vision Applications: Yuwen Xiong,

Zhiqi Li,

Yuntao Chen,

Feng Wang,

Xizhou Zhu,

Jiapeng Luo,

Wenhai Wang,

Tong Lu,

Hongsheng Li,

Yu Qiao,

Lewei Lu,

Jie Zhou,

Jifeng Dai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiong_2024_CVPR,
  author    = {Xiong, Yuwen and Li, Zhiqi and Chen, Yuntao and Wang, Feng and Zhu, Xizhou and Luo, Jiapeng and Wang, Wenhai and Lu, Tong and Li, Hongsheng and Qiao, Yu and Lu, Lewei and Zhou, Jie and Dai, Jifeng},
  title     = {Efficient Deformable {ConvNets}: Rethinking Dynamic and Sparse Operator for Vision Applications},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5652--5661},
}
CoDe: An Explicit Content Decoupling Framework for Image Restoration: Enxuan Gu,

Hongwei Ge,

Yong Guo; [pdf] [supp]
[bibtex]
@InProceedings{Gu_2024_CVPR,
  author    = {Gu, Enxuan and Ge, Hongwei and Guo, Yong},
  title     = {{CoDe}: An Explicit Content Decoupling Framework for Image Restoration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2920--2930},
}
DreamVideo: Composing Your Dream Videos with Customized Subject and Motion: Yujie Wei,

Shiwei Zhang,

Zhiwu Qing,

Hangjie Yuan,

Zhiheng Liu,

Yu Liu,

Yingya Zhang,

Jingren Zhou,

Hongming Shan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2024_CVPR,
  author    = {Wei, Yujie and Zhang, Shiwei and Qing, Zhiwu and Yuan, Hangjie and Liu, Zhiheng and Liu, Yu and Zhang, Yingya and Zhou, Jingren and Shan, Hongming},
  title     = {{DreamVideo}: Composing Your Dream Videos with Customized Subject and Motion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6537--6549},
}
Using Human Feedback to Fine-tune Diffusion Models without Any Reward Model: Kai Yang,

Jian Tao,

Jiafei Lyu,

Chunjiang Ge,

Jiaxin Chen,

Weihan Shen,

Xiaolong Zhu,

Xiu Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Kai and Tao, Jian and Lyu, Jiafei and Ge, Chunjiang and Chen, Jiaxin and Shen, Weihan and Zhu, Xiaolong and Li, Xiu},
  title     = {Using Human Feedback to Fine-tune Diffusion Models without Any Reward Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8941--8951},
}
SynSP: Synergy of Smoothness and Precision in Pose Sequences Refinement: Tao Wang,

Lei Jin,

Zheng Wang,

Jianshu Li,

Liang Li,

Fang Zhao,

Yu Cheng,

Li Yuan,

Li Zhou,

Junliang Xing,

Jian Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Tao and Jin, Lei and Wang, Zheng and Li, Jianshu and Li, Liang and Zhao, Fang and Cheng, Yu and Yuan, Li and Zhou, Li and Xing, Junliang and Zhao, Jian},
  title     = {{SynSP}: Synergy of Smoothness and Precision in Pose Sequences Refinement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1824--1833},
}
Learned Representation-Guided Diffusion Models for Large-Image Generation: Alexandros Graikos,

Srikar Yellapragada,

Minh-Quan Le,

Saarthak Kapse,

Prateek Prasanna,

Joel Saltz,

Dimitris Samaras; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Graikos_2024_CVPR,
  author    = {Graikos, Alexandros and Yellapragada, Srikar and Le, Minh-Quan and Kapse, Saarthak and Prasanna, Prateek and Saltz, Joel and Samaras, Dimitris},
  title     = {Learned Representation-Guided Diffusion Models for Large-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8532--8542},
}
Ranni: Taming Text-to-Image Diffusion for Accurate Instruction Following: Yutong Feng,

Biao Gong,

Di Chen,

Yujun Shen,

Yu Liu,

Jingren Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2024_CVPR,
  author    = {Feng, Yutong and Gong, Biao and Chen, Di and Shen, Yujun and Liu, Yu and Zhou, Jingren},
  title     = {Ranni: Taming Text-to-Image Diffusion for Accurate Instruction Following},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4744--4753},
}
Direct2.5: Diverse Text-to-3D Generation via Multi-view 2.5D Diffusion: Yuanxun Lu,

Jingyang Zhang,

Shiwei Li,

Tian Fang,

David McKinnon,

Yanghai Tsin,

Long Quan,

Xun Cao,

Yao Yao; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2024_CVPR,
  author    = {Lu, Yuanxun and Zhang, Jingyang and Li, Shiwei and Fang, Tian and McKinnon, David and Tsin, Yanghai and Quan, Long and Cao, Xun and Yao, Yao},
  title     = {Direct2.5: Diverse Text-to-{3D} Generation via Multi-view {2.5D} Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8744--8753},
}
MatFuse: Controllable Material Generation with Diffusion Models: Giuseppe Vecchio,

Renato Sortino,

Simone Palazzo,

Concetto Spampinato; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vecchio_2024_CVPR,
  author    = {Vecchio, Giuseppe and Sortino, Renato and Palazzo, Simone and Spampinato, Concetto},
  title     = {{MatFuse}: Controllable Material Generation with Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4429--4438},
}
Training Vision Transformers for Semi-Supervised Semantic Segmentation: Xinting Hu,

Li Jiang,

Bernt Schiele; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2024_CVPR,
  author    = {Hu, Xinting and Jiang, Li and Schiele, Bernt},
  title     = {Training Vision Transformers for Semi-Supervised Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4007--4017},
}
Quantifying Task Priority for Multi-Task Optimization: Wooseong Jeong,

Kuk-Jin Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2024_CVPR,
  author    = {Jeong, Wooseong and Yoon, Kuk-Jin},
  title     = {Quantifying Task Priority for Multi-Task Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {363--372},
}
On the Scalability of Diffusion-based Text-to-Image Generation: Hao Li,

Yang Zou,

Ying Wang,

Orchid Majumder,

Yusheng Xie,

R. Manmatha,

Ashwin Swaminathan,

Zhuowen Tu,

Stefano Ermon,

Stefano Soatto; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Hao and Zou, Yang and Wang, Ying and Majumder, Orchid and Xie, Yusheng and Manmatha, R. and Swaminathan, Ashwin and Tu, Zhuowen and Ermon, Stefano and Soatto, Stefano},
  title     = {On the Scalability of Diffusion-based Text-to-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9400--9409},
}
AnySkill: Learning Open-Vocabulary Physical Skill for Interactive Agents: Jieming Cui,

Tengyu Liu,

Nian Liu,

Yaodong Yang,

Yixin Zhu,

Siyuan Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cui_2024_CVPR,
  author    = {Cui, Jieming and Liu, Tengyu and Liu, Nian and Yang, Yaodong and Zhu, Yixin and Huang, Siyuan},
  title     = {{AnySkill}: Learning Open-Vocabulary Physical Skill for Interactive Agents},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {852--862},
}
Generative Unlearning for Any Identity: Juwon Seo,

Sung-Hoon Lee,

Tae-Young Lee,

Seungjun Moon,

Gyeong-Moon Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Seo_2024_CVPR,
  author    = {Seo, Juwon and Lee, Sung-Hoon and Lee, Tae-Young and Moon, Seungjun and Park, Gyeong-Moon},
  title     = {Generative Unlearning for Any Identity},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9151--9161},
}
FlowVid: Taming Imperfect Optical Flows for Consistent Video-to-Video Synthesis: Feng Liang,

Bichen Wu,

Jialiang Wang,

Licheng Yu,

Kunpeng Li,

Yinan Zhao,

Ishan Misra,

Jia-Bin Huang,

Peizhao Zhang,

Peter Vajda,

Diana Marculescu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2024_CVPR,
  author    = {Liang, Feng and Wu, Bichen and Wang, Jialiang and Yu, Licheng and Li, Kunpeng and Zhao, Yinan and Misra, Ishan and Huang, Jia-Bin and Zhang, Peizhao and Vajda, Peter and Marculescu, Diana},
  title     = {{FlowVid}: Taming Imperfect Optical Flows for Consistent Video-to-Video Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8207--8216},
}
StyleCineGAN: Landscape Cinemagraph Generation using a Pre-trained StyleGAN: Jongwoo Choi,

Kwanggyoon Seo,

Amirsaman Ashtari,

Junyong Noh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2024_CVPR,
  author    = {Choi, Jongwoo and Seo, Kwanggyoon and Ashtari, Amirsaman and Noh, Junyong},
  title     = {{StyleCineGAN}: Landscape Cinemagraph Generation using a Pre-trained {StyleGAN}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7872--7881},
}
Laplacian-guided Entropy Model in Neural Codec with Blur-dissipated Synthesis: Atefeh Khoshkhahtinat,

Ali Zafari,

Piyush M. Mehta,

Nasser M. Nasrabadi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Khoshkhahtinat_2024_CVPR,
  author    = {Khoshkhahtinat, Atefeh and Zafari, Ali and Mehta, Piyush M. and Nasrabadi, Nasser M.},
  title     = {Laplacian-guided Entropy Model in Neural Codec with Blur-dissipated Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3045--3054},
}
RMT: Retentive Networks Meet Vision Transformers: Qihang Fan,

Huaibo Huang,

Mingrui Chen,

Hongmin Liu,

Ran He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2024_CVPR,
  author    = {Fan, Qihang and Huang, Huaibo and Chen, Mingrui and Liu, Hongmin and He, Ran},
  title     = {{RMT}: Retentive Networks Meet Vision Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5641--5651},
}
Multimodal Pathway: Improve Transformers with Irrelevant Data from Other Modalities: Yiyuan Zhang,

Xiaohan Ding,

Kaixiong Gong,

Yixiao Ge,

Ying Shan,

Xiangyu Yue; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Yiyuan and Ding, Xiaohan and Gong, Kaixiong and Ge, Yixiao and Shan, Ying and Yue, Xiangyu},
  title     = {Multimodal Pathway: Improve Transformers with Irrelevant Data from Other Modalities},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6108--6117},
}
FaceChain-ImagineID: Freely Crafting High-Fidelity Diverse Talking Faces from Disentangled Audio: Chao Xu,

Yang Liu,

Jiazheng Xing,

Weida Wang,

Mingze Sun,

Jun Dan,

Tianxin Huang,

Siyuan Li,

Zhi-Qi Cheng,

Ying Tai,

Baigui Sun; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Chao and Liu, Yang and Xing, Jiazheng and Wang, Weida and Sun, Mingze and Dan, Jun and Huang, Tianxin and Li, Siyuan and Cheng, Zhi-Qi and Tai, Ying and Sun, Baigui},
  title     = {{FaceChain-ImagineID}: Freely Crafting High-Fidelity Diverse Talking Faces from Disentangled Audio},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1292--1302},
}
SSR-Encoder: Encoding Selective Subject Representation for Subject-Driven Generation: Yuxuan Zhang,

Yiren Song,

Jiaming Liu,

Rui Wang,

Jinpeng Yu,

Hao Tang,

Huaxia Li,

Xu Tang,

Yao Hu,

Han Pan,

Zhongliang Jing; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Yuxuan and Song, Yiren and Liu, Jiaming and Wang, Rui and Yu, Jinpeng and Tang, Hao and Li, Huaxia and Tang, Xu and Hu, Yao and Pan, Han and Jing, Zhongliang},
  title     = {{SSR-Encoder}: Encoding Selective Subject Representation for Subject-Driven Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8069--8078},
}
MVIP-NeRF: Multi-view 3D Inpainting on NeRF Scenes via Diffusion Prior: Honghua Chen,

Chen Change Loy,

Xingang Pan; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Honghua and Loy, Chen Change and Pan, Xingang},
  title     = {{MVIP-NeRF}: Multi-view {3D} Inpainting on {NeRF} Scenes via Diffusion Prior},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5344--5353},
}
StegoGAN: Leveraging Steganography for Non-Bijective Image-to-Image Translation: Sidi Wu,

Yizi Chen,

Samuel Mermet,

Lorenz Hurni,

Konrad Schindler,

Nicolas Gonthier,

Loic Landrieu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Sidi and Chen, Yizi and Mermet, Samuel and Hurni, Lorenz and Schindler, Konrad and Gonthier, Nicolas and Landrieu, Loic},
  title     = {{StegoGAN}: Leveraging Steganography for Non-Bijective Image-to-Image Translation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7922--7931},
}
M&M VTO: Multi-Garment Virtual Try-On and Editing: Luyang Zhu,

Yingwei Li,

Nan Liu,

Hao Peng,

Dawei Yang,

Ira Kemelmacher-Shlizerman; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2024_CVPR,
  author    = {Zhu, Luyang and Li, Yingwei and Liu, Nan and Peng, Hao and Yang, Dawei and Kemelmacher-Shlizerman, Ira},
  title     = {{M\&M VTO}: Multi-Garment Virtual Try-On and Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1346--1356},
}
Dynamic Inertial Poser (DynaIP): Part-Based Motion Dynamics Learning for Enhanced Human Pose Estimation with Sparse Inertial Sensors: Yu Zhang,

Songpengcheng Xia,

Lei Chu,

Jiarui Yang,

Qi Wu,

Ling Pei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Yu and Xia, Songpengcheng and Chu, Lei and Yang, Jiarui and Wu, Qi and Pei, Ling},
  title     = {Dynamic Inertial Poser ({DynaIP}): Part-Based Motion Dynamics Learning for Enhanced Human Pose Estimation with Sparse Inertial Sensors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1889--1899},
}
GraCo: Granularity-Controllable Interactive Segmentation: Yian Zhao,

Kehan Li,

Zesen Cheng,

Pengchong Qiao,

Xiawu Zheng,

Rongrong Ji,

Chang Liu,

Li Yuan,

Jie Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Yian and Li, Kehan and Cheng, Zesen and Qiao, Pengchong and Zheng, Xiawu and Ji, Rongrong and Liu, Chang and Yuan, Li and Chen, Jie},
  title     = {{GraCo}: Granularity-Controllable Interactive Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3501--3510},
}
G-HOP: Generative Hand-Object Prior for Interaction Reconstruction and Grasp Synthesis: Yufei Ye,

Abhinav Gupta,

Kris Kitani,

Shubham Tulsiani; [pdf] [supp]
[bibtex]
@InProceedings{Ye_2024_CVPR,
  author    = {Ye, Yufei and Gupta, Abhinav and Kitani, Kris and Tulsiani, Shubham},
  title     = {{G-HOP}: Generative Hand-Object Prior for Interaction Reconstruction and Grasp Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1911--1920},
}
Contrastive Denoising Score for Text-guided Latent Diffusion Image Editing: Hyelin Nam,

Gihyun Kwon,

Geon Yeong Park,

Jong Chul Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nam_2024_CVPR,
  author    = {Nam, Hyelin and Kwon, Gihyun and Park, Geon Yeong and Ye, Jong Chul},
  title     = {Contrastive Denoising Score for Text-guided Latent Diffusion Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9192--9201},
}
Neural Point Cloud Diffusion for Disentangled 3D Shape and Appearance Generation: Philipp Schröppel,

Christopher Wewer,

Jan Eric Lenssen,

Eddy Ilg,

Thomas Brox; [pdf] [supp]
[bibtex]
@InProceedings{Schroppel_2024_CVPR,
  author    = {Schr{\"o}ppel, Philipp and Wewer, Christopher and Lenssen, Jan Eric and Ilg, Eddy and Brox, Thomas},
  title     = {Neural Point Cloud Diffusion for Disentangled {3D} Shape and Appearance Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8785--8794},
}
VAREN: Very Accurate and Realistic Equine Network: Silvia Zuffi,

Ylva Mellbin,

Ci Li,

Markus Hoeschle,

Hedvig Kjellström,

Senya Polikovsky,

Elin Hernlund,

Michael J. Black; [pdf] [supp]
[bibtex]
@InProceedings{Zuffi_2024_CVPR,
  author    = {Zuffi, Silvia and Mellbin, Ylva and Li, Ci and Hoeschle, Markus and Kjellstr{\"o}m, Hedvig and Polikovsky, Senya and Hernlund, Elin and Black, Michael J.},
  title     = {{VAREN}: Very Accurate and Realistic Equine Network},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5374--5383},
}
SD-DiT: Unleashing the Power of Self-supervised Discrimination in Diffusion Transformer: Rui Zhu,

Yingwei Pan,

Yehao Li,

Ting Yao,

Zhenglong Sun,

Tao Mei,

Chang Wen Chen; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2024_CVPR,
  author    = {Zhu, Rui and Pan, Yingwei and Li, Yehao and Yao, Ting and Sun, Zhenglong and Mei, Tao and Chen, Chang Wen},
  title     = {{SD-DiT}: Unleashing the Power of Self-supervised Discrimination in Diffusion Transformer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8435--8445},
}
MedBN: Robust Test-Time Adaptation against Malicious Test Samples: Hyejin Park,

Jeongyeon Hwang,

Sunung Mun,

Sangdon Park,

Jungseul Ok; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2024_CVPR,
  author    = {Park, Hyejin and Hwang, Jeongyeon and Mun, Sunung and Park, Sangdon and Ok, Jungseul},
  title     = {{MedBN}: Robust Test-Time Adaptation against Malicious Test Samples},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5997--6007},
}
Unsupervised Gaze Representation Learning from Multi-view Face Images: Yiwei Bao,

Feng Lu; [pdf]
[bibtex]
@InProceedings{Bao_2024_CVPR,
  author    = {Bao, Yiwei and Lu, Feng},
  title     = {Unsupervised Gaze Representation Learning from Multi-view Face Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1419--1428},
}
AEROBLADE: Training-Free Detection of Latent Diffusion Images Using Autoencoder Reconstruction Error: Jonas Ricker,

Denis Lukovnikov,

Asja Fischer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ricker_2024_CVPR,
  author    = {Ricker, Jonas and Lukovnikov, Denis and Fischer, Asja},
  title     = {{AEROBLADE}: Training-Free Detection of Latent Diffusion Images Using Autoencoder Reconstruction Error},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9130--9140},
}
Point2CAD: Reverse Engineering CAD Models from 3D Point Clouds: Yujia Liu,

Anton Obukhov,

Jan Dirk Wegner,

Konrad Schindler; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Yujia and Obukhov, Anton and Wegner, Jan Dirk and Schindler, Konrad},
  title     = {{Point2CAD}: Reverse Engineering {CAD} Models from {3D} Point Clouds},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3763--3772},
}
LocLLM: Exploiting Generalizable Human Keypoint Localization via Large Language Model: Dongkai Wang,

Shiyu Xuan,

Shiliang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Dongkai and Xuan, Shiyu and Zhang, Shiliang},
  title     = {{LocLLM}: Exploiting Generalizable Human Keypoint Localization via Large Language Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {614--623},
}
MMA-Diffusion: MultiModal Attack on Diffusion Models: Yijun Yang,

Ruiyuan Gao,

Xiaosen Wang,

Tsung-Yi Ho,

Nan Xu,

Qiang Xu; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Yijun and Gao, Ruiyuan and Wang, Xiaosen and Ho, Tsung-Yi and Xu, Nan and Xu, Qiang},
  title     = {{MMA-Diffusion}: {MultiModal} Attack on Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7737--7746},
}
HanDiffuser: Text-to-Image Generation With Realistic Hand Appearances: Supreeth Narasimhaswamy,

Uttaran Bhattacharya,

Xiang Chen,

Ishita Dasgupta,

Saayan Mitra,

Minh Hoai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Narasimhaswamy_2024_CVPR,
  author    = {Narasimhaswamy, Supreeth and Bhattacharya, Uttaran and Chen, Xiang and Dasgupta, Ishita and Mitra, Saayan and Hoai, Minh},
  title     = {{HanDiffuser}: Text-to-Image Generation With Realistic Hand Appearances},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2468--2479},
}
Hierarchical Patch Diffusion Models for High-Resolution Video Generation: Ivan Skorokhodov,

Willi Menapace,

Aliaksandr Siarohin,

Sergey Tulyakov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Skorokhodov_2024_CVPR,
  author    = {Skorokhodov, Ivan and Menapace, Willi and Siarohin, Aliaksandr and Tulyakov, Sergey},
  title     = {Hierarchical Patch Diffusion Models for High-Resolution Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7569--7579},
}
Neural Implicit Morphing of Face Images: Guilherme Schardong,

Tiago Novello,

Hallison Paz,

Iurii Medvedev,

Vinícius da Silva,

Luiz Velho,

Nuno Gonçalves; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Schardong_2024_CVPR,
  author    = {Schardong, Guilherme and Novello, Tiago and Paz, Hallison and Medvedev, Iurii and da Silva, Vin{\'\i}cius and Velho, Luiz and Gon{\c{c}}alves, Nuno},
  title     = {Neural Implicit Morphing of Face Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7321--7330},
}
UniGS: Unified Representation for Image Generation and Segmentation: Lu Qi,

Lehan Yang,

Weidong Guo,

Yu Xu,

Bo Du,

Varun Jampani,

Ming-Hsuan Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qi_2024_CVPR,
  author    = {Qi, Lu and Yang, Lehan and Guo, Weidong and Xu, Yu and Du, Bo and Jampani, Varun and Yang, Ming-Hsuan},
  title     = {{UniGS}: Unified Representation for Image Generation and Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6305--6315},
}
Training-Free Open-Vocabulary Segmentation with Offline Diffusion-Augmented Prototype Generation: Luca Barsellotti,

Roberto Amoroso,

Marcella Cornia,

Lorenzo Baraldi,

Rita Cucchiara; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Barsellotti_2024_CVPR,
  author    = {Barsellotti, Luca and Amoroso, Roberto and Cornia, Marcella and Baraldi, Lorenzo and Cucchiara, Rita},
  title     = {Training-Free Open-Vocabulary Segmentation with Offline Diffusion-Augmented Prototype Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3689--3698},
}
HUGS: Human Gaussian Splats: Muhammed Kocabas,

Jen-Hao Rick Chang,

James Gabriel,

Oncel Tuzel,

Anurag Ranjan; [pdf] [arXiv]
[bibtex]
@InProceedings{Kocabas_2024_CVPR,
  author    = {Kocabas, Muhammed and Chang, Jen-Hao Rick and Gabriel, James and Tuzel, Oncel and Ranjan, Anurag},
  title     = {{HUGS}: Human {Gaussian} Splats},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {505--515},
}
PhysPT: Physics-aware Pretrained Transformer for Estimating Human Dynamics from Monocular Videos: Yufei Zhang,

Jeffrey O. Kephart,

Zijun Cui,

Qiang Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Yufei and Kephart, Jeffrey O. and Cui, Zijun and Ji, Qiang},
  title     = {{PhysPT}: Physics-aware Pretrained Transformer for Estimating Human Dynamics from Monocular Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2305--2317},
}
EfficientDreamer: High-Fidelity and Robust 3D Creation via Orthogonal-view Diffusion Priors: Zhipeng Hu,

Minda Zhao,

Chaoyi Zhao,

Xinyue Liang,

Lincheng Li,

Zeng Zhao,

Changjie Fan,

Xiaowei Zhou,

Xin Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2024_CVPR,
  author    = {Hu, Zhipeng and Zhao, Minda and Zhao, Chaoyi and Liang, Xinyue and Li, Lincheng and Zhao, Zeng and Fan, Changjie and Zhou, Xiaowei and Yu, Xin},
  title     = {{EfficientDreamer}: High-Fidelity and Robust {3D} Creation via Orthogonal-view Diffusion Priors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4949--4958},
}
HOIAnimator: Generating Text-prompt Human-object Animations using Novel Perceptive Diffusion Models: Wenfeng Song,

Xinyu Zhang,

Shuai Li,

Yang Gao,

Aimin Hao,

Xia Hou,

Chenglizhao Chen,

Ning Li,

Hong Qin; [pdf] [supp]
[bibtex]
@InProceedings{Song_2024_CVPR,
  author    = {Song, Wenfeng and Zhang, Xinyu and Li, Shuai and Gao, Yang and Hao, Aimin and Hou, Xia and Chen, Chenglizhao and Li, Ning and Qin, Hong},
  title     = {{HOIAnimator}: Generating Text-prompt Human-object Animations using Novel Perceptive Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {811--820},
}
SyncTalk: The Devil is in the Synchronization for Talking Head Synthesis: Ziqiao Peng,

Wentao Hu,

Yue Shi,

Xiangyu Zhu,

Xiaomei Zhang,

Hao Zhao,

Jun He,

Hongyan Liu,

Zhaoxin Fan; [pdf] [arXiv]
[bibtex]
@InProceedings{Peng_2024_CVPR,
  author    = {Peng, Ziqiao and Hu, Wentao and Shi, Yue and Zhu, Xiangyu and Zhang, Xiaomei and Zhao, Hao and He, Jun and Liu, Hongyan and Fan, Zhaoxin},
  title     = {{SyncTalk}: The Devil is in the Synchronization for Talking Head Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {666--676},
}
DreamSalon: A Staged Diffusion Framework for Preserving Identity-Context in Editable Face Generation: Haonan Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2024_CVPR,
  author    = {Lin, Haonan},
  title     = {{DreamSalon}: A Staged Diffusion Framework for Preserving Identity-Context in Editable Face Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8589--8598},
}
Neural Super-Resolution for Real-time Rendering with Radiance Demodulation: Jia Li,

Ziling Chen,

Xiaolong Wu,

Lu Wang,

Beibei Wang,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Jia and Chen, Ziling and Wu, Xiaolong and Wang, Lu and Wang, Beibei and Zhang, Lei},
  title     = {Neural Super-Resolution for Real-time Rendering with Radiance Demodulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4357--4367},
}
MMM: Generative Masked Motion Model: Ekkasit Pinyoanuntapong,

Pu Wang,

Minwoo Lee,

Chen Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pinyoanuntapong_2024_CVPR,
  author    = {Pinyoanuntapong, Ekkasit and Wang, Pu and Lee, Minwoo and Chen, Chen},
  title     = {{MMM}: Generative Masked Motion Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1546--1555},
}
PEGASUS: Personalized Generative 3D Avatars with Composable Attributes: Hyunsoo Cha,

Byungjun Kim,

Hanbyul Joo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cha_2024_CVPR,
  author    = {Cha, Hyunsoo and Kim, Byungjun and Joo, Hanbyul},
  title     = {{PEGASUS}: Personalized Generative {3D} Avatars with Composable Attributes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1072--1081},
}
Diff-Plugin: Revitalizing Details for Diffusion-based Low-level Tasks: Yuhao Liu,

Zhanghan Ke,

Fang Liu,

Nanxuan Zhao,

Rynson W.H. Lau; [pdf]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Yuhao and Ke, Zhanghan and Liu, Fang and Zhao, Nanxuan and Lau, Rynson W. H.},
  title     = {{Diff-Plugin}: Revitalizing Details for Diffusion-based Low-level Tasks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4197--4208},
}
Intelligent Grimm - Open-ended Visual Storytelling via Latent Diffusion Models: Chang Liu,

Haoning Wu,

Yujie Zhong,

Xiaoyun Zhang,

Yanfeng Wang,

Weidi Xie; [pdf]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Chang and Wu, Haoning and Zhong, Yujie and Zhang, Xiaoyun and Wang, Yanfeng and Xie, Weidi},
  title     = {Intelligent {Grimm} - Open-ended Visual Storytelling via Latent Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6190--6200},
}
GenTron: Diffusion Transformers for Image and Video Generation: Shoufa Chen,

Mengmeng Xu,

Jiawei Ren,

Yuren Cong,

Sen He,

Yanping Xie,

Animesh Sinha,

Ping Luo,

Tao Xiang,

Juan-Manuel Perez-Rua; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Shoufa and Xu, Mengmeng and Ren, Jiawei and Cong, Yuren and He, Sen and Xie, Yanping and Sinha, Animesh and Luo, Ping and Xiang, Tao and Perez-Rua, Juan-Manuel},
  title     = {{GenTron}: Diffusion Transformers for Image and Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6441--6451},
}
TRIP: Temporal Residual Learning with Image Noise Prior for Image-to-Video Diffusion Models: Zhongwei Zhang,

Fuchen Long,

Yingwei Pan,

Zhaofan Qiu,

Ting Yao,

Yang Cao,

Tao Mei; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Zhongwei and Long, Fuchen and Pan, Yingwei and Qiu, Zhaofan and Yao, Ting and Cao, Yang and Mei, Tao},
  title     = {{TRIP}: Temporal Residual Learning with Image Noise Prior for Image-to-Video Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8671--8681},
}
TexVocab: Texture Vocabulary-conditioned Human Avatars: Yuxiao Liu,

Zhe Li,

Yebin Liu,

Haoqian Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Yuxiao and Li, Zhe and Liu, Yebin and Wang, Haoqian},
  title     = {{TexVocab}: Texture Vocabulary-conditioned Human Avatars},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1715--1725},
}
KITRO: Refining Human Mesh by 2D Clues and Kinematic-tree Rotation: Fengyuan Yang,

Kerui Gu,

Angela Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Fengyuan and Gu, Kerui and Yao, Angela},
  title     = {{KITRO}: Refining Human Mesh by {2D} Clues and Kinematic-tree Rotation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1052--1061},
}
SuGaR: Surface-Aligned Gaussian Splatting for Efficient 3D Mesh Reconstruction and High-Quality Mesh Rendering: Antoine Guédon,

Vincent Lepetit; [pdf] [supp]
[bibtex]
@InProceedings{Guedon_2024_CVPR,
  author    = {Gu{\'e}don, Antoine and Lepetit, Vincent},
  title     = {{SuGaR}: Surface-Aligned {Gaussian} Splatting for Efficient {3D} Mesh Reconstruction and High-Quality Mesh Rendering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5354--5363},
}
Towards Effective Usage of Human-Centric Priors in Diffusion Models for Text-based Human Image Generation: Junyan Wang,

Zhenhong Sun,

Zhiyu Tan,

Xuanbai Chen,

Weihua Chen,

Hao Li,

Cheng Zhang,

Yang Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Junyan and Sun, Zhenhong and Tan, Zhiyu and Chen, Xuanbai and Chen, Weihua and Li, Hao and Zhang, Cheng and Song, Yang},
  title     = {Towards Effective Usage of Human-Centric Priors in Diffusion Models for Text-based Human Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8446--8455},
}
A Video is Worth 256 Bases: Spatial-Temporal Expectation-Maximization Inversion for Zero-Shot Video Editing: Maomao Li,

Yu Li,

Tianyu Yang,

Yunfei Liu,

Dongxu Yue,

Zhihui Lin,

Dong Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Maomao and Li, Yu and Yang, Tianyu and Liu, Yunfei and Yue, Dongxu and Lin, Zhihui and Xu, Dong},
  title     = {A Video is Worth 256 Bases: Spatial-Temporal Expectation-Maximization Inversion for Zero-Shot Video Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {7528--7537},
}
URHand: Universal Relightable Hands: Zhaoxi Chen,

Gyeongsik Moon,

Kaiwen Guo,

Chen Cao,

Stanislav Pidhorskyi,

Tomas Simon,

Rohan Joshi,

Yuan Dong,

Yichen Xu,

Bernardo Pires,

He Wen,

Lucas Evans,

Bo Peng,

Julia Buffalini,

Autumn Trimble,

Kevyn McPhail,

Melissa Schoeller,

Shoou-I Yu,

Javier Romero,

Michael Zollhofer,

Yaser Sheikh,

Ziwei Liu,

Shunsuke Saito; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Zhaoxi and Moon, Gyeongsik and Guo, Kaiwen and Cao, Chen and Pidhorskyi, Stanislav and Simon, Tomas and Joshi, Rohan and Dong, Yuan and Xu, Yichen and Pires, Bernardo and Wen, He and Evans, Lucas and Peng, Bo and Buffalini, Julia and Trimble, Autumn and McPhail, Kevyn and Schoeller, Melissa and Yu, Shoou-I and Romero, Javier and Zollhofer, Michael and Sheikh, Yaser and Liu, Ziwei and Saito, Shunsuke},
  title     = {{URHand}: Universal Relightable Hands},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {119--129},
}
Named Entity Driven Zero-Shot Image Manipulation: Zhida Feng,

Li Chen,

Jing Tian,

JiaXiang Liu,

Shikun Feng; [pdf] [supp]
[bibtex]
@InProceedings{Feng_2024_CVPR,
  author    = {Feng, Zhida and Chen, Li and Tian, Jing and Liu, JiaXiang and Feng, Shikun},
  title     = {Named Entity Driven Zero-Shot Image Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9110--9119},
}
ESR-NeRF: Emissive Source Reconstruction Using LDR Multi-view Images: Jinseo Jeong,

Junseo Koo,

Qimeng Zhang,

Gunhee Kim; [pdf] [supp]
[bibtex]
@InProceedings{Jeong_2024_CVPR,
  author    = {Jeong, Jinseo and Koo, Junseo and Zhang, Qimeng and Kim, Gunhee},
  title     = {{ESR-NeRF}: Emissive Source Reconstruction Using {LDR} Multi-view Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4598--4609},
}
Infer from What You Have Seen Before: Temporally-dependent Classifier for Semi-supervised Video Segmentation: Jiafan Zhuang,

Zilei Wang,

Yixin Zhang,

Zhun Fan; [pdf]
[bibtex]
@InProceedings{Zhuang_2024_CVPR,
  author    = {Zhuang, Jiafan and Wang, Zilei and Zhang, Yixin and Fan, Zhun},
  title     = {Infer from What You Have Seen Before: Temporally-dependent Classifier for Semi-supervised Video Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3575--3584},
}
Video Frame Interpolation via Direct Synthesis with the Event-based Reference: Yuhan Liu,

Yongjian Deng,

Hao Chen,

Zhen Yang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Yuhan and Deng, Yongjian and Chen, Hao and Yang, Zhen},
  title     = {Video Frame Interpolation via Direct Synthesis with the Event-based Reference},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8477--8487},
}
DSL-FIQA: Assessing Facial Image Quality via Dual-Set Degradation Learning and Landmark-Guided Transformer: Wei-Ting Chen,

Gurunandan Krishnan,

Qiang Gao,

Sy-Yen Kuo,

Sizhou Ma,

Jian Wang; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Wei-Ting and Krishnan, Gurunandan and Gao, Qiang and Kuo, Sy-Yen and Ma, Sizhou and Wang, Jian},
  title     = {{DSL-FIQA}: Assessing Facial Image Quality via Dual-Set Degradation Learning and Landmark-Guided Transformer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {2931--2941},
}
FMA-Net: Flow-Guided Dynamic Filtering and Iterative Feature Refinement with Multi-Attention for Joint Video Super-Resolution and Deblurring: Geunhyuk Youk,

Jihyong Oh,

Munchurl Kim; [pdf] [supp]
[bibtex]
@InProceedings{Youk_2024_CVPR,
  author    = {Youk, Geunhyuk and Oh, Jihyong and Kim, Munchurl},
  title     = {{FMA-Net}: Flow-Guided Dynamic Filtering and Iterative Feature Refinement with Multi-Attention for Joint Video Super-Resolution and Deblurring},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {44--55},
}
Hourglass Tokenizer for Efficient Transformer-Based 3D Human Pose Estimation: Wenhao Li,

Mengyuan Liu,

Hong Liu,

Pichao Wang,

Jialun Cai,

Nicu Sebe; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Wenhao and Liu, Mengyuan and Liu, Hong and Wang, Pichao and Cai, Jialun and Sebe, Nicu},
  title     = {Hourglass Tokenizer for Efficient Transformer-Based {3D} Human Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {604--613},
}
Boosting Diffusion Models with Moving Average Sampling in Frequency Domain: Yurui Qian,

Qi Cai,

Yingwei Pan,

Yehao Li,

Ting Yao,

Qibin Sun,

Tao Mei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qian_2024_CVPR,
  author    = {Qian, Yurui and Cai, Qi and Pan, Yingwei and Li, Yehao and Yao, Ting and Sun, Qibin and Mei, Tao},
  title     = {Boosting Diffusion Models with Moving Average Sampling in Frequency Domain},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8911--8920},
}
Bi-Causal: Group Activity Recognition via Bidirectional Causality: Youliang Zhang,

Wenxuan Liu,

Danni Xu,

Zhuo Zhou,

Zheng Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Youliang and Liu, Wenxuan and Xu, Danni and Zhou, Zhuo and Wang, Zheng},
  title     = {{Bi-Causal}: Group Activity Recognition via Bidirectional Causality},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {1450--1459},
}
Space-Time Diffusion Features for Zero-Shot Text-Driven Motion Transfer: Danah Yatim,

Rafail Fridman,

Omer Bar-Tal,

Yoni Kasten,

Tali Dekel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yatim_2024_CVPR,
  author    = {Yatim, Danah and Fridman, Rafail and Bar-Tal, Omer and Kasten, Yoni and Dekel, Tali},
  title     = {Space-Time Diffusion Features for Zero-Shot Text-Driven Motion Transfer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {8466--8476},
}
MIGC: Multi-Instance Generation Controller for Text-to-Image Synthesis: Dewei Zhou,

You Li,

Fan Ma,

Xiaoting Zhang,

Yi Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Dewei and Li, You and Ma, Fan and Zhang, Xiaoting and Yang, Yi},
  title     = {{MIGC}: Multi-Instance Generation Controller for Text-to-Image Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {6818--6828},
}
Distilling CLIP with Dual Guidance for Learning Discriminative Human Body Shape Representation: Feng Liu,

Minchul Kim,

Zhiyuan Ren,

Xiaoming Liu; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Feng and Kim, Minchul and Ren, Zhiyuan and Liu, Xiaoming},
  title     = {Distilling {CLIP} with Dual Guidance for Learning Discriminative Human Body Shape Representation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {256--266},
}
LLaFS: When Large Language Models Meet Few-Shot Segmentation: Lanyun Zhu,

Tianrun Chen,

Deyi Ji,

Jieping Ye,

Jun Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2024_CVPR,
  author    = {Zhu, Lanyun and Chen, Tianrun and Ji, Deyi and Ye, Jieping and Liu, Jun},
  title     = {{LLaFS}: When Large Language Models Meet Few-Shot Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {3065--3075},
}
Kernel Adaptive Convolution for Scene Text Detection via Distance Map Prediction: Jinzhi Zheng,

Heng Fan,

Libo Zhang; [pdf]
[bibtex]
@InProceedings{Zheng_2024_CVPR,
  author    = {Zheng, Jinzhi and Fan, Heng and Zhang, Libo},
  title     = {Kernel Adaptive Convolution for Scene Text Detection via Distance Map Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {5957--5966},
}
Adaptive Multi-Modal Cross-Entropy Loss for Stereo Matching: Peng Xu,

Zhiyu Xiang,

Chengyu Qiao,

Jingyun Fu,

Tianyu Pu; [pdf] [arXiv]
[bibtex]
@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Peng and Xiang, Zhiyu and Qiao, Chengyu and Fu, Jingyun and Pu, Tianyu},
  title     = {Adaptive Multi-Modal Cross-Entropy Loss for Stereo Matching},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5135--5144}
}
Unlocking the Potential of Prompt-Tuning in Bridging Generalized and Personalized Federated Learning: Wenlong Deng,

Christos Thrampoulidis,

Xiaoxiao Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deng_2024_CVPR,
  author    = {Deng, Wenlong and Thrampoulidis, Christos and Li, Xiaoxiao},
  title     = {Unlocking the Potential of Prompt-Tuning in Bridging Generalized and Personalized Federated Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6087--6097}
}
GALA: Generating Animatable Layered Assets from a Single Scan: Taeksoo Kim,

Byungjun Kim,

Shunsuke Saito,

Hanbyul Joo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2024_CVPR,
  author    = {Kim, Taeksoo and Kim, Byungjun and Saito, Shunsuke and Joo, Hanbyul},
  title     = {{GALA}: Generating Animatable Layered Assets from a Single Scan},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1535--1545}
}
LeGO: Leveraging a Surface Deformation Network for Animatable Stylized Face Generation with One Example: Soyeon Yoon,

Kwan Yun,

Kwanggyoon Seo,

Sihun Cha,

Jung Eun Yoo,

Junyong Noh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yoon_2024_CVPR,
  author    = {Yoon, Soyeon and Yun, Kwan and Seo, Kwanggyoon and Cha, Sihun and Yoo, Jung Eun and Noh, Junyong},
  title     = {{LeGO}: Leveraging a Surface Deformation Network for Animatable Stylized Face Generation with One Example},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4505--4514}
}
Frequency-Adaptive Dilated Convolution for Semantic Segmentation: Linwei Chen,

Lin Gu,

Dezhi Zheng,

Ying Fu; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Linwei and Gu, Lin and Zheng, Dezhi and Fu, Ying},
  title     = {Frequency-Adaptive Dilated Convolution for Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3414--3425}
}
Multiple View Geometry Transformers for 3D Human Pose Estimation: Ziwei Liao,

Jialiang Zhu,

Chunyu Wang,

Han Hu,

Steven L. Waslander; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liao_2024_CVPR,
  author    = {Liao, Ziwei and Zhu, Jialiang and Wang, Chunyu and Hu, Han and Waslander, Steven L.},
  title     = {Multiple View Geometry Transformers for {3D} Human Pose Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {708--717}
}
SiTH: Single-view Textured Human Reconstruction with Image-Conditioned Diffusion: Hsuan-I Ho,

Jie Song,

Otmar Hilliges; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{I_Ho_2024_CVPR,
  author    = {Ho, Hsuan-I and Song, Jie and Hilliges, Otmar},
  title     = {{SiTH}: Single-view Textured Human Reconstruction with Image-Conditioned Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {538--549}
}
DynVideo-E: Harnessing Dynamic NeRF for Large-Scale Motion- and View-Change Human-Centric Video Editing: Jia-Wei Liu,

Yan-Pei Cao,

Jay Zhangjie Wu,

Weijia Mao,

Yuchao Gu,

Rui Zhao,

Jussi Keppo,

Ying Shan,

Mike Zheng Shou; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Jia-Wei and Cao, Yan-Pei and Wu, Jay Zhangjie and Mao, Weijia and Gu, Yuchao and Zhao, Rui and Keppo, Jussi and Shan, Ying and Shou, Mike Zheng},
  title     = {{DynVideo-E}: Harnessing Dynamic {NeRF} for Large-Scale Motion- and View-Change Human-Centric Video Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7664--7674}
}
Real-Time Neural BRDF with Spherically Distributed Primitives: Yishun Dou,

Zhong Zheng,

Qiaoqiao Jin,

Bingbing Ni,

Yugang Chen,

Junxiang Ke; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dou_2024_CVPR,
  author    = {Dou, Yishun and Zheng, Zhong and Jin, Qiaoqiao and Ni, Bingbing and Chen, Yugang and Ke, Junxiang},
  title     = {Real-Time Neural {BRDF} with Spherically Distributed Primitives},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4337--4346}
}
VideoCrafter2: Overcoming Data Limitations for High-Quality Video Diffusion Models: Haoxin Chen,

Yong Zhang,

Xiaodong Cun,

Menghan Xia,

Xintao Wang,

Chao Weng,

Ying Shan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Haoxin and Zhang, Yong and Cun, Xiaodong and Xia, Menghan and Wang, Xintao and Weng, Chao and Shan, Ying},
  title     = {{VideoCrafter2}: Overcoming Data Limitations for High-Quality Video Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7310--7320}
}
Style Injection in Diffusion: A Training-free Approach for Adapting Large-scale Diffusion Models for Style Transfer: Jiwoo Chung,

Sangeek Hyun,

Jae-Pil Heo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chung_2024_CVPR,
  author    = {Chung, Jiwoo and Hyun, Sangeek and Heo, Jae-Pil},
  title     = {Style Injection in Diffusion: A Training-free Approach for Adapting Large-scale Diffusion Models for Style Transfer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8795--8805}
}
OrthCaps: An Orthogonal CapsNet with Sparse Attention Routing and Pruning: Xinyu Geng,

Jiaming Wang,

Jiawei Gong,

Yuerong Xue,

Jun Xu,

Fanglin Chen,

Xiaolin Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Geng_2024_CVPR,
  author    = {Geng, Xinyu and Wang, Jiaming and Gong, Jiawei and Xue, Yuerong and Xu, Jun and Chen, Fanglin and Huang, Xiaolin},
  title     = {{OrthCaps}: An Orthogonal {CapsNet} with Sparse Attention Routing and Pruning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6037--6046}
}
Florence-2: Advancing a Unified Representation for a Variety of Vision Tasks: Bin Xiao,

Haiping Wu,

Weijian Xu,

Xiyang Dai,

Houdong Hu,

Yumao Lu,

Michael Zeng,

Ce Liu,

Lu Yuan; [pdf] [supp]
[bibtex]
@InProceedings{Xiao_2024_CVPR,
  author    = {Xiao, Bin and Wu, Haiping and Xu, Weijian and Dai, Xiyang and Hu, Houdong and Lu, Yumao and Zeng, Michael and Liu, Ce and Yuan, Lu},
  title     = {{Florence-2}: Advancing a Unified Representation for a Variety of Vision Tasks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4818--4829}
}
NeRF On-the-go: Exploiting Uncertainty for Distractor-free NeRFs in the Wild: Weining Ren,

Zihan Zhu,

Boyang Sun,

Jiaqi Chen,

Marc Pollefeys,

Songyou Peng; [pdf] [supp]
[bibtex]
@InProceedings{Ren_2024_CVPR,
  author    = {Ren, Weining and Zhu, Zihan and Sun, Boyang and Chen, Jiaqi and Pollefeys, Marc and Peng, Songyou},
  title     = {{NeRF} On-the-go: Exploiting Uncertainty for Distractor-free {NeRFs} in the Wild},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8931--8940}
}
3D Human Pose Perception from Egocentric Stereo Videos: Hiroyasu Akada,

Jian Wang,

Vladislav Golyanik,

Christian Theobalt; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Akada_2024_CVPR,
  author    = {Akada, Hiroyasu and Wang, Jian and Golyanik, Vladislav and Theobalt, Christian},
  title     = {{3D} Human Pose Perception from Egocentric Stereo Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {767--776}
}
Grid Diffusion Models for Text-to-Video Generation: Taegyeong Lee,

Soyeong Kwon,

Taehwan Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2024_CVPR,
  author    = {Lee, Taegyeong and Kwon, Soyeong and Kim, Taehwan},
  title     = {Grid Diffusion Models for Text-to-Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8734--8743}
}
LucidDreamer: Towards High-Fidelity Text-to-3D Generation via Interval Score Matching: Yixun Liang,

Xin Yang,

Jiantao Lin,

Haodong Li,

Xiaogang Xu,

Yingcong Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2024_CVPR,
  author    = {Liang, Yixun and Yang, Xin and Lin, Jiantao and Li, Haodong and Xu, Xiaogang and Chen, Yingcong},
  title     = {{LucidDreamer}: Towards High-Fidelity Text-to-{3D} Generation via Interval Score Matching},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6517--6526}
}
PTM-VQA: Efficient Video Quality Assessment Leveraging Diverse PreTrained Models from the Wild: Kun Yuan,

Hongbo Liu,

Mading Li,

Muyi Sun,

Ming Sun,

Jiachao Gong,

Jinhua Hao,

Chao Zhou,

Yansong Tang; [pdf]
[bibtex]
@InProceedings{Yuan_2024_CVPR,
  author    = {Yuan, Kun and Liu, Hongbo and Li, Mading and Sun, Muyi and Sun, Ming and Gong, Jiachao and Hao, Jinhua and Zhou, Chao and Tang, Yansong},
  title     = {{PTM-VQA}: Efficient Video Quality Assessment Leveraging Diverse {PreTrained} Models from the Wild},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2835--2845}
}
REACTO: Reconstructing Articulated Objects from a Single Video: Chaoyue Song,

Jiacheng Wei,

Chuan Sheng Foo,

Guosheng Lin,

Fayao Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2024_CVPR,
  author    = {Song, Chaoyue and Wei, Jiacheng and Foo, Chuan Sheng and Lin, Guosheng and Liu, Fayao},
  title     = {{REACTO}: Reconstructing Articulated Objects from a Single Video},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5384--5395}
}
Egocentric Whole-Body Motion Capture with FisheyeViT and Diffusion-Based Motion Refinement: Jian Wang,

Zhe Cao,

Diogo Luvizon,

Lingjie Liu,

Kripasindhu Sarkar,

Danhang Tang,

Thabo Beeler,

Christian Theobalt; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Jian and Cao, Zhe and Luvizon, Diogo and Liu, Lingjie and Sarkar, Kripasindhu and Tang, Danhang and Beeler, Thabo and Theobalt, Christian},
  title     = {Egocentric Whole-Body Motion Capture with {FisheyeViT} and Diffusion-Based Motion Refinement},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {777--787}
}
Language Embedded 3D Gaussians for Open-Vocabulary Scene Understanding: Jin-Chuan Shi,

Miao Wang,

Hao-Bin Duan,

Shao-Hua Guan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2024_CVPR,
  author    = {Shi, Jin-Chuan and Wang, Miao and Duan, Hao-Bin and Guan, Shao-Hua},
  title     = {Language Embedded {3D} {Gaussians} for Open-Vocabulary Scene Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5333--5343}
}
Towards Automated Movie Trailer Generation: Dawit Mureja Argaw,

Mattia Soldan,

Alejandro Pardo,

Chen Zhao,

Fabian Caba Heilbron,

Joon Son Chung,

Bernard Ghanem; [pdf] [arXiv]
[bibtex]
@InProceedings{Argaw_2024_CVPR,
  author    = {Argaw, Dawit Mureja and Soldan, Mattia and Pardo, Alejandro and Zhao, Chen and Heilbron, Fabian Caba and Chung, Joon Son and Ghanem, Bernard},
  title     = {Towards Automated Movie Trailer Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7445--7454}
}
Sheared Backpropagation for Fine-tuning Foundation Models: Zhiyuan Yu,

Li Shen,

Liang Ding,

Xinmei Tian,

Yixin Chen,

Dacheng Tao; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2024_CVPR,
  author    = {Yu, Zhiyuan and Shen, Li and Ding, Liang and Tian, Xinmei and Chen, Yixin and Tao, Dacheng},
  title     = {Sheared Backpropagation for Fine-tuning Foundation Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5883--5892}
}
Misalignment-Robust Frequency Distribution Loss for Image Transformation: Zhangkai Ni,

Juncheng Wu,

Zian Wang,

Wenhan Yang,

Hanli Wang,

Lin Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ni_2024_CVPR,
  author    = {Ni, Zhangkai and Wu, Juncheng and Wang, Zian and Yang, Wenhan and Wang, Hanli and Ma, Lin},
  title     = {Misalignment-Robust Frequency Distribution Loss for Image Transformation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2910--2919}
}
Degrees of Freedom Matter: Inferring Dynamics from Point Trajectories: Yan Zhang,

Sergey Prokudin,

Marko Mihajlovic,

Qianli Ma,

Siyu Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Yan and Prokudin, Sergey and Mihajlovic, Marko and Ma, Qianli and Tang, Siyu},
  title     = {Degrees of Freedom Matter: Inferring Dynamics from Point Trajectories},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2018--2028}
}
Low-Latency Neural Stereo Streaming: Qiqi Hou,

Farzad Farhadzadeh,

Amir Said,

Guillaume Sautiere,

Hoang Le; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hou_2024_CVPR,
  author    = {Hou, Qiqi and Farhadzadeh, Farzad and Said, Amir and Sautiere, Guillaume and Le, Hoang},
  title     = {Low-Latency Neural Stereo Streaming},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7974--7984}
}
Intrinsic Image Diffusion for Indoor Single-view Material Estimation: Peter Kocsis,

Vincent Sitzmann,

Matthias Nießner; [pdf] [supp]
[bibtex]
@InProceedings{Kocsis_2024_CVPR,
  author    = {Kocsis, Peter and Sitzmann, Vincent and Nie{\ss}ner, Matthias},
  title     = {Intrinsic Image Diffusion for Indoor Single-view Material Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5198--5208}
}
Material Palette: Extraction of Materials from a Single Image: Ivan Lopes,

Fabio Pizzati,

Raoul de Charette; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lopes_2024_CVPR,
  author    = {Lopes, Ivan and Pizzati, Fabio and de Charette, Raoul},
  title     = {Material Palette: Extraction of Materials from a Single Image},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4379--4388}
}
RealCustom: Narrowing Real Text Word for Real-Time Open-Domain Text-to-Image Customization: Mengqi Huang,

Zhendong Mao,

Mingcong Liu,

Qian He,

Yongdong Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Mengqi and Mao, Zhendong and Liu, Mingcong and He, Qian and Zhang, Yongdong},
  title     = {{RealCustom}: Narrowing Real Text Word for Real-Time Open-Domain Text-to-Image Customization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7476--7485}
}
Text2QR: Harmonizing Aesthetic Customization and Scanning Robustness for Text-Guided QR Code Generation: Guangyang Wu,

Xiaohong Liu,

Jun Jia,

Xuehao Cui,

Guangtao Zhai; [pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Guangyang and Liu, Xiaohong and Jia, Jun and Cui, Xuehao and Zhai, Guangtao},
  title     = {{Text2QR}: Harmonizing Aesthetic Customization and Scanning Robustness for Text-Guided {QR} Code Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8456--8465}
}
ECLIPSE: A Resource-Efficient Text-to-Image Prior for Image Generations: Maitreya Patel,

Changhoon Kim,

Sheng Cheng,

Chitta Baral,

Yezhou Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Patel_2024_CVPR,
  author    = {Patel, Maitreya and Kim, Changhoon and Cheng, Sheng and Baral, Chitta and Yang, Yezhou},
  title     = {{ECLIPSE}: A Resource-Efficient Text-to-Image Prior for Image Generations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9069--9078}
}
Adaptive Bidirectional Displacement for Semi-Supervised Medical Image Segmentation: Hanyang Chi,

Jian Pang,

Bingfeng Zhang,

Weifeng Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chi_2024_CVPR,
  author    = {Chi, Hanyang and Pang, Jian and Zhang, Bingfeng and Liu, Weifeng},
  title     = {Adaptive Bidirectional Displacement for Semi-Supervised Medical Image Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4070--4080}
}
Accurate Training Data for Occupancy Map Prediction in Automated Driving Using Evidence Theory: Jonas Kälble,

Sascha Wirges,

Maxim Tatarchenko,

Eddy Ilg; [pdf] [supp]
[bibtex]
@InProceedings{Kalble_2024_CVPR,
  author    = {K{\"a}lble, Jonas and Wirges, Sascha and Tatarchenko, Maxim and Ilg, Eddy},
  title     = {Accurate Training Data for Occupancy Map Prediction in Automated Driving Using Evidence Theory},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5281--5290}
}
DiffusionLight: Light Probes for Free by Painting a Chrome Ball: Pakkapon Phongthawee,

Worameth Chinchuthakun,

Nontaphat Sinsunthithet,

Varun Jampani,

Amit Raj,

Pramook Khungurn,

Supasorn Suwajanakorn; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Phongthawee_2024_CVPR,
  author    = {Phongthawee, Pakkapon and Chinchuthakun, Worameth and Sinsunthithet, Nontaphat and Jampani, Varun and Raj, Amit and Khungurn, Pramook and Suwajanakorn, Supasorn},
  title     = {{DiffusionLight}: Light Probes for Free by Painting a Chrome Ball},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {98--108}
}
Rethinking the Spatial Inconsistency in Classifier-Free Diffusion Guidance: Dazhong Shen,

Guanglu Song,

Zeyue Xue,

Fu-Yun Wang,

Yu Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shen_2024_CVPR,
  author    = {Shen, Dazhong and Song, Guanglu and Xue, Zeyue and Wang, Fu-Yun and Liu, Yu},
  title     = {Rethinking the Spatial Inconsistency in Classifier-Free Diffusion Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9370--9379}
}
KTPFormer: Kinematics and Trajectory Prior Knowledge-Enhanced Transformer for 3D Human Pose Estimation: Jihua Peng,

Yanghong Zhou,

P. Y. Mok; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2024_CVPR,
  author    = {Peng, Jihua and Zhou, Yanghong and Mok, P. Y.},
  title     = {{KTPFormer}: Kinematics and Trajectory Prior Knowledge-Enhanced Transformer for {3D} Human Pose Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1123--1132}
}
Differentiable Micro-Mesh Construction: Yishun Dou,

Zhong Zheng,

Qiaoqiao Jin,

Rui Shi,

Yuhan Li,

Bingbing Ni; [pdf] [supp]
[bibtex]
@InProceedings{Dou_2024_CVPR,
  author    = {Dou, Yishun and Zheng, Zhong and Jin, Qiaoqiao and Shi, Rui and Li, Yuhan and Ni, Bingbing},
  title     = {Differentiable Micro-Mesh Construction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4294--4303}
}
SNED: Superposition Network Architecture Search for Efficient Video Diffusion Model: Zhengang Li,

Yan Kang,

Yuchen Liu,

Difan Liu,

Tobias Hinz,

Feng Liu,

Yanzhi Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Zhengang and Kang, Yan and Liu, Yuchen and Liu, Difan and Hinz, Tobias and Liu, Feng and Wang, Yanzhi},
  title     = {{SNED}: Superposition Network Architecture Search for Efficient Video Diffusion Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8661--8670}
}
LeftRefill: Filling Right Canvas based on Left Reference through Generalized Text-to-Image Diffusion Model: Chenjie Cao,

Yunuo Cai,

Qiaole Dong,

Yikai Wang,

Yanwei Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2024_CVPR,
  author    = {Cao, Chenjie and Cai, Yunuo and Dong, Qiaole and Wang, Yikai and Fu, Yanwei},
  title     = {{LeftRefill}: Filling Right Canvas based on Left Reference through Generalized Text-to-Image Diffusion Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7705--7715}
}
Personalized Residuals for Concept-Driven Text-to-Image Generation: Cusuh Ham,

Matthew Fisher,

James Hays,

Nicholas Kolkin,

Yuchen Liu,

Richard Zhang,

Tobias Hinz; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ham_2024_CVPR,
  author    = {Ham, Cusuh and Fisher, Matthew and Hays, James and Kolkin, Nicholas and Liu, Yuchen and Zhang, Richard and Hinz, Tobias},
  title     = {Personalized Residuals for Concept-Driven Text-to-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8186--8195}
}
Condition-Aware Neural Network for Controlled Image Generation: Han Cai,

Muyang Li,

Qinsheng Zhang,

Ming-Yu Liu,

Song Han; [pdf] [arXiv]
[bibtex]
@InProceedings{Cai_2024_CVPR,
  author    = {Cai, Han and Li, Muyang and Zhang, Qinsheng and Liu, Ming-Yu and Han, Song},
  title     = {Condition-Aware Neural Network for Controlled Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7194--7203}
}
Prompt Augmentation for Self-supervised Text-guided Image Manipulation: Rumeysa Bodur,

Binod Bhattarai,

Tae-Kyun Kim; [pdf] [supp]
[bibtex]
@InProceedings{Bodur_2024_CVPR,
  author    = {Bodur, Rumeysa and Bhattarai, Binod and Kim, Tae-Kyun},
  title     = {Prompt Augmentation for Self-supervised Text-guided Image Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8829--8838}
}
Guess The Unseen: Dynamic 3D Scene Reconstruction from Partial 2D Glimpses: Inhee Lee,

Byungjun Kim,

Hanbyul Joo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2024_CVPR,
  author    = {Lee, Inhee and Kim, Byungjun and Joo, Hanbyul},
  title     = {Guess The Unseen: Dynamic {3D} Scene Reconstruction from Partial {2D} Glimpses},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1062--1071}
}
HyperDreamBooth: HyperNetworks for Fast Personalization of Text-to-Image Models: Nataniel Ruiz,

Yuanzhen Li,

Varun Jampani,

Wei Wei,

Tingbo Hou,

Yael Pritch,

Neal Wadhwa,

Michael Rubinstein,

Kfir Aberman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ruiz_2024_CVPR,
  author    = {Ruiz, Nataniel and Li, Yuanzhen and Jampani, Varun and Wei, Wei and Hou, Tingbo and Pritch, Yael and Wadhwa, Neal and Rubinstein, Michael and Aberman, Kfir},
  title     = {{HyperDreamBooth}: {HyperNetworks} for Fast Personalization of Text-to-Image Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6527--6536}
}
HardMo: A Large-Scale Hardcase Dataset for Motion Capture: Jiaqi Liao,

Chuanchen Luo,

Yinuo Du,

Yuxi Wang,

Xucheng Yin,

Man Zhang,

Zhaoxiang Zhang,

Junran Peng; [pdf] [supp]
[bibtex]
@InProceedings{Liao_2024_CVPR,
  author    = {Liao, Jiaqi and Luo, Chuanchen and Du, Yinuo and Wang, Yuxi and Yin, Xucheng and Zhang, Man and Zhang, Zhaoxiang and Peng, Junran},
  title     = {{HardMo}: A Large-Scale Hardcase Dataset for Motion Capture},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1629--1638}
}
Separate and Conquer: Decoupling Co-occurrence via Decomposition and Representation for Weakly Supervised Semantic Segmentation: Zhiwei Yang,

Kexue Fu,

Minghong Duan,

Linhao Qu,

Shuo Wang,

Zhijian Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Zhiwei and Fu, Kexue and Duan, Minghong and Qu, Linhao and Wang, Shuo and Song, Zhijian},
  title     = {Separate and Conquer: Decoupling Co-occurrence via Decomposition and Representation for Weakly Supervised Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3606--3615}
}
BiPer: Binary Neural Networks using a Periodic Function: Edwin Vargas,

Claudia V. Correa,

Carlos Hinojosa,

Henry Arguello; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vargas_2024_CVPR,
  author    = {Vargas, Edwin and Correa, Claudia V. and Hinojosa, Carlos and Arguello, Henry},
  title     = {{BiPer}: Binary Neural Networks using a Periodic Function},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5684--5693}
}
Segment Any Event Streams via Weighted Adaptation of Pivotal Tokens: Zhiwen Chen,

Zhiyu Zhu,

Yifan Zhang,

Junhui Hou,

Guangming Shi,

Jinjian Wu; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Zhiwen and Zhu, Zhiyu and Zhang, Yifan and Hou, Junhui and Shi, Guangming and Wu, Jinjian},
  title     = {Segment Any Event Streams via Weighted Adaptation of Pivotal Tokens},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3890--3900}
}
AnyDoor: Zero-shot Object-level Image Customization: Xi Chen,

Lianghua Huang,

Yu Liu,

Yujun Shen,

Deli Zhao,

Hengshuang Zhao; [pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Xi and Huang, Lianghua and Liu, Yu and Shen, Yujun and Zhao, Deli and Zhao, Hengshuang},
  title     = {{AnyDoor}: Zero-shot Object-level Image Customization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6593--6602}
}
Clustering Propagation for Universal Medical Image Segmentation: Yuhang Ding,

Liulei Li,

Wenguan Wang,

Yi Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ding_2024_CVPR,
  author    = {Ding, Yuhang and Li, Liulei and Wang, Wenguan and Yang, Yi},
  title     = {Clustering Propagation for Universal Medical Image Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3357--3369}
}
Garment Recovery with Shape and Deformation Priors: Ren Li,

Corentin Dumery,

Benoît Guillard,

Pascal Fua; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Ren and Dumery, Corentin and Guillard, Beno{\^\i}t and Fua, Pascal},
  title     = {Garment Recovery with Shape and Deformation Priors},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1586--1595}
}
Psychometry: An Omnifit Model for Image Reconstruction from Human Brain Activity: Ruijie Quan,

Wenguan Wang,

Zhibo Tian,

Fan Ma,

Yi Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Quan_2024_CVPR,
  author    = {Quan, Ruijie and Wang, Wenguan and Tian, Zhibo and Ma, Fan and Yang, Yi},
  title     = {Psychometry: An Omnifit Model for Image Reconstruction from Human Brain Activity},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {233--243}
}
Exploring Regional Clues in CLIP for Zero-Shot Semantic Segmentation: Yi Zhang,

Meng-Hao Guo,

Miao Wang,

Shi-Min Hu; [pdf]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Yi and Guo, Meng-Hao and Wang, Miao and Hu, Shi-Min},
  title     = {Exploring Regional Clues in {CLIP} for Zero-Shot Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3270--3280}
}
Move as You Say Interact as You Can: Language-guided Human Motion Generation with Scene Affordance: Zan Wang,

Yixin Chen,

Baoxiong Jia,

Puhao Li,

Jinlu Zhang,

Jingze Zhang,

Tengyu Liu,

Yixin Zhu,

Wei Liang,

Siyuan Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Zan and Chen, Yixin and Jia, Baoxiong and Li, Puhao and Zhang, Jinlu and Zhang, Jingze and Liu, Tengyu and Zhu, Yixin and Liang, Wei and Huang, Siyuan},
  title     = {Move as You Say Interact as You Can: Language-guided Human Motion Generation with Scene Affordance},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {433--444}
}
Generalizable Face Landmarking Guided by Conditional Face Warping: Jiayi Liang,

Haotian Liu,

Hongteng Xu,

Dixin Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2024_CVPR,
  author    = {Liang, Jiayi and Liu, Haotian and Xu, Hongteng and Luo, Dixin},
  title     = {Generalizable Face Landmarking Guided by Conditional Face Warping},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2425--2435}
}
Sat2Scene: 3D Urban Scene Generation from Satellite Images with Diffusion: Zuoyue Li,

Zhenqiang Li,

Zhaopeng Cui,

Marc Pollefeys,

Martin R. Oswald; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Zuoyue and Li, Zhenqiang and Cui, Zhaopeng and Pollefeys, Marc and Oswald, Martin R.},
  title     = {{Sat2Scene}: {3D} Urban Scene Generation from Satellite Images with Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7141--7150}
}
Control4D: Efficient 4D Portrait Editing with Text: Ruizhi Shao,

Jingxiang Sun,

Cheng Peng,

Zerong Zheng,

Boyao Zhou,

Hongwen Zhang,

Yebin Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shao_2024_CVPR,
  author    = {Shao, Ruizhi and Sun, Jingxiang and Peng, Cheng and Zheng, Zerong and Zhou, Boyao and Zhang, Hongwen and Liu, Yebin},
  title     = {{Control4D}: Efficient {4D} Portrait Editing with Text},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4556--4567}
}
CLIPtone: Unsupervised Learning for Text-based Image Tone Adjustment: Hyeongmin Lee,

Kyoungkook Kang,

Jungseul Ok,

Sunghyun Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2024_CVPR,
  author    = {Lee, Hyeongmin and Kang, Kyoungkook and Ok, Jungseul and Cho, Sunghyun},
  title     = {{CLIPtone}: Unsupervised Learning for Text-based Image Tone Adjustment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2942--2951}
}
Codebook Transfer with Part-of-Speech for Vector-Quantized Image Modeling: Baoquan Zhang,

Huaibin Wang,

Chuyao Luo,

Xutao Li,

Guotao Liang,

Yunming Ye,

Xiaochen Qi,

Yao He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
    author    = {Zhang, Baoquan and Wang, Huaibin and Luo, Chuyao and Li, Xutao and Liang, Guotao and Ye, Yunming and Qi, Xiaochen and He, Yao},
    title     = {Codebook Transfer with Part-of-Speech for Vector-Quantized Image Modeling},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {7757--7766}
}
InceptionNeXt: When Inception Meets ConvNeXt: Weihao Yu,

Pan Zhou,

Shuicheng Yan,

Xinchao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2024_CVPR,
    author    = {Yu, Weihao and Zhou, Pan and Yan, Shuicheng and Wang, Xinchao},
    title     = {{InceptionNeXt}: When {Inception} Meets {ConvNeXt}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {5672--5683}
}
LiveHPS: LiDAR-based Scene-level Human Pose and Shape Estimation in Free Environment: Yiming Ren,

Xiao Han,

Chengfeng Zhao,

Jingya Wang,

Lan Xu,

Jingyi Yu,

Yuexin Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2024_CVPR,
    author    = {Ren, Yiming and Han, Xiao and Zhao, Chengfeng and Wang, Jingya and Xu, Lan and Yu, Jingyi and Ma, Yuexin},
    title     = {{LiveHPS}: {LiDAR}-based Scene-level Human Pose and Shape Estimation in Free Environment},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {1281--1291}
}
Segment Every Out-of-Distribution Object: Wenjie Zhao,

Jia Li,

Xin Dong,

Yu Xiang,

Yunhui Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2024_CVPR,
    author    = {Zhao, Wenjie and Li, Jia and Dong, Xin and Xiang, Yu and Guo, Yunhui},
    title     = {Segment Every Out-of-Distribution Object},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {3910--3920}
}
Wavelet-based Fourier Information Interaction with Frequency Diffusion Adjustment for Underwater Image Restoration: Chen Zhao,

Weiling Cai,

Chenyu Dong,

Chengwei Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2024_CVPR,
    author    = {Zhao, Chen and Cai, Weiling and Dong, Chenyu and Hu, Chengwei},
    title     = {Wavelet-based {Fourier} Information Interaction with Frequency Diffusion Adjustment for Underwater Image Restoration},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {8281--8291}
}
PoNQ: a Neural QEM-based Mesh Representation: Nissim Maruani,

Maks Ovsjanikov,

Pierre Alliez,

Mathieu Desbrun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Maruani_2024_CVPR,
    author    = {Maruani, Nissim and Ovsjanikov, Maks and Alliez, Pierre and Desbrun, Mathieu},
    title     = {{PoNQ}: a Neural {QEM}-based Mesh Representation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {3647--3657}
}
Boosting Order-Preserving and Transferability for Neural Architecture Search: a Joint Architecture Refined Search and Fine-tuning Approach: Beichen Zhang,

Xiaoxing Wang,

Xiaohan Qin,

Junchi Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
    author    = {Zhang, Beichen and Wang, Xiaoxing and Qin, Xiaohan and Yan, Junchi},
    title     = {Boosting Order-Preserving and Transferability for Neural Architecture Search: a Joint Architecture Refined Search and Fine-tuning Approach},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {5662--5671}
}
Dr. Bokeh: DiffeRentiable Occlusion-aware Bokeh Rendering: Yichen Sheng,

Zixun Yu,

Lu Ling,

Zhiwen Cao,

Xuaner Zhang,

Xin Lu,

Ke Xian,

Haiting Lin,

Bedrich Benes; [pdf] [supp]
[bibtex]
@InProceedings{Sheng_2024_CVPR,
    author    = {Sheng, Yichen and Yu, Zixun and Ling, Lu and Cao, Zhiwen and Zhang, Xuaner and Lu, Xin and Xian, Ke and Lin, Haiting and Benes, Bedrich},
    title     = {{Dr. Bokeh}: {DiffeRentiable} Occlusion-aware Bokeh Rendering},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {4515--4525}
}
LAENeRF: Local Appearance Editing for Neural Radiance Fields: Lukas Radl,

Michael Steiner,

Andreas Kurz,

Markus Steinberger; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Radl_2024_CVPR,
    author    = {Radl, Lukas and Steiner, Michael and Kurz, Andreas and Steinberger, Markus},
    title     = {{LAENeRF}: Local Appearance Editing for Neural Radiance Fields},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {4969--4978}
}
Adversarial Score Distillation: When score distillation meets GAN: Min Wei,

Jingkai Zhou,

Junyao Sun,

Xuesong Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2024_CVPR,
    author    = {Wei, Min and Zhou, Jingkai and Sun, Junyao and Zhang, Xuesong},
    title     = {Adversarial Score Distillation: When score distillation meets {GAN}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {8131--8141}
}
Vector Graphics Generation via Mutually Impulsed Dual-domain Diffusion: Zhongyin Zhao,

Ye Chen,

Zhangli Hu,

Xuanhong Chen,

Bingbing Ni; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2024_CVPR,
    author    = {Zhao, Zhongyin and Chen, Ye and Hu, Zhangli and Chen, Xuanhong and Ni, Bingbing},
    title     = {Vector Graphics Generation via Mutually Impulsed Dual-domain Diffusion},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {4420--4428}
}
ScoreHypo: Probabilistic Human Mesh Estimation with Hypothesis Scoring: Yuan Xu,

Xiaoxuan Ma,

Jiajun Su,

Wentao Zhu,

Yu Qiao,

Yizhou Wang; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2024_CVPR,
    author    = {Xu, Yuan and Ma, Xiaoxuan and Su, Jiajun and Zhu, Wentao and Qiao, Yu and Wang, Yizhou},
    title     = {{ScoreHypo}: Probabilistic Human Mesh Estimation with Hypothesis Scoring},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {979--989}
}
MeshPose: Unifying DensePose and 3D Body Mesh Reconstruction: Eric-Tuan Le,

Antonis Kakolyris,

Petros Koutras,

Himmy Tam,

Efstratios Skordos,

George Papandreou,

Riza Alp Güler,

Iasonas Kokkinos; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Le_2024_CVPR,
    author    = {Le, Eric-Tuan and Kakolyris, Antonis and Koutras, Petros and Tam, Himmy and Skordos, Efstratios and Papandreou, George and G{\"u}ler, Riza Alp and Kokkinos, Iasonas},
    title     = {{MeshPose}: Unifying {DensePose} and {3D} Body Mesh Reconstruction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {2405--2414}
}
Unsupervised Salient Instance Detection: Xin Tian,

Ke Xu,

Rynson Lau; [pdf]
[bibtex]
@InProceedings{Tian_2024_CVPR,
    author    = {Tian, Xin and Xu, Ke and Lau, Rynson},
    title     = {Unsupervised Salient Instance Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {2702--2712}
}
Move Anything with Layered Scene Diffusion: Jiawei Ren,

Mengmeng Xu,

Jui-Chieh Wu,

Ziwei Liu,

Tao Xiang,

Antoine Toisoul; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2024_CVPR,
    author    = {Ren, Jiawei and Xu, Mengmeng and Wu, Jui-Chieh and Liu, Ziwei and Xiang, Tao and Toisoul, Antoine},
    title     = {Move Anything with Layered Scene Diffusion},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {6380--6389}
}
Human Gaussian Splatting: Real-time Rendering of Animatable Avatars: Arthur Moreau,

Jifei Song,

Helisa Dhamo,

Richard Shaw,

Yiren Zhou,

Eduardo Pérez-Pellitero; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Moreau_2024_CVPR,
    author    = {Moreau, Arthur and Song, Jifei and Dhamo, Helisa and Shaw, Richard and Zhou, Yiren and P{\'e}rez-Pellitero, Eduardo},
    title     = {Human {Gaussian} Splatting: Real-time Rendering of Animatable Avatars},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {788--798}
}
The Devil is in the Details: StyleFeatureEditor for Detail-Rich StyleGAN Inversion and High Quality Image Editing: Denis Bobkov,

Vadim Titov,

Aibek Alanov,

Dmitry Vetrov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bobkov_2024_CVPR,
    author    = {Bobkov, Denis and Titov, Vadim and Alanov, Aibek and Vetrov, Dmitry},
    title     = {The Devil is in the Details: {StyleFeatureEditor} for Detail-Rich {StyleGAN} Inversion and High Quality Image Editing},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {9337--9346}
}
Unbiased Estimator for Distorted Conics in Camera Calibration: Chaehyeon Song,

Jaeho Shin,

Myung-Hwan Jeon,

Jongwoo Lim,

Ayoung Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2024_CVPR,
    author    = {Song, Chaehyeon and Shin, Jaeho and Jeon, Myung-Hwan and Lim, Jongwoo and Kim, Ayoung},
    title     = {Unbiased Estimator for Distorted Conics in Camera Calibration},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {373--381}
}
MultiPhys: Multi-Person Physics-aware 3D Motion Estimation: Nicolas Ugrinovic,

Boxiao Pan,

Georgios Pavlakos,

Despoina Paschalidou,

Bokui Shen,

Jordi Sanchez-Riera,

Francesc Moreno-Noguer,

Leonidas Guibas; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ugrinovic_2024_CVPR,
    author    = {Ugrinovic, Nicolas and Pan, Boxiao and Pavlakos, Georgios and Paschalidou, Despoina and Shen, Bokui and Sanchez-Riera, Jordi and Moreno-Noguer, Francesc and Guibas, Leonidas},
    title     = {{MultiPhys}: Multi-Person Physics-aware {3D} Motion Estimation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {2331--2340}
}
NIVeL: Neural Implicit Vector Layers for Text-to-Vector Generation: Vikas Thamizharasan,

Difan Liu,

Matthew Fisher,

Nanxuan Zhao,

Evangelos Kalogerakis,

Michal Lukac; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Thamizharasan_2024_CVPR,
    author    = {Thamizharasan, Vikas and Liu, Difan and Fisher, Matthew and Zhao, Nanxuan and Kalogerakis, Evangelos and Lukac, Michal},
    title     = {{NIVeL}: Neural Implicit Vector Layers for Text-to-Vector Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {4589--4597}
}
OAKINK2: A Dataset of Bimanual Hands-Object Manipulation in Complex Task Completion: Xinyu Zhan,

Lixin Yang,

Yifei Zhao,

Kangrui Mao,

Hanlin Xu,

Zenan Lin,

Kailin Li,

Cewu Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhan_2024_CVPR,
    author    = {Zhan, Xinyu and Yang, Lixin and Zhao, Yifei and Mao, Kangrui and Xu, Hanlin and Lin, Zenan and Li, Kailin and Lu, Cewu},
    title     = {{OAKINK2}: A Dataset of Bimanual Hands-Object Manipulation in Complex Task Completion},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {445--456}
}
Text-Guided 3D Face Synthesis - From Generation to Editing: Yunjie Wu,

Yapeng Meng,

Zhipeng Hu,

Lincheng Li,

Haoqian Wu,

Kun Zhou,

Weiwei Xu,

Xin Yu; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2024_CVPR,
    author    = {Wu, Yunjie and Meng, Yapeng and Hu, Zhipeng and Li, Lincheng and Wu, Haoqian and Zhou, Kun and Xu, Weiwei and Yu, Xin},
    title     = {Text-Guided {3D} Face Synthesis - From Generation to Editing},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {1260--1269}
}
Multiplane Prior Guided Few-Shot Aerial Scene Rendering: Zihan Gao,

Licheng Jiao,

Lingling Li,

Xu Liu,

Fang Liu,

Puhua Chen,

Yuwei Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2024_CVPR,
    author    = {Gao, Zihan and Jiao, Licheng and Li, Lingling and Liu, Xu and Liu, Fang and Chen, Puhua and Guo, Yuwei},
    title     = {Multiplane Prior Guided Few-Shot Aerial Scene Rendering},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {5009--5019}
}
MAS: Multi-view Ancestral Sampling for 3D Motion Generation Using 2D Diffusion: Roy Kapon,

Guy Tevet,

Daniel Cohen-Or,

Amit H. Bermano; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kapon_2024_CVPR,
    author    = {Kapon, Roy and Tevet, Guy and Cohen-Or, Daniel and Bermano, Amit H.},
    title     = {{MAS}: Multi-view Ancestral Sampling for {3D} Motion Generation Using {2D} Diffusion},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {1965--1974}
}
Bilateral Event Mining and Complementary for Event Stream Super-Resolution: Zhilin Huang,

Quanmin Liang,

Yijie Yu,

Chujun Qin,

Xiawu Zheng,

Kai Huang,

Zikun Zhou,

Wenming Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2024_CVPR,
    author    = {Huang, Zhilin and Liang, Quanmin and Yu, Yijie and Qin, Chujun and Zheng, Xiawu and Huang, Kai and Zhou, Zikun and Yang, Wenming},
    title     = {Bilateral Event Mining and Complementary for Event Stream Super-Resolution},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {34--43}
}
SANeRF-HQ: Segment Anything for NeRF in High Quality: Yichen Liu,

Benran Hu,

Chi-Keung Tang,

Yu-Wing Tai; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2024_CVPR,
    author    = {Liu, Yichen and Hu, Benran and Tang, Chi-Keung and Tai, Yu-Wing},
    title     = {{SANeRF-HQ}: Segment Anything for {NeRF} in High Quality},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {3216--3226}
}
Transcending the Limit of Local Window: Advanced Super-Resolution Transformer with Adaptive Token Dictionary: Leheng Zhang,

Yawei Li,

Xingyu Zhou,

Xiaorui Zhao,

Shuhang Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
    author    = {Zhang, Leheng and Li, Yawei and Zhou, Xingyu and Zhao, Xiaorui and Gu, Shuhang},
    title     = {Transcending the Limit of Local Window: Advanced Super-Resolution Transformer with Adaptive Token Dictionary},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {2856--2865}
}
Mixed-Precision Quantization for Federated Learning on Resource-Constrained Heterogeneous Devices: Huancheng Chen,

Haris Vikalo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR,
    author    = {Chen, Huancheng and Vikalo, Haris},
    title     = {Mixed-Precision Quantization for Federated Learning on Resource-Constrained Heterogeneous Devices},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {6138--6148}
}
Neural Fields as Distributions: Signal Processing Beyond Euclidean Space: Daniel Rebain,

Soroosh Yazdani,

Kwang Moo Yi,

Andrea Tagliasacchi; [pdf] [supp]
[bibtex]
@InProceedings{Rebain_2024_CVPR,
    author    = {Rebain, Daniel and Yazdani, Soroosh and Yi, Kwang Moo and Tagliasacchi, Andrea},
    title     = {Neural Fields as Distributions: Signal Processing Beyond {Euclidean} Space},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {4274--4283}
}
Style Blind Domain Generalized Semantic Segmentation via Covariance Alignment and Semantic Consistence Contrastive Learning: Woo-Jin Ahn,

Geun-Yeong Yang,

Hyun-Duck Choi,

Myo-Taeg Lim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ahn_2024_CVPR,
    author    = {Ahn, Woo-Jin and Yang, Geun-Yeong and Choi, Hyun-Duck and Lim, Myo-Taeg},
    title     = {Style Blind Domain Generalized Semantic Segmentation via Covariance Alignment and Semantic Consistence Contrastive Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {3616--3626}
}
X-3D: Explicit 3D Structure Modeling for Point Cloud Recognition: Shuofeng Sun,

Yongming Rao,

Jiwen Lu,

Haibin Yan; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2024_CVPR,
    author    = {Sun, Shuofeng and Rao, Yongming and Lu, Jiwen and Yan, Haibin},
    title     = {{X-3D}: Explicit {3D} Structure Modeling for Point Cloud Recognition},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {5074--5083}
}
One More Step: A Versatile Plug-and-Play Module for Rectifying Diffusion Schedule Flaws and Enhancing Low-Frequency Controls: Minghui Hu,

Jianbin Zheng,

Chuanxia Zheng,

Chaoyue Wang,

Dacheng Tao,

Tat-Jen Cham; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2024_CVPR,
    author    = {Hu, Minghui and Zheng, Jianbin and Zheng, Chuanxia and Wang, Chaoyue and Tao, Dacheng and Cham, Tat-Jen},
    title     = {One More Step: A Versatile Plug-and-Play Module for Rectifying Diffusion Schedule Flaws and Enhancing Low-Frequency Controls},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {7331--7340}
}
HIVE: Harnessing Human Feedback for Instructional Visual Editing: Shu Zhang,

Xinyi Yang,

Yihao Feng,

Can Qin,

Chia-Chih Chen,

Ning Yu,

Zeyuan Chen,

Huan Wang,

Silvio Savarese,

Stefano Ermon,

Caiming Xiong,

Ran Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
    author    = {Zhang, Shu and Yang, Xinyi and Feng, Yihao and Qin, Can and Chen, Chia-Chih and Yu, Ning and Chen, Zeyuan and Wang, Huan and Savarese, Silvio and Ermon, Stefano and Xiong, Caiming and Xu, Ran},
    title     = {{HIVE}: Harnessing Human Feedback for Instructional Visual Editing},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {9026--9036}
}
StrokeFaceNeRF: Stroke-based Facial Appearance Editing in Neural Radiance Field: Xiao-Juan Li,

Dingxi Zhang,

Shu-Yu Chen,

Feng-Lin Liu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2024_CVPR,
    author    = {Li, Xiao-Juan and Zhang, Dingxi and Chen, Shu-Yu and Liu, Feng-Lin},
    title     = {{StrokeFaceNeRF}: Stroke-based Facial Appearance Editing in Neural Radiance Field},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {7538--7547}
}
ProxyCap: Real-time Monocular Full-body Capture in World Space via Human-Centric Proxy-to-Motion Learning: Yuxiang Zhang,

Hongwen Zhang,

Liangxiao Hu,

Jiajun Zhang,

Hongwei Yi,

Shengping Zhang,

Yebin Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
    author    = {Zhang, Yuxiang and Zhang, Hongwen and Hu, Liangxiao and Zhang, Jiajun and Yi, Hongwei and Zhang, Shengping and Liu, Yebin},
    title     = {{ProxyCap}: Real-time Monocular Full-body Capture in World Space via Human-Centric Proxy-to-Motion Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {1954--1964}
}
On the Robustness of Language Guidance for Low-Level Vision Tasks: Findings from Depth Estimation: Agneet Chatterjee,

Tejas Gokhale,

Chitta Baral,

Yezhou Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chatterjee_2024_CVPR,
    author    = {Chatterjee, Agneet and Gokhale, Tejas and Baral, Chitta and Yang, Yezhou},
    title     = {On the Robustness of Language Guidance for Low-Level Vision Tasks: Findings from Depth Estimation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {2794--2803}
}
UFOGen: You Forward Once Large Scale Text-to-Image Generation via Diffusion GANs: Yanwu Xu,

Yang Zhao,

Zhisheng Xiao,

Tingbo Hou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2024_CVPR,
    author    = {Xu, Yanwu and Zhao, Yang and Xiao, Zhisheng and Hou, Tingbo},
    title     = {{UFOGen}: You Forward Once Large Scale Text-to-Image Generation via Diffusion {GANs}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {8196--8206}
}
A Dual-Augmentor Framework for Domain Generalization in 3D Human Pose Estimation: Qucheng Peng,

Ce Zheng,

Chen Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2024_CVPR,
    author    = {Peng, Qucheng and Zheng, Ce and Chen, Chen},
    title     = {A Dual-Augmentor Framework for Domain Generalization in {3D} Human Pose Estimation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {2240--2249}
}
ACT-Diffusion: Efficient Adversarial Consistency Training for One-step Diffusion Models: Fei Kong,

Jinhao Duan,

Lichao Sun,

Hao Cheng,

Renjing Xu,

Hengtao Shen,

Xiaofeng Zhu,

Xiaoshuang Shi,

Kaidi Xu; [pdf] [supp]
[bibtex]
@InProceedings{Kong_2024_CVPR,
    author    = {Kong, Fei and Duan, Jinhao and Sun, Lichao and Cheng, Hao and Xu, Renjing and Shen, Hengtao and Zhu, Xiaofeng and Shi, Xiaoshuang and Xu, Kaidi},
    title     = {{ACT-Diffusion}: Efficient Adversarial Consistency Training for One-step Diffusion Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {8890--8899}
}
Spectral Meets Spatial: Harmonising 3D Shape Matching and Interpolation: Dongliang Cao,

Marvin Eisenberger,

Nafie El Amrani,

Daniel Cremers,

Florian Bernard; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2024_CVPR,
    author    = {Cao, Dongliang and Eisenberger, Marvin and El Amrani, Nafie and Cremers, Daniel and Bernard, Florian},
    title     = {Spectral Meets Spatial: Harmonising {3D} Shape Matching and Interpolation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {3658--3668}
}
Emu Edit: Precise Image Editing via Recognition and Generation Tasks: Shelly Sheynin,

Adam Polyak,

Uriel Singer,

Yuval Kirstain,

Amit Zohar,

Oron Ashual,

Devi Parikh,

Yaniv Taigman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sheynin_2024_CVPR,
    author    = {Sheynin, Shelly and Polyak, Adam and Singer, Uriel and Kirstain, Yuval and Zohar, Amit and Ashual, Oron and Parikh, Devi and Taigman, Yaniv},
    title     = {{Emu Edit}: Precise Image Editing via Recognition and Generation Tasks},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {8871--8879}
}
Face2Diffusion for Fast and Editable Face Personalization: Kaede Shiohara,

Toshihiko Yamasaki; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shiohara_2024_CVPR,
    author    = {Shiohara, Kaede and Yamasaki, Toshihiko},
    title     = {{Face2Diffusion} for Fast and Editable Face Personalization},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {6850--6859}
}
Dancing with Still Images: Video Distillation via Static-Dynamic Disentanglement: Ziyu Wang,

Yue Xu,

Cewu Lu,

Yong-Lu Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
    author    = {Wang, Ziyu and Xu, Yue and Lu, Cewu and Li, Yong-Lu},
    title     = {Dancing with Still Images: Video Distillation via Static-Dynamic Disentanglement},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {6296--6304}
}
UniRepLKNet: A Universal Perception Large-Kernel ConvNet for Audio Video Point Cloud Time-Series and Image Recognition: Xiaohan Ding,

Yiyuan Zhang,

Yixiao Ge,

Sijie Zhao,

Lin Song,

Xiangyu Yue,

Ying Shan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ding_2024_CVPR,
    author    = {Ding, Xiaohan and Zhang, Yiyuan and Ge, Yixiao and Zhao, Sijie and Song, Lin and Yue, Xiangyu and Shan, Ying},
    title     = {{UniRepLKNet}: A Universal Perception Large-Kernel {ConvNet} for Audio Video Point Cloud Time-Series and Image Recognition},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {5513--5524}
}
SwiftBrush: One-Step Text-to-Image Diffusion Model with Variational Score Distillation: Thuan Hoang Nguyen,

Anh Tran; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2024_CVPR,
    author    = {Nguyen, Thuan Hoang and Tran, Anh},
    title     = {{SwiftBrush}: One-Step Text-to-Image Diffusion Model with Variational Score Distillation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {7807--7816}
}
DEADiff: An Efficient Stylization Diffusion Model with Disentangled Representations: Tianhao Qi,

Shancheng Fang,

Yanze Wu,

Hongtao Xie,

Jiawei Liu,

Lang Chen,

Qian He,

Yongdong Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qi_2024_CVPR,
    author    = {Qi, Tianhao and Fang, Shancheng and Wu, Yanze and Xie, Hongtao and Liu, Jiawei and Chen, Lang and He, Qian and Zhang, Yongdong},
    title     = {{DEADiff}: An Efficient Stylization Diffusion Model with Disentangled Representations},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {8693--8702}
}
Exact Fusion via Feature Distribution Matching for Few-shot Image Generation: Yingbo Zhou,

Yutong Ye,

Pengyu Zhang,

Xian Wei,

Mingsong Chen; [pdf]
[bibtex]
@InProceedings{Zhou_2024_CVPR,
    author    = {Zhou, Yingbo and Ye, Yutong and Zhang, Pengyu and Wei, Xian and Chen, Mingsong},
    title     = {Exact Fusion via Feature Distribution Matching for Few-shot Image Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {8383--8392}
}
CoDeF: Content Deformation Fields for Temporally Consistent Video Processing: Hao Ouyang,

Qiuyu Wang,

Yuxi Xiao,

Qingyan Bai,

Juntao Zhang,

Kecheng Zheng,

Xiaowei Zhou,

Qifeng Chen,

Yujun Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ouyang_2024_CVPR,
    author    = {Ouyang, Hao and Wang, Qiuyu and Xiao, Yuxi and Bai, Qingyan and Zhang, Juntao and Zheng, Kecheng and Zhou, Xiaowei and Chen, Qifeng and Shen, Yujun},
    title     = {{CoDeF}: Content Deformation Fields for Temporally Consistent Video Processing},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {8089--8099}
}
QUADify: Extracting Meshes with Pixel-level Details and Materials from Images: Maximilian Frühauf,

Hayko Riemenschneider,

Markus Gross,

Christopher Schroers; [pdf] [supp]
[bibtex]
@InProceedings{Fruhauf_2024_CVPR,
    author    = {Fr{\"u}hauf, Maximilian and Riemenschneider, Hayko and Gross, Markus and Schroers, Christopher},
    title     = {{QUADify}: Extracting Meshes with Pixel-level Details and Materials from Images},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {4661--4670}
}
RecDiffusion: Rectangling for Image Stitching with Diffusion Models: Tianhao Zhou,

Haipeng Li,

Ziyi Wang,

Ao Luo,

Chen-Lin Zhang,

Jiajun Li,

Bing Zeng,

Shuaicheng Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2024_CVPR,
    author    = {Zhou, Tianhao and Li, Haipeng and Wang, Ziyi and Luo, Ao and Zhang, Chen-Lin and Li, Jiajun and Zeng, Bing and Liu, Shuaicheng},
    title     = {{RecDiffusion}: Rectangling for Image Stitching with Diffusion Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {2692--2701}
}
Eclipse: Disambiguating Illumination and Materials using Unintended Shadows: Dor Verbin,

Ben Mildenhall,

Peter Hedman,

Jonathan T. Barron,

Todd Zickler,

Pratul P. Srinivasan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Verbin_2024_CVPR,
    author    = {Verbin, Dor and Mildenhall, Ben and Hedman, Peter and Barron, Jonathan T. and Zickler, Todd and Srinivasan, Pratul P.},
    title     = {Eclipse: Disambiguating Illumination and Materials using Unintended Shadows},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {77--86}
}
Balancing Act: Distribution-Guided Debiasing in Diffusion Models: Rishubh Parihar,

Abhijnya Bhat,

Abhipsa Basu,

Saswat Mallick,

Jogendra Nath Kundu,

R. Venkatesh Babu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Parihar_2024_CVPR,
    author    = {Parihar, Rishubh and Bhat, Abhijnya and Basu, Abhipsa and Mallick, Saswat and Kundu, Jogendra Nath and Babu, R. Venkatesh},
    title     = {Balancing Act: Distribution-Guided Debiasing in Diffusion Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {6668--6678}
}
Differentiable Point-based Inverse Rendering: Hoon-Gyu Chung,

Seokjun Choi,

Seung-Hwan Baek; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chung_2024_CVPR,
    author    = {Chung, Hoon-Gyu and Choi, Seokjun and Baek, Seung-Hwan},
    title     = {Differentiable Point-based Inverse Rendering},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {4399--4409}
}
A Unified and Interpretable Emotion Representation and Expression Generation: Reni Paskaleva,

Mykyta Holubakha,

Andela Ilic,

Saman Motamed,

Luc Van Gool,

Danda Paudel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Paskaleva_2024_CVPR,
    author    = {Paskaleva, Reni and Holubakha, Mykyta and Ilic, Andela and Motamed, Saman and Van Gool, Luc and Paudel, Danda},
    title     = {A Unified and Interpretable Emotion Representation and Expression Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {2447--2456}
}
Upscale-A-Video: Temporal-Consistent Diffusion Model for Real-World Video Super-Resolution: Shangchen Zhou,

Peiqing Yang,

Jianyi Wang,

Yihang Luo,

Chen Change Loy; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2024_CVPR,
    author    = {Zhou, Shangchen and Yang, Peiqing and Wang, Jianyi and Luo, Yihang and Loy, Chen Change},
    title     = {{Upscale-A-Video}: Temporal-Consistent Diffusion Model for Real-World Video Super-Resolution},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {2535--2545}
}
4D-DRESS: A 4D Dataset of Real-World Human Clothing With Semantic Annotations: Wenbo Wang,

Hsuan-I Ho,

Chen Guo,

Boxiang Rong,

Artur Grigorev,

Jie Song,

Juan Jose Zarate,

Otmar Hilliges; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2024_CVPR,
    author    = {Wang, Wenbo and Ho, Hsuan-I and Guo, Chen and Rong, Boxiang and Grigorev, Artur and Song, Jie and Zarate, Juan Jose and Hilliges, Otmar},
    title     = {{4D-DRESS}: A {4D} Dataset of Real-World Human Clothing With Semantic Annotations},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {550--560}
}
Specularity Factorization for Low-Light Enhancement: Saurabh Saini,

P J Narayanan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Saini_2024_CVPR,
  author    = {Saini, Saurabh and Narayanan, P. J.},
  title     = {Specularity Factorization for Low-Light Enhancement},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1--12},
}
Paint3D: Paint Anything 3D with Lighting-Less Texture Diffusion Models: Xianfang Zeng,

Xin Chen,

Zhongqi Qi,

Wen Liu,

Zibo Zhao,

Zhibin Wang,

Bin Fu,

Yong Liu,

Gang Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2024_CVPR,
  author    = {Zeng, Xianfang and Chen, Xin and Qi, Zhongqi and Liu, Wen and Zhao, Zibo and Wang, Zhibin and Fu, Bin and Liu, Yong and Yu, Gang},
  title     = {{Paint3D}: Paint Anything {3D} with Lighting-Less Texture Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4252--4262},
}
MS-MANO: Enabling Hand Pose Tracking with Biomechanical Constraints: Pengfei Xie,

Wenqiang Xu,

Tutian Tang,

Zhenjun Yu,

Cewu Lu; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2024_CVPR,
  author    = {Xie, Pengfei and Xu, Wenqiang and Tang, Tutian and Yu, Zhenjun and Lu, Cewu},
  title     = {{MS-MANO}: Enabling Hand Pose Tracking with Biomechanical Constraints},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2382--2392},
}
Generate Like Experts: Multi-Stage Font Generation by Incorporating Font Transfer Process into Diffusion Models: Bin Fu,

Fanghua Yu,

Anran Liu,

Zixuan Wang,

Jie Wen,

Junjun He,

Yu Qiao; [pdf] [supp]
[bibtex]
@InProceedings{Fu_2024_CVPR,
  author    = {Fu, Bin and Yu, Fanghua and Liu, Anran and Wang, Zixuan and Wen, Jie and He, Junjun and Qiao, Yu},
  title     = {Generate Like Experts: Multi-Stage Font Generation by Incorporating Font Transfer Process into Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6892--6901},
}
Diffuse Attend and Segment: Unsupervised Zero-Shot Segmentation using Stable Diffusion: Junjiao Tian,

Lavisha Aggarwal,

Andrea Colaco,

Zsolt Kira,

Mar Gonzalez-Franco; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tian_2024_CVPR,
  author    = {Tian, Junjiao and Aggarwal, Lavisha and Colaco, Andrea and Kira, Zsolt and Gonzalez-Franco, Mar},
  title     = {Diffuse Attend and Segment: Unsupervised Zero-Shot Segmentation using {Stable Diffusion}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3554--3563},
}
Implicit Discriminative Knowledge Learning for Visible-Infrared Person Re-Identification: Kaijie Ren,

Lei Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Ren_2024_CVPR,
  author    = {Ren, Kaijie and Zhang, Lei},
  title     = {Implicit Discriminative Knowledge Learning for Visible-Infrared Person Re-Identification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {393--402},
}
Gradient Alignment for Cross-Domain Face Anti-Spoofing: Binh M. Le,

Simon S. Woo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Le_2024_CVPR,
  author    = {Le, Binh M. and Woo, Simon S.},
  title     = {Gradient Alignment for Cross-Domain Face Anti-Spoofing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {188--199},
}
OpticalDR: A Deep Optical Imaging Model for Privacy-Protective Depression Recognition: Yuchen Pan,

Junjun Jiang,

Kui Jiang,

Zhihao Wu,

Keyuan Yu,

Xianming Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2024_CVPR,
  author    = {Pan, Yuchen and Jiang, Junjun and Jiang, Kui and Wu, Zhihao and Yu, Keyuan and Liu, Xianming},
  title     = {{OpticalDR}: A Deep Optical Imaging Model for Privacy-Protective Depression Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1303--1312},
}
Observation-Guided Diffusion Probabilistic Models: Junoh Kang,

Jinyoung Choi,

Sungik Choi,

Bohyung Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2024_CVPR,
  author    = {Kang, Junoh and Choi, Jinyoung and Choi, Sungik and Han, Bohyung},
  title     = {Observation-Guided Diffusion Probabilistic Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8323--8331},
}
Spatial-Aware Regression for Keypoint Localization: Dongkai Wang,

Shiliang Zhang; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Wang_2024_CVPR is also used by the 4D-DRESS and EFormer entries in this file; BibTeX reports the later ones as repeated entries.
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Dongkai and Zhang, Shiliang},
  title     = {Spatial-Aware Regression for Keypoint Localization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {624--633},
}
EFormer: Enhanced Transformer towards Semantic-Contour Features of Foreground for Portraits Matting: Zitao Wang,

Qiguang Miao,

Yue Xi,

Peipei Zhao; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Wang_2024_CVPR is also used by the 4D-DRESS and Spatial-Aware Regression entries in this file; BibTeX reports the later ones as repeated entries.
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Zitao and Miao, Qiguang and Xi, Yue and Zhao, Peipei},
  title     = {{EFormer}: Enhanced Transformer towards Semantic-Contour Features of Foreground for Portraits Matting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3880--3889},
}
MultiPly: Reconstruction of Multiple People from Monocular Video in the Wild: Zeren Jiang,

Chen Guo,

Manuel Kaufmann,

Tianjian Jiang,

Julien Valentin,

Otmar Hilliges,

Jie Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2024_CVPR,
  author    = {Jiang, Zeren and Guo, Chen and Kaufmann, Manuel and Jiang, Tianjian and Valentin, Julien and Hilliges, Otmar and Song, Jie},
  title     = {{MultiPly}: Reconstruction of Multiple People from Monocular Video in the Wild},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {109--118},
}
ConsistNet: Enforcing 3D Consistency for Multi-view Images Diffusion: Jiayu Yang,

Ziang Cheng,

Yunfei Duan,

Pan Ji,

Hongdong Li; [pdf] [arXiv]
[bibtex]
% NOTE(review): citation key Yang_2024_CVPR is also used by the EmoGen entry in this file; BibTeX reports the later one as a repeated entry.
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Jiayu and Cheng, Ziang and Duan, Yunfei and Ji, Pan and Li, Hongdong},
  title     = {{ConsistNet}: Enforcing {3D} Consistency for Multi-view Images Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7079--7088},
}
GenN2N: Generative NeRF2NeRF Translation: Xiangyue Liu,

Han Xue,

Kunming Luo,

Ping Tan,

Li Yi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Xiangyue and Xue, Han and Luo, Kunming and Tan, Ping and Yi, Li},
  title     = {{GenN2N}: Generative {NeRF2NeRF} Translation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5105--5114},
}
Universal Robustness via Median Randomized Smoothing for Real-World Super-Resolution: Zakariya Chaouai,

Mohamed Tamaazousti; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chaouai_2024_CVPR,
  author    = {Chaouai, Zakariya and Tamaazousti, Mohamed},
  title     = {Universal Robustness via Median Randomized Smoothing for Real-World Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9059--9068},
}
One-dimensional Adapter to Rule Them All: Concepts Diffusion Models and Erasing Applications: Mengyao Lyu,

Yuhong Yang,

Haiwen Hong,

Hui Chen,

Xuan Jin,

Yuan He,

Hui Xue,

Jungong Han,

Guiguang Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lyu_2024_CVPR,
  author    = {Lyu, Mengyao and Yang, Yuhong and Hong, Haiwen and Chen, Hui and Jin, Xuan and He, Yuan and Xue, Hui and Han, Jungong and Ding, Guiguang},
  title     = {One-dimensional Adapter to Rule Them All: Concepts Diffusion Models and Erasing Applications},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7559--7568},
}
Kandinsky Conformal Prediction: Efficient Calibration of Image Segmentation Algorithms: Joren Brunekreef,

Eric Marcus,

Ray Sheombarsing,

Jan-Jakob Sonke,

Jonas Teuwen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Brunekreef_2024_CVPR,
  author    = {Brunekreef, Joren and Marcus, Eric and Sheombarsing, Ray and Sonke, Jan-Jakob and Teuwen, Jonas},
  title     = {{Kandinsky} Conformal Prediction: Efficient Calibration of Image Segmentation Algorithms},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4135--4143},
}
Diversity-aware Channel Pruning for StyleGAN Compression: Jiwoo Chung,

Sangeek Hyun,

Sang-Heon Shim,

Jae-Pil Heo; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Chung_2024_CVPR is also used by the Differentiable Point-based Inverse Rendering entry in this file; BibTeX reports the later one as a repeated entry.
@InProceedings{Chung_2024_CVPR,
  author    = {Chung, Jiwoo and Hyun, Sangeek and Shim, Sang-Heon and Heo, Jae-Pil},
  title     = {Diversity-aware Channel Pruning for {StyleGAN} Compression},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7902--7911},
}
Neural Clustering based Visual Representation Learning: Guikun Chen,

Xia Li,

Yi Yang,

Wenguan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Guikun and Li, Xia and Yang, Yi and Wang, Wenguan},
  title     = {Neural Clustering based Visual Representation Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5714--5725},
}
Sparse Semi-DETR: Sparse Learnable Queries for Semi-Supervised Object Detection: Tahira Shehzadi,

Khurram Azeem Hashmi,

Didier Stricker,

Muhammad Zeshan Afzal; [pdf] [supp]
[bibtex]
@InProceedings{Shehzadi_2024_CVPR,
  author    = {Shehzadi, Tahira and Hashmi, Khurram Azeem and Stricker, Didier and Afzal, Muhammad Zeshan},
  title     = {{Sparse Semi-DETR}: Sparse Learnable Queries for Semi-Supervised Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5840--5850},
}
Uncertainty-Aware Source-Free Adaptive Image Super-Resolution with Wavelet Augmentation Transformer: Yuang Ai,

Xiaoqiang Zhou,

Huaibo Huang,

Lei Zhang,

Ran He; [pdf] [arXiv]
[bibtex]
@InProceedings{Ai_2024_CVPR,
  author    = {Ai, Yuang and Zhou, Xiaoqiang and Huang, Huaibo and Zhang, Lei and He, Ran},
  title     = {Uncertainty-Aware Source-Free Adaptive Image Super-Resolution with Wavelet Augmentation Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8142--8152},
}
Spacetime Gaussian Feature Splatting for Real-Time Dynamic View Synthesis: Zhan Li,

Zhang Chen,

Zhong Li,

Yi Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Zhan and Chen, Zhang and Li, Zhong and Xu, Yi},
  title     = {Spacetime {Gaussian} Feature Splatting for Real-Time Dynamic View Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8508--8520},
}
Instruct-Imagen: Image Generation with Multi-modal Instruction: Hexiang Hu,

Kelvin C.K. Chan,

Yu-Chuan Su,

Wenhu Chen,

Yandong Li,

Kihyuk Sohn,

Yang Zhao,

Xue Ben,

Boqing Gong,

William Cohen,

Ming-Wei Chang,

Xuhui Jia; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2024_CVPR,
  author    = {Hu, Hexiang and Chan, Kelvin C. K. and Su, Yu-Chuan and Chen, Wenhu and Li, Yandong and Sohn, Kihyuk and Zhao, Yang and Ben, Xue and Gong, Boqing and Cohen, William and Chang, Ming-Wei and Jia, Xuhui},
  title     = {{Instruct-Imagen}: Image Generation with Multi-modal Instruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4754--4763},
}
Rethinking Few-shot 3D Point Cloud Semantic Segmentation: Zhaochong An,

Guolei Sun,

Yun Liu,

Fayao Liu,

Zongwei Wu,

Dan Wang,

Luc Van Gool,

Serge Belongie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{An_2024_CVPR,
  author    = {An, Zhaochong and Sun, Guolei and Liu, Yun and Liu, Fayao and Wu, Zongwei and Wang, Dan and Van Gool, Luc and Belongie, Serge},
  title     = {Rethinking Few-shot {3D} Point Cloud Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3996--4006},
}
GreedyViG: Dynamic Axial Graph Construction for Efficient Vision GNNs: Mustafa Munir,

William Avery,

Md Mostafijur Rahman,

Radu Marculescu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Munir_2024_CVPR,
  author    = {Munir, Mustafa and Avery, William and Rahman, Md Mostafijur and Marculescu, Radu},
  title     = {{GreedyViG}: Dynamic Axial Graph Construction for Efficient Vision {GNNs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6118--6127},
}
Relightable and Animatable Neural Avatar from Sparse-View Video: Zhen Xu,

Sida Peng,

Chen Geng,

Linzhan Mou,

Zihan Yan,

Jiaming Sun,

Hujun Bao,

Xiaowei Zhou; [pdf] [arXiv]
[bibtex]
@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Zhen and Peng, Sida and Geng, Chen and Mou, Linzhan and Yan, Zihan and Sun, Jiaming and Bao, Hujun and Zhou, Xiaowei},
  title     = {Relightable and Animatable Neural Avatar from Sparse-View Video},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {990--1000},
}
Pose Adapted Shape Learning for Large-Pose Face Reenactment: Gee-Sern Jison Hsu,

Jie-Ying Zhang,

Huang Yu Hsiang,

Wei-Jie Hong; [pdf] [supp]
[bibtex]
@InProceedings{Hsu_2024_CVPR,
  author    = {Hsu, Gee-Sern Jison and Zhang, Jie-Ying and Hsiang, Huang Yu and Hong, Wei-Jie},
  title     = {Pose Adapted Shape Learning for Large-Pose Face Reenactment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7413--7422},
}
NRDF: Neural Riemannian Distance Fields for Learning Articulated Pose Priors: Yannan He,

Garvita Tiwari,

Tolga Birdal,

Jan Eric Lenssen,

Gerard Pons-Moll; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2024_CVPR,
  author    = {He, Yannan and Tiwari, Garvita and Birdal, Tolga and Lenssen, Jan Eric and Pons-Moll, Gerard},
  title     = {{NRDF}: Neural {Riemannian} Distance Fields for Learning Articulated Pose Priors},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {1661--1671},
}
RepAn: Enhanced Annealing through Re-parameterization: Xiang Fei,

Xiawu Zheng,

Yan Wang,

Fei Chao,

Chenglin Wu,

Liujuan Cao; [pdf] [supp]
[bibtex]
@InProceedings{Fei_2024_CVPR,
  author    = {Fei, Xiang and Zheng, Xiawu and Wang, Yan and Chao, Fei and Wu, Chenglin and Cao, Liujuan},
  title     = {{RepAn}: Enhanced Annealing through Re-parameterization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5798--5808},
}
DreamControl: Control-Based Text-to-3D Generation with 3D Self-Prior: Tianyu Huang,

Yihan Zeng,

Zhilu Zhang,

Wan Xu,

Hang Xu,

Songcen Xu,

Rynson W.H. Lau,

Wangmeng Zuo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Tianyu and Zeng, Yihan and Zhang, Zhilu and Xu, Wan and Xu, Hang and Xu, Songcen and Lau, Rynson W. H. and Zuo, Wangmeng},
  title     = {{DreamControl}: Control-Based Text-to-{3D} Generation with {3D} Self-Prior},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5364--5373},
}
ODIN: A Single Model for 2D and 3D Segmentation: Ayush Jain,

Pushkal Katara,

Nikolaos Gkanatsios,

Adam W. Harley,

Gabriel Sarch,

Kriti Aggarwal,

Vishrav Chaudhary,

Katerina Fragkiadaki; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jain_2024_CVPR,
  author    = {Jain, Ayush and Katara, Pushkal and Gkanatsios, Nikolaos and Harley, Adam W. and Sarch, Gabriel and Aggarwal, Kriti and Chaudhary, Vishrav and Fragkiadaki, Katerina},
  title     = {{ODIN}: A Single Model for {2D} and {3D} Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3564--3574},
}
InitNO: Boosting Text-to-Image Diffusion Models via Initial Noise Optimization: Xiefan Guo,

Jinlin Liu,

Miaomiao Cui,

Jiankai Li,

Hongyu Yang,

Di Huang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Guo_2024_CVPR is also used by the Vanishing-Point-Guided Video Semantic Segmentation entry in this file; BibTeX reports the later one as a repeated entry.
@InProceedings{Guo_2024_CVPR,
  author    = {Guo, Xiefan and Liu, Jinlin and Cui, Miaomiao and Li, Jiankai and Yang, Hongyu and Huang, Di},
  title     = {{InitNO}: Boosting Text-to-Image Diffusion Models via Initial Noise Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9380--9389},
}
Multimodal Sense-Informed Forecasting of 3D Human Motions: Zhenyu Lou,

Qiongjie Cui,

Haofan Wang,

Xu Tang,

Hong Zhou; [pdf]
[bibtex]
@InProceedings{Lou_2024_CVPR,
  author    = {Lou, Zhenyu and Cui, Qiongjie and Wang, Haofan and Tang, Xu and Zhou, Hong},
  title     = {Multimodal Sense-Informed Forecasting of {3D} Human Motions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {2144--2154},
}
FlowerFormer: Empowering Neural Architecture Encoding using a Flow-aware Graph Transformer: Dongyeong Hwang,

Hyunju Kim,

Sunwoo Kim,

Kijung Shin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hwang_2024_CVPR,
  author    = {Hwang, Dongyeong and Kim, Hyunju and Kim, Sunwoo and Shin, Kijung},
  title     = {{FlowerFormer}: Empowering Neural Architecture Encoding using a Flow-aware Graph Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6128--6137},
}
EmoGen: Emotional Image Content Generation with Text-to-Image Diffusion Models: Jingyuan Yang,

Jiawei Feng,

Hui Huang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Yang_2024_CVPR is also used by the ConsistNet entry in this file; BibTeX reports the later one as a repeated entry.
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Jingyuan and Feng, Jiawei and Huang, Hui},
  title     = {{EmoGen}: Emotional Image Content Generation with Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {6358--6368},
}
Neural Implicit Representation for Building Digital Twins of Unknown Articulated Objects: Yijia Weng,

Bowen Wen,

Jonathan Tremblay,

Valts Blukis,

Dieter Fox,

Leonidas Guibas,

Stan Birchfield; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Weng_2024_CVPR,
  author    = {Weng, Yijia and Wen, Bowen and Tremblay, Jonathan and Blukis, Valts and Fox, Dieter and Guibas, Leonidas and Birchfield, Stan},
  title     = {Neural Implicit Representation for Building Digital Twins of Unknown Articulated Objects},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3141--3150},
}
Vanishing-Point-Guided Video Semantic Segmentation of Driving Scenes: Diandian Guo,

Deng-Ping Fan,

Tongyu Lu,

Christos Sakaridis,

Luc Van Gool; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Guo_2024_CVPR is also used by the InitNO entry in this file; BibTeX reports the later one as a repeated entry.
@InProceedings{Guo_2024_CVPR,
  author    = {Guo, Diandian and Fan, Deng-Ping and Lu, Tongyu and Sakaridis, Christos and Van Gool, Luc},
  title     = {Vanishing-Point-Guided Video Semantic Segmentation of Driving Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {3544--3553},
}
LAMP: Learn A Motion Pattern for Few-Shot Video Generation: Ruiqi Wu,

Liangyu Chen,

Tong Yang,

Chunle Guo,

Chongyi Li,

Xiangyu Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Ruiqi and Chen, Liangyu and Yang, Tong and Guo, Chunle and Li, Chongyi and Zhang, Xiangyu},
  title     = {{LAMP}: Learn A Motion Pattern for Few-Shot Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {7089--7098},
}
Language-driven Object Fusion into Neural Radiance Fields with Pose-Conditioned Dataset Updates: Ka Chun Shum,

Jaeyeon Kim,

Binh-Son Hua,

Duc Thanh Nguyen,

Sai-Kit Yeung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shum_2024_CVPR,
  author    = {Shum, Ka Chun and Kim, Jaeyeon and Hua, Binh-Son and Nguyen, Duc Thanh and Yeung, Sai-Kit},
  title     = {Language-driven Object Fusion into Neural Radiance Fields with Pose-Conditioned Dataset Updates},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {5176--5187},
}
DREAM: Diffusion Rectification and Estimation-Adaptive Models: Jinxin Zhou,

Tianyu Ding,

Tianyi Chen,

Jiachen Jiang,

Ilya Zharkov,

Zhihui Zhu,

Luming Liang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Zhou_2024_CVPR is also used by the Upscale-A-Video entry in this file; BibTeX reports the later one as a repeated entry.
@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Jinxin and Ding, Tianyu and Chen, Tianyi and Jiang, Jiachen and Zharkov, Ilya and Zhu, Zhihui and Liang, Luming},
  title     = {{DREAM}: Diffusion Rectification and Estimation-Adaptive Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {8342--8351},
}; Back