Papers
- Back
kh: Symmetry Understanding of 3D Shapes via Chirality Disentanglement-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Weikang and Wei{\ss}berg, Tobias and El Amrani, Nafie and Bernard, Florian},
  title     = {kh: Symmetry Understanding of {3D} Shapes via Chirality Disentanglement},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28292--28302}
}
One Polyp Identifies All: One-Shot Polyp Segmentation with SAM via Cascaded Priors and Iterative Prompt Evolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mao_2025_ICCV,
  author    = {Mao, Xinyu and Xing, Xiaohan and Meng, Fei and Liu, Jianbang and Bai, Fan and Nie, Qiang and Meng, Max},
  title     = {One Polyp Identifies All: One-Shot Polyp Segmentation with {SAM} via Cascaded Priors and Iterative Prompt Evolution},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24182--24191}
}
From Gaze to Movement: Predicting Visual Attention for Autonomous Driving Human-Machine Interaction based on Programmatic Imitation Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Yexin and Lin, Yongbin and Yue, Lishengsa and Yao, Zhihong and Wang, Jie},
  title     = {From Gaze to Movement: Predicting Visual Attention for Autonomous Driving Human-Machine Interaction based on Programmatic Imitation Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26146--26155}
}
Rethinking Detecting Salient and Camouflaged Objects in Unconstrained Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Zhangjun and Li, Yiping and Zhong, Chunlin and Huang, Jianuo and Pei, Jialun and Li, Hua and Tang, He},
  title     = {Rethinking Detecting Salient and Camouflaged Objects in Unconstrained Scenes},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22372--22382}
}
OccluGaussian: Occlusion-Aware Gaussian Splatting for Large Scene Reconstruction and Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Shiyong and Tang, Xiao and Li, Zhihao and He, Yingfan and Ye, Chongjie and Liu, Jianzhuang and Huang, Binxiao and Zhou, Shunbo and Wu, Xiaofei},
  title     = {{OccluGaussian}: Occlusion-Aware {Gaussian} Splatting for Large Scene Reconstruction and Rendering},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26643--26652}
}
Unsupervised RGB-D Point Cloud Registration for Scenes with Low Overlap and Photometric Inconsistency-
[pdf]
[supp]
[bibtex]@InProceedings{Shou_2025_ICCV,
  author    = {Shou, Yejun and Wang, Haocheng and Shen, Lingfeng and Zheng, Qian and Pan, Gang and Cao, Yanlong},
  title     = {Unsupervised {RGB-D} Point Cloud Registration for Scenes with Low Overlap and Photometric Inconsistency},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24868--24877}
}
Demeter: A Parametric Model of Crop Plant Morphology from the Real World-
[pdf]
[supp]
[bibtex]@InProceedings{Cheng_2025_ICCV,
  author    = {Cheng, Tianhang and Zhai, Albert J. and Chen, Evan Z. and Zhou, Rui and Deng, Yawen and Li, Zitong and Zhao, Kejie and Shiu, Janice and Zhao, Qianyu and Xu, Yide and Wang, Xinlei and Shen, Yuan and Wang, Sheng and Ainsworth, Lisa and Guan, Kaiyu and Wang, Shenlong},
  title     = {Demeter: A Parametric Model of Crop Plant Morphology from the Real World},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28740--28751}
}
VideoLLaMB: Long Streaming Video Understanding with Recurrent Memory Bridges-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV_VideoLLaMB,
  author    = {Wang, Yuxuan and Song, Yiqi and Xie, Cihang and Liu, Yang and Zheng, Zilong},
  title     = {{VideoLLaMB}: Long Streaming Video Understanding with Recurrent Memory Bridges},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24170--24181}
}
HiERO: Understanding the Hierarchy of Human Behavior Enhances Reasoning on Egocentric Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peirone_2025_ICCV,
  author    = {Peirone, Simone Alberto and Pistilli, Francesca and Averta, Giuseppe},
  title     = {{HiERO}: Understanding the Hierarchy of Human Behavior Enhances Reasoning on Egocentric Videos},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19862--19871}
}
FVGen: Accelerating Novel-View Synthesis with Adversarial Video Diffusion Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Teng_2025_ICCV,
  author    = {Teng, Wenbin and Chen, Gonglin and Chen, Haiwei and Zhao, Yajie},
  title     = {{FVGen}: Accelerating Novel-View Synthesis with Adversarial Video Diffusion Distillation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26095--26105}
}
ReconDreamer++: Harmonizing Generative and Reconstructive Models for Driving Scene Representation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Guosheng and Wang, Xiaofeng and Ni, Chaojun and Zhu, Zheng and Qin, Wenkang and Huang, Guan and Wang, Xingang},
  title     = {{ReconDreamer++}: Harmonizing Generative and Reconstructive Models for Driving Scene Representation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26718--26728}
}
Mind the Gap: Aligning Vision Foundation Models to Image Feature Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV_MindTheGap,
  author    = {Liu, Yuhan and Fu, Jingwen and Wu, Yang and Wu, Kangyi and Li, Pengna and Wu, Jiayi and Zhou, Sanping and Xin, Jingmin},
  title     = {Mind the Gap: Aligning Vision Foundation Models to Image Feature Matching},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20313--20323}
}
CoStoDet-DDPM: Collaborative Training of Stochastic and Deterministic Models Improves Surgical Workflow Anticipation and Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Kaixiang and Li, Xin and Li, Qiang and Wang, Zhiwei},
  title     = {{CoStoDet-DDPM}: Collaborative Training of Stochastic and Deterministic Models Improves Surgical Workflow Anticipation and Recognition},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23741--23751}
}
RareCLIP: Rarity-aware Online Zero-shot Industrial Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{He_2025_ICCV,
  author    = {He, Jianfang and Cao, Min and Peng, Silong and Xie, Qiong},
  title     = {{RareCLIP}: Rarity-aware Online Zero-shot Industrial Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24478--24487}
}
HERMES: A Unified Self-Driving World Model for Simultaneous 3D Scene Understanding and Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_ICCV_HERMES,
  author    = {Zhou, Xin and Liang, Dingkang and Tu, Sifan and Chen, Xiwu and Ding, Yikang and Zhang, Dingyuan and Tan, Feiyang and Zhao, Hengshuang and Bai, Xiang},
  title     = {{HERMES}: A Unified Self-Driving World Model for Simultaneous {3D} Scene Understanding and Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27817--27827}
}
ArgMatch: Adaptive Refinement Gathering for Efficient Dense Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2025_ICCV,
  author    = {Deng, Yuxin and Zhang, Kaining and Tang, Linfeng and Yang, Jiaqi and Ma, Jiayi},
  title     = {{ArgMatch}: Adaptive Refinement Gathering for Efficient Dense Matching},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27369--27379}
}
Free4D: Tuning-free 4D Scene Generation with Spatial-Temporal Consistency-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV_Free4D,
  author    = {Liu, Tianqi and Huang, Zihao and Chen, Zhaoxi and Wang, Guangcong and Hu, Shoukang and Shen, Liao and Sun, Huiqiang and Cao, Zhiguo and Li, Wei and Liu, Ziwei},
  title     = {{Free4D}: Tuning-free {4D} Scene Generation with Spatial-Temporal Consistency},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25571--25582}
}
Spherical Epipolar Rectification for Deep Two-View Absolute Depth Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Brousseau_2025_ICCV,
  author    = {Brousseau, Pierre-Andr{\'e} and Roy, S{\'e}bastien},
  title     = {Spherical Epipolar Rectification for Deep Two-View Absolute Depth Estimation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28925--28934}
}
ScenePainter: Semantically Consistent Perpetual 3D Scene Generation with Concept Relation Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2025_ICCV,
  author    = {Xia, Chong and Zhang, Shengjun and Liu, Fangfu and Liu, Chang and Hirunyaratsameewong, Khodchaphun and Duan, Yueqi},
  title     = {{ScenePainter}: Semantically Consistent Perpetual {3D} Scene Generation with Concept Relation Alignment},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28808--28817}
}
ESCNet: Edge-Semantic Collaborative Network for Camouflaged Object Detection-
[pdf]
[bibtex]@InProceedings{Ye_2025_ICCV,
  author    = {Ye, Sheng and Chen, Xin and Zhang, Yan and Lin, Xianming and Cao, Liujuan},
  title     = {{ESCNet}: Edge-Semantic Collaborative Network for Camouflaged Object Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20053--20063}
}
PixelStitch: Structure-Preserving Pixel-Wise Bidirectional Warps for Unsupervised Image Stitching-
[pdf]
[supp]
[bibtex]@InProceedings{Jin_2025_ICCV,
  author    = {Jin, Hengzhe and Nie, Lang and Lin, Chunyu and Feng, Xiaomei and Zhao, Yao},
  title     = {{PixelStitch}: Structure-Preserving Pixel-Wise Bidirectional Warps for Unsupervised Image Stitching},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28125--28134}
}
VLR-Driver: Large Vision-Language-Reasoning Models for Embodied Autonomous Driving-
[pdf]
[bibtex]@InProceedings{Kong_2025_ICCV,
  author    = {Kong, Fanjie and Li, Yitong and Chen, Weihuang and Min, Chen and Li, Yizhe and Gao, Zhiqiang and Li, Haoyang and Guo, Zhongyu and Sun, Hongbin},
  title     = {{VLR-Driver}: Large Vision-Language-Reasoning Models for Embodied Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26966--26976}
}
ResGS: Residual Densification of 3D Gaussian for Efficient Detail Recovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lyu_2025_ICCV,
  author    = {Lyu, Yanzhe and Cheng, Kai and Kang, Xin and Chen, Xuejin},
  title     = {{ResGS}: Residual Densification of {3D} {Gaussian} for Efficient Detail Recovery},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28093--28102}
}
Prior2Former - Evidential Modeling of Mask Transformers for Assumption-Free Open-World Panoptic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Schmidt_2025_ICCV,
  author    = {Schmidt, Sebastian and Koerner, Julius and Fuchsgruber, Dominik and Gasperini, Stefano and Tombari, Federico and G{\"u}nnemann, Stephan},
  title     = {{Prior2Former} - Evidential Modeling of Mask Transformers for Assumption-Free Open-World Panoptic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23646--23656}
}
Optimal Transport for Brain-Image Alignment: Unveiling Redundancy and Synergy in Neural Information Processing-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xiao_2025_ICCV,
  author    = {Xiao, Yang and Lu, Wang and Ji, Jie and Ye, Ruimeng and Li, Gen and Ma, Xiaolong and Hui, Bo},
  title     = {Optimal Transport for Brain-Image Alignment: Unveiling Redundancy and Synergy in Neural Information Processing},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20445--20455}
}
E-SAM: Training-Free Segment Every Entity Model-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Weiming and Xiao, Dingwen and Chen, Lei and Wang, Lin},
  title     = {{E-SAM}: Training-Free Segment Every Entity Model},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24688--24697}
}
RobuSTereo: Robust Zero-Shot Stereo Matching under Adverse Weather-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV_RobuSTereo,
  author    = {Wang, Yuran and Liang, Yingping and Hu, Yutao and Fu, Ying},
  title     = {{RobuSTereo}: Robust Zero-Shot Stereo Matching under Adverse Weather},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25134--25144}
}
HyPiDecoder: Hybrid Pixel Decoder for Efficient Segmentation and Detection-
[pdf]
[bibtex]@InProceedings{Zhou_2025_ICCV_HyPiDecoder,
  author    = {Zhou, Fengzhe and Shi, Humphrey},
  title     = {{HyPiDecoder}: Hybrid Pixel Decoder for Efficient Segmentation and Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22100--22109}
}
Feature Purification Matters: Suppressing Outlier Propagation for Training-Free Open-Vocabulary Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Jin_2025_ICCV_FeaturePurification,
  author    = {Jin, Shuo and Yu, Siyue and Zhang, Bingfeng and Sun, Mingjie and Dong, Yi and Xiao, Jimin},
  title     = {Feature Purification Matters: Suppressing Outlier Propagation for Training-Free Open-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20291--20300}
}
SparseLaneSTP: Leveraging Spatio-Temporal Priors with Sparse Transformers for 3D Lane Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Pittner_2025_ICCV,
  author    = {Pittner, Maximilian and Janai, Joel and Faigle, Mario and Condurache, Alexandru Paul},
  title     = {{SparseLaneSTP}: Leveraging Spatio-Temporal Priors with Sparse Transformers for {3D} Lane Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {29099--29109}
}
Semantic Causality-Aware Vision-Based 3D Occupancy Prediction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Dubing and Zheng, Huan and Zhou, Yucheng and Li, Xianfei and Liao, Wenlong and He, Tao and Peng, Pai and Shen, Jianbing},
  title     = {Semantic Causality-Aware Vision-Based {3D} Occupancy Prediction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24878--24888}
}
OmniSAM: Omnidirectional Segment Anything Model for UDA in Panoramic Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhong_2025_ICCV,
  author    = {Zhong, Ding and Zheng, Xu and Liao, Chenfei and Lyu, Yuanhuiyi and Chen, Jialei and Wu, Shengyang and Zhang, Linfeng and Hu, Xuming},
  title     = {{OmniSAM}: Omnidirectional Segment Anything Model for {UDA} in Panoramic Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23892--23901}
}
Probabilistic Inertial Poser (ProbIP): Uncertainty-aware Human Motion Modeling from Sparse Inertial Sensors-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Min and Jeon, Younho and Jo, Sungho},
  title     = {Probabilistic Inertial Poser ({ProbIP}): Uncertainty-aware Human Motion Modeling from Sparse Inertial Sensors},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25893--25902}
}
RIOcc: Efficient Cross-Modal Fusion Transformer with Collaborative Feature Refinement for 3D Semantic Occupancy Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Fan_2025_ICCV,
  author    = {Fan, Baojie and Li, Xiaotian and Zhou, Yuhan and Jiang, Yuyu and Tian, Jiandong and Fan, Huijie},
  title     = {{RIOcc}: Efficient Cross-Modal Fusion Transformer with Collaborative Feature Refinement for {3D} Semantic Occupancy Prediction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25851--25861}
}
Anomaly Detection of Integrated Circuits Package Substrates Using the Large Vision Model SAIC: Dataset Construction, Methodology, and Application-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Ruiyun and Guo, Bingyang and Li, Haoyuan},
  title     = {Anomaly Detection of Integrated Circuits Package Substrates Using the Large Vision Model {SAIC}: Dataset Construction, Methodology, and Application},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22563--22574}
}
Refer to Any Segmentation Mask Group With Vision-Language Prompts-
[pdf]
[supp]
[bibtex]@InProceedings{Cao_2025_ICCV,
  author    = {Cao, Shengcao and Wei, Zijun and Kuen, Jason and Liu, Kangning and Zhang, Lingzhi and Gu, Jiuxiang and Jung, HyunJoon and Gui, Liang-Yan and Wang, Yu-Xiong},
  title     = {Refer to Any Segmentation Mask Group With Vision-Language Prompts},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21853--21863}
}
Recover Biological Structure from Sparse-View Diffraction Images with Neural Volumetric Prior-
[pdf]
[supp]
[bibtex]@InProceedings{He_2025_ICCV_NeuralVolumetricPrior,
  author    = {He, Renzhi and Zhou, Haowen and Chen, Yubei and Xue, Yi},
  title     = {Recover Biological Structure from Sparse-View Diffraction Images with Neural Volumetric Prior},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27771--27782}
}
SpatialSplat: Efficient Semantic 3D from Sparse Unposed Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sheng_2025_ICCV,
  author    = {Sheng, Yu and Deng, Jiajun and Zhang, Xinran and Zhang, Yu and Hua, Bei and Zhang, Yanyong and Ji, Jianmin},
  title     = {{SpatialSplat}: Efficient Semantic {3D} from Sparse Unposed Images},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26404--26414}
}
ConsistentCity: Semantic Flow-guided Occupancy DiT for Temporally Consistent Driving Scene Synthesis-
[pdf]
[bibtex]@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Benjin and Wang, Xiaogang and Li, Hongsheng},
  title     = {{ConsistentCity}: Semantic Flow-guided Occupancy {DiT} for Temporally Consistent Driving Scene Synthesis},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26382--26392}
}
G2PDiffusion: Cross-Species Genotype-to-Phenotype Prediction via Evolutionary Diffusion-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV_G2PDiffusion,
  author    = {Liu, Mengdi and Gao, Zhangyang and Chang, Hong and Li, Stan Z. and Shan, Shiguang and Chen, Xilin},
  title     = {{G2PDiffusion}: Cross-Species Genotype-to-Phenotype Prediction via Evolutionary Diffusion},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20705--20714}
}
PathFinder: A Multi-Modal Multi-Agent System for Medical Diagnostic Decision-Making Applied to Histopathology-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ghezloo_2025_ICCV,
  author    = {Ghezloo, Fatemeh and Seyfioglu, Mehmet Saygin and Soraki, Rustin and Ikezogwo, Wisdom O. and Li, Beibin and Vivekanandan, Tejoram and Elmore, Joann G. and Krishna, Ranjay and Shapiro, Linda},
  title     = {{PathFinder}: A Multi-Modal Multi-Agent System for Medical Diagnostic Decision-Making Applied to Histopathology},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23431--23441}
}
MolParser: End-to-end Visual Recognition of Molecule Structures in the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2025_ICCV,
  author    = {Fang, Xi and Wang, Jiankun and Cai, Xiaochen and Chen, Shangqian and Yang, Shuwen and Tao, Haoyi and Wang, Nan and Yao, Lin and Zhang, Linfeng and Ke, Guolin},
  title     = {{MolParser}: End-to-end Visual Recognition of Molecule Structures in the Wild},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24528--24538}
}
VideoMiner: Iteratively Grounding Key Frames of Hour-Long Videos via Tree-based Group Relative Policy Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2025_ICCV_VideoMiner,
  author    = {Cao, Xinye and Guo, Hongcan and Qian, Jiawen and Nan, Guoshun and Wang, Chao and Pan, Yuqi and Hou, Tianhao and Wang, Xiaojuan and Gao, Yutong},
  title     = {{VideoMiner}: Iteratively Grounding Key Frames of Hour-Long Videos via Tree-based Group Relative Policy Optimization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23773--23783}
}
FROSS: Faster-Than-Real-Time Online 3D Semantic Scene Graph Generation from RGB-D Images-
[pdf]
[supp]
[bibtex]@InProceedings{Hou_2025_ICCV,
  author    = {Hou, Hao-Yu and Lee, Chun-Yi and Sonogashira, Motoharu and Kawanishi, Yasutomo},
  title     = {{FROSS}: Faster-Than-Real-Time Online {3D} Semantic Scene Graph Generation from {RGB-D} Images},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28818--28827}
}
HVPUNet: Hybrid-Voxel Point-cloud Upsampling Network-
[pdf]
[bibtex]@InProceedings{Ha_2025_ICCV,
  author    = {Ha, Juhyung and Vats, Vibhas Kumar and Jung, Soon-heung and Reza, Alimoor and Crandall, David J.},
  title     = {{HVPUNet}: Hybrid-Voxel Point-cloud Upsampling Network},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {29153--29162}
}
Growing a Twig to Accelerate Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shao_2025_ICCV,
  author    = {Shao, Zhenwei and Wang, Mingyang and Yu, Zhou and Pan, Wenwen and Yang, Yan and Wei, Tao and Zhang, Hongyuan and Mao, Ning and Chen, Wei and Yu, Jun},
  title     = {Growing a Twig to Accelerate Large Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20064--20074}
}
Controllable Latent Space Augmentation for Digital Pathology-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Boutaj_2025_ICCV,
  author    = {Boutaj, Sofi{\`e}ne and Scalbert, Marin and Marza, Pierre and Couzinie-Devy, Florent and Vakalopoulou, Maria and Christodoulidis, Stergios},
  title     = {Controllable Latent Space Augmentation for Digital Pathology},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22165--22174}
}
WINS: Winograd Structured Pruning for Fast Winograd Convolution-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2025_ICCV,
  author    = {Park, Cheonjun and Oh, Hyun Jae and Park, Mincheol and Moon, Hyunchan and Kim, Minsik and Kim, Suhyun and Yoon, Myung Kuk and Ro, Won Woo},
  title     = {{WINS}: {Winograd} Structured Pruning for Fast {Winograd} Convolution},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22477--22487}
}
RayGaussX: Accelerating Gaussian-Based Ray Marching for Real-Time and High-Quality Novel View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Blanc_2025_ICCV,
  author    = {Blanc, Hugo and Deschaud, Jean-Emmanuel and Paljic, Alexis},
  title     = {{RayGaussX}: Accelerating {Gaussian}-Based Ray Marching for Real-Time and High-Quality Novel View Synthesis},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27575--27584}
}
CoopTrack: Exploring End-to-End Learning for Efficient Cooperative Sequential Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhong_2025_ICCV_CoopTrack,
  author    = {Zhong, Jiaru and Wang, Jiahao and Xu, Jiahui and Li, Xiaofan and Nie, Zaiqing and Yu, Haibao},
  title     = {{CoopTrack}: Exploring End-to-End Learning for Efficient Cooperative Sequential Perception},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26954--26965}
}
ContraGS: Codebook-Condensed and Trainable Gaussian Splatting for Fast, Memory-Efficient Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Durvasula_2025_ICCV,
  author    = {Durvasula, Sankeerth and Muhunthan, Sharanshangar and Moustafa, Zain and Chen, Richard and Liang, Ruofan and Guan, Yushi and Ahuja, Nilesh and Jain, Nilesh and Panneer, Selvakumar and Vijaykumar, Nandita},
  title     = {{ContraGS}: Codebook-Condensed and Trainable {Gaussian} Splatting for Fast, Memory-Efficient Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28935--28945}
}
NATRA: Noise-Agnostic Framework for Trajectory Prediction with Noisy Observations-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Rongqing and Li, Changsheng and Lv, Ruilin and Li, Yuhang and Gao, Yang and Zhang, Xiaolu and Zhou, Jun},
  title     = {{NATRA}: Noise-Agnostic Framework for Trajectory Prediction with Noisy Observations},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27872--27884}
}
ModalTune: Fine-Tuning Slide-Level Foundation Models with Multi-Modal Information for Multi-task Learning in Digital Pathology-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ramanathan_2025_ICCV,
  author    = {Ramanathan, Vishwesh and Xu, Tony and Pati, Pushpak and Ahmed, Faruk and Goubran, Maged and Martel, Anne L.},
  title     = {{ModalTune}: Fine-Tuning Slide-Level Foundation Models with Multi-Modal Information for Multi-task Learning in Digital Pathology},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23912--23923}
}
Harnessing Vision Foundation Models for High-Performance, Training-Free Open Vocabulary Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2025_ICCV,
  author    = {Shi, Yuheng and Dong, Minjing and Xu, Chang},
  title     = {Harnessing Vision Foundation Models for High-Performance, Training-Free Open Vocabulary Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23487--23497}
}
MinCD-PnP: Learning 2D-3D Correspondences with Approximate Blind PnP-
[pdf]
[supp]
[bibtex]@InProceedings{An_2025_ICCV,
  author    = {An, Pei and Yang, Jiaqi and Peng, Muyao and Yang, You and Liu, Qiong and Wu, Xiaolin and Nan, Liangliang},
  title     = {{MinCD-PnP}: Learning {2D-3D} Correspondences with Approximate Blind {PnP}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26519--26528}
}
Hallucinatory Image Tokens: A Training-free EAZY Approach to Detecting and Mitigating Object Hallucinations in LVLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Che_2025_ICCV,
  author    = {Che, Liwei and Liu, Tony Qingze and Jia, Jing and Qin, Weiyi and Tang, Ruixiang and Pavlovic, Vladimir},
  title     = {Hallucinatory Image Tokens: A Training-free {EAZY} Approach to Detecting and Mitigating Object Hallucinations in {LVLMs}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21635--21644}
}
MagicDrive-V2: High-Resolution Long Video Generation for Autonomous Driving with Adaptive Control-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2025_ICCV,
  author    = {Gao, Ruiyuan and Chen, Kai and Xiao, Bo and Hong, Lanqing and Li, Zhenguo and Xu, Qiang},
  title     = {{MagicDrive-V2}: High-Resolution Long Video Generation for Autonomous Driving with Adaptive Control},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28135--28144}
}
Communication-Efficient Multi-Vehicle Collaborative Semantic Segmentation via Sparse 3D Gaussian Sharing-
[pdf]
[supp]
[bibtex]@InProceedings{Hong_2025_ICCV,
  author    = {Hong, Tianyu and Zhou, Xiaobo and Hu, Wenkai and Xie, Qi and Ke, Zhihui and Qiu, Tie},
  title     = {Communication-Efficient Multi-Vehicle Collaborative Semantic Segmentation via Sparse {3D} {Gaussian} Sharing},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28622--28631}
}
Sparse-Dense Side-Tuner for efficient Video Temporal Grounding-
[pdf]
[supp]
[bibtex]@InProceedings{Pujol-Perich_2025_ICCV,
  author    = {Pujol-Perich, David and Escalera, Sergio and Clap{\'e}s, Albert},
  title     = {Sparse-Dense Side-Tuner for efficient Video Temporal Grounding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21515--21524}
}
CompCap: Improving Multimodal Large Language Models with Composite Captions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV_CompCap,
  author    = {Chen, Xiaohui and Shukla, Satya Narayan and Azab, Mahmoud and Singh, Aashu and Wang, Qifan and Yang, David and Peng, ShengYun and Yu, Hanchao and Yan, Shen and Zhang, Xuewen and He, Baosheng},
  title     = {{CompCap}: Improving Multimodal Large Language Models with Composite Captions},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23582--23592}
}
Describe, Adapt and Combine: Empowering CLIP Encoders for Open-set 3D Object Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV_DescribeAdaptCombine,
  author    = {Wang, Zhichuan and Zhou, Yang and Liu, Zhe and Yu, Rui and Bai, Song and Wang, Yulong and He, Xinwei and Bai, Xiang},
  title     = {Describe, Adapt and Combine: Empowering {CLIP} Encoders for Open-set {3D} Object Retrieval},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21026--21036}
}
Embodied Image Captioning: Self-supervised Learning Agents for Spatially Coherent Image Descriptions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Galliena_2025_ICCV,
  author    = {Galliena, Tommaso and Apicella, Tommaso and Rosa, Stefano and Morerio, Pietro and Del Bue, Alessio and Natale, Lorenzo},
  title     = {Embodied Image Captioning: Self-supervised Learning Agents for Spatially Coherent Image Descriptions},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24370--24379}
}
Adapt Foundational Segmentation Models with Heterogeneous Searching Space-
[pdf]
[supp]
[bibtex]@InProceedings{Yi_2025_ICCV,
  author    = {Yi, Li and Hu, Jie and Zhang, Songan and Jiang, Guannan},
  title     = {Adapt Foundational Segmentation Models with Heterogeneous Searching Space},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23364--23373}
}
Adversarial Exploitation of Data Diversity Improves Visual Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Sihang and Tan, Siqi and Chang, Bowen and Zhang, Jing and Feng, Chen and Li, Yiming},
  title     = {Adversarial Exploitation of Data Diversity Improves Visual Localization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26848--26858},
}
LawDIS: Language-Window-based Controllable Dichotomous Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2025_ICCV,
  author    = {Yan, Xinyu and Sun, Meijun and Ji, Ge-Peng and Khan, Fahad Shahbaz and Khan, Salman and Fan, Deng-Ping},
  title     = {{LawDIS}: Language-Window-based Controllable Dichotomous Image Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23902--23911},
}
HERMES: temporal-coHERent long-forM understanding with Episodes and Semantics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Faure_2025_ICCV,
  author    = {Faure, Gueter Josmy and Yeh, Jia-Fong and Chen, Min-Hung and Su, Hung-Ting and Lai, Shang-Hong and Hsu, Winston H.},
  title     = {{HERMES}: {temporal-coHERent} {long-forM} understanding with {Episodes} and {Semantics}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22911--22921},
}
CoralSRT: Revisiting Coral Reef Semantic Segmentation by Feature Rectification via Self-supervised Guidance-
[pdf]
[supp]
[bibtex]@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Ziqiang and Wong, Yuk-Kwan and Hua, Binh-Son and Shi, Jianbo and Yeung, Sai-Kit},
  title     = {{CoralSRT}: Revisiting Coral Reef Semantic Segmentation by Feature Rectification via Self-supervised Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19967--19977},
}
Omni-scene Perception-oriented Point Cloud Geometry Enhancement for Coordinate Quantization-
[pdf]
[bibtex]@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Wang and Gao, Wei},
  title     = {Omni-scene Perception-oriented Point Cloud Geometry Enhancement for Coordinate Quantization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26055--26064},
}
B-VLLM: A Vision Large Language Model with Balanced Spatio-Temporal Tokens-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Zhuqiang and Yin, Zhenfei and He, Mengwei and Wang, Zhihui and Liu, Zicheng and Wang, Zhiyong and Hu, Kun},
  title     = {{B-VLLM}: A Vision Large Language Model with Balanced Spatio-Temporal Tokens},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24549--24558},
}
SL2A-INR: Single-Layer Learnable Activation for Implicit Neural Representation-
[pdf]
[supp]
[bibtex]@InProceedings{Rezaeian_2025_ICCV,
  author    = {Rezaeian, Reza and Heidari, Moein and Azad, Reza and Merhof, Dorit and Soltanian-Zadeh, Hamid and Hacihaliloglu, Ilker},
  title     = {{SL2A-INR}: Single-Layer Learnable Activation for Implicit Neural Representation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26065--26074},
}
Authentic 4D Driving Simulation with a Video Generation Model-
[pdf]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Lening and Zheng, Wenzhao and Du, Dalong and Zhang, Yunpeng and Ren, Yilong and Jiang, Han and Cui, Zhiyong and Yu, Haiyang and Zhou, Jie and Zhang, Shanghang},
  title     = {Authentic {4D} Driving Simulation with a Video Generation Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28892--28902},
}
Curve-Aware Gaussian Splatting for 3D Parametric Curve Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_ICCV,
  author    = {Gao, Zhirui and Yi, Renjiao and Dai, Yaqiao and Zhu, Xuening and Chen, Wei and Zhu, Chenyang and Xu, Kai},
  title     = {Curve-Aware {Gaussian} Splatting for {3D} Parametric Curve Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27531--27541},
}
Sim-DETR: Unlock DETR for Temporal Sentence Grounding-
[pdf]
[bibtex]@InProceedings{Tang_2025_ICCV,
  author    = {Tang, Jiajin and Wei, Zhengxuan and Zhu, Yuchen and Shi, Cheng and Li, Guanbin and Lin, Liang and Yang, Sibei},
  title     = {{Sim-DETR}: Unlock {DETR} for Temporal Sentence Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22760--22771},
}
METEOR: Multi-Encoder Collaborative Token Pruning for Efficient Vision Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yuchen and Wang, Yaoming and Shi, Bowen and Zhang, Xiaopeng and Dai, Wenrui and Li, Chenglin and Xiong, Hongkai and Tian, Qi},
  title     = {{METEOR}: Multi-Encoder Collaborative Token Pruning for Efficient Vision Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21492--21504},
}
SeqGrowGraph: Learning Lane Topology as a Chain of Graph Expansions-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xie_2025_ICCV,
  author    = {Xie, Mengwei and Zeng, Shuang and Chang, Xinyuan and Liu, Xinran and Pan, Zheng and Xu, Mu and Wei, Xing},
  title     = {{SeqGrowGraph}: Learning Lane Topology as a Chain of Graph Expansions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27166--27175},
}
Neural Compression for 3D Geometry Sets-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2025_ICCV,
  author    = {Ren, Siyu and Hou, Junhui and Lin, Weiyao and Wang, Wenping},
  title     = {Neural Compression for {3D} Geometry Sets},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25294--25304},
}
Lifting the Structural Morphing for Wide-Angle Images Rectification: Unified Content and Boundary Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Luan_2025_ICCV,
  author    = {Luan, Wenting and Lu, Siqi and Zheng, Yongbin and Xu, Wanying and Nie, Lang and Zhou, Zongtan and Liao, Kang},
  title     = {Lifting the Structural Morphing for Wide-Angle Images Rectification: Unified Content and Boundary Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25529--25538},
}
ASGS: Single-Domain Generalizable Open-Set Object Detection via Adaptive Subgraph Searching-
[pdf]
[supp]
[bibtex]@InProceedings{Yuan_2025_ICCV,
  author    = {Yuan, Yuxuan and Tang, Luyao and Chen, Yixin and Chen, Chaoqi and Huang, Yue and Ding, Xinghao},
  title     = {{ASGS}: Single-Domain Generalizable Open-Set Object Detection via Adaptive Subgraph Searching},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20911--20921},
}
UniConvNet: Expanding Effective Receptive Field while Maintaining Asymptotically Gaussian Distribution for ConvNets of Any Scale-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yuhao and Xi, Wei},
  title     = {{UniConvNet}: Expanding Effective Receptive Field while Maintaining Asymptotically {Gaussian} Distribution for {ConvNets} of Any Scale},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20922--20933},
}
Cross-View Isolated Sign Language Recognition via View Synthesis and Feature Disentanglement-
[pdf]
[supp]
[bibtex]@InProceedings{Shen_2025_ICCV,
  author    = {Shen, Xin and Wang, Xinyu and Shen, Lei and Zhang, Kaihao and Yu, Xin},
  title     = {Cross-View Isolated Sign Language Recognition via View Synthesis and Feature Disentanglement},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20647--20657},
}
3DGS-LM: Faster Gaussian-Splatting Optimization with Levenberg-Marquardt-
[pdf]
[supp]
[bibtex]@InProceedings{Hollein_2025_ICCV,
  author    = {H{\"o}llein, Lukas and Bo{\v{z}}i{\v{c}}, Alja{\v{z}} and Zollh{\"o}fer, Michael and Nie{\ss}ner, Matthias},
  title     = {{3DGS-LM}: Faster {Gaussian}-Splatting Optimization with {Levenberg-Marquardt}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26740--26750},
}
GauUpdate: New Object Insertion in 3D Gaussian Fields with Consistent Global Illumination-
[pdf]
[supp]
[bibtex]@InProceedings{Ren_2025_ICCV,
  author    = {Ren, Chengwei and Zhang, Fan and Xu, Liangchao and Pan, Liang and Liu, Ziwei and Wang, Wenping and Zhang, Xiao-Ping and Liu, Yuan},
  title     = {{GauUpdate}: New Object Insertion in {3D} {Gaussian} Fields with Consistent Global Illumination},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28653--28663},
}
OphCLIP: Hierarchical Retrieval-Augmented Learning for Ophthalmic Surgical Video-Language Pretraining-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Ming and Yuan, Kun and Shen, Yaling and Tang, Feilong and Xu, Xiaohao and Zhou, Lin and Li, Wei and Chen, Ying and Xu, Zhongxing and Peng, Zelin and Yan, Siyuan and Srivastav, Vinkle and Song, Diping and Li, Tianbin and Shi, Danli and Ye, Jin and Padoy, Nicolas and Navab, Nassir and He, Junjun and Ge, Zongyuan},
  title     = {{OphCLIP}: Hierarchical Retrieval-Augmented Learning for Ophthalmic Surgical Video-Language Pretraining},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19838--19849},
}
IM360: Large-scale Indoor Mapping with 360 Cameras-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jung_2025_ICCV,
  author    = {Jung, Dongki and Choi, Jaehoon and Lee, Yonghan and Manocha, Dinesh},
  title     = {{IM360}: Large-scale Indoor Mapping with 360 Cameras},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {29040--29050},
}
SU-RGS: Relightable 3D Gaussian Splatting from Sparse Views under Unconstrained Illuminations-
[pdf]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Qi and Huang, Chi and Zhang, Qian and Li, Nan and Feng, Wei},
  title     = {{SU-RGS}: Relightable {3D} {Gaussian} Splatting from Sparse Views under Unconstrained Illuminations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26859--26868},
}
Representation Shift: Unifying Token Compression with FlashAttention-
[pdf]
[arXiv]
[bibtex]@InProceedings{Choi_2025_ICCV,
  author    = {Choi, Joonmyung and Lee, Sanghyeok and Ko, Byungoh and Kim, Eunseo and Kil, Jihyung and Kim, Hyunwoo J.},
  title     = {Representation Shift: Unifying Token Compression with {FlashAttention}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20456--20466},
}
GenHancer: Imperfect Generative Models are Secretly Strong Vision-Centric Enhancers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_ICCV,
  author    = {Ma, Shijie and Ge, Yuying and Wang, Teng and Guo, Yuxin and Ge, Yixiao and Shan, Ying},
  title     = {{GenHancer}: Imperfect Generative Models are Secretly Strong Vision-Centric Enhancers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24402--24412},
}
MINERVA: Evaluating Complex Video Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nagrani_2025_ICCV,
  author    = {Nagrani, Arsha and Menon, Sachit and Iscen, Ahmet and Buch, Shyamal and Mehran, Ramin and Jha, Nilpa and Hauth, Anja and Zhu, Yukun and Vondrick, Carl and Sirotenko, Mikhail and Schmid, Cordelia and Weyand, Tobias},
  title     = {{MINERVA}: Evaluating Complex Video Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23968--23978},
}
Rectifying Magnitude Neglect in Linear Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2025_ICCV,
  author    = {Fan, Qihang and Huang, Huaibo and Ai, Yuang and He, Ran},
  title     = {Rectifying Magnitude Neglect in Linear Attention},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21505--21514},
}
GEMeX: A Large-Scale, Groundable, and Explainable Medical VQA Benchmark for Chest X-ray Diagnosis-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Bo and Zou, Ke and Zhan, Li-Ming and Lu, Zexin and Dong, Xiaoyu and Chen, Yidi and Xie, Chengqiang and Cao, Jiannong and Wu, Xiao-Ming and Fu, Huazhu},
  title     = {{GEMeX}: A Large-Scale, Groundable, and Explainable Medical {VQA} Benchmark for Chest {X-ray} Diagnosis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21310--21320},
}
M-Net: MRI Brain Tumor Sequential Segmentation Network via Mesh-Cast-
[pdf]
[bibtex]@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Jiacheng and Ding, Hui and Zhang, Shiyu and Huo, Guoping},
  title     = {{M-Net}: {MRI} Brain Tumor Sequential Segmentation Network via {Mesh-Cast}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20116--20125},
}
Diffusion Image Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chihaoui_2025_ICCV,
  author    = {Chihaoui, Hamadi and Favaro, Paolo},
  title     = {Diffusion Image Prior},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24636--24644},
}
Constructing Ophthalmic MLLM for Positioning-diagnosis Collaboration Through Clinical Cognitive Chain Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Xinyao and Song, Diping},
  title     = {Constructing Ophthalmic {MLLM} for Positioning-diagnosis Collaboration Through Clinical Cognitive Chain Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21547--21556},
}
Polarimetric Neural Field via Unified Complex-Valued Wave Representation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Chu and Yang, Yixin and Liao, Junda and Guo, Heng and Shi, Boxin and Sato, Imari},
  title     = {Polarimetric Neural Field via Unified Complex-Valued Wave Representation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25660--25669},
}
CLIP-Adapted Region-to-Text Learning for Generative Open-Vocabulary Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Ge_2025_ICCV,
  author    = {Ge, Jiannan and Xie, Lingxi and Xie, Hongtao and Li, Pandeng and Liu, Sun-Ao and Zhang, Xiaopeng and Tian, Qi and Zhang, Yongdong},
  title     = {{CLIP}-Adapted Region-to-Text Learning for Generative Open-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24034--24044},
}
TeethGenerator: A two-stage framework for paired pre- and post-orthodontic 3D dental data generation-
[pdf]
[supp]
[bibtex]@InProceedings{Lei_2025_ICCV,
  author    = {Lei, Changsong and Liang, Yaqian and Wang, Shaofeng and Dai, Jiajia and Liu, Yong-Jin},
  title     = {{TeethGenerator}: A two-stage framework for paired pre- and post-orthodontic {3D} dental data generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25872--25881},
}
Enhancing Zero-shot Object Counting via Text-guided Local Ranking and Number-evoked Global Attention-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Shiwei and Zhou, Qi and Ke, Wei},
  title     = {Enhancing Zero-shot Object Counting via Text-guided Local Ranking and Number-evoked Global Attention},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21097--21106},
}
Bidirectional Likelihood Estimation with Multi-Modal Large Language Models for Text-Video Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ko_2025_ICCV,
  author    = {Ko, Dohwan and Lee, Ji Soo and Choi, Minhyuk and Meng, Zihang and Kim, Hyunwoo J.},
  title     = {Bidirectional Likelihood Estimation with Multi-Modal Large Language Models for Text-Video Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22263--22273},
}
TinyViM: Frequency Decoupling for Tiny Hybrid Vision Mamba-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_ICCV,
  author    = {Ma, Xiaowen and Ni, Zhenliang and Chen, Xinghao},
  title     = {{TinyViM}: Frequency Decoupling for Tiny Hybrid Vision {Mamba}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23519--23529},
}
Bridging Domain Generalization to Multimodal Domain Generalization via Unified Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Hai and Xia, Yan and Zhou, Sashuai and Wang, Hanting and Wang, Shulei and Zhao, Zhou},
  title     = {Bridging Domain Generalization to Multimodal Domain Generalization via Unified Representations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22488--22498},
}
Gaussian-based World Model: Gaussian Priors for Voxel-Based Occupancy Prediction and Future Motion Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Feng_2025_ICCV,
  author    = {Feng, Tuo and Wang, Wenguan and Yang, Yi},
  title     = {{Gaussian}-based World Model: {Gaussian} Priors for Voxel-Based Occupancy Prediction and Future Motion Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25239--25249},
}
SketchSplat: 3D Edge Reconstruction via Differentiable Multi-view Sketch Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ying_2025_ICCV,
  author    = {Ying, Haiyang and Zwicker, Matthias},
  title     = {{SketchSplat}: {3D} Edge Reconstruction via Differentiable Multi-view Sketch Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25649--25659},
}
Sat2City: 3D City Generation from A Single Satellite Image with Cascaded Latent Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hua_2025_ICCV,
  author    = {Hua, Tongyan and Jiang, Lutao and Chen, Ying-Cong and Zhao, Wufan},
  title     = {{Sat2City}: {3D} City Generation from A Single Satellite Image with Cascaded Latent Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27978--27988},
}
Hi3DGen: High-fidelity 3D Geometry Generation from Images via Normal Bridging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2025_ICCV,
  author    = {Ye, Chongjie and Wu, Yushuang and Lu, Ziteng and Chang, Jiahao and Guo, Xiaoyang and Zhou, Jiaqing and Zhao, Hao and Han, Xiaoguang},
  title     = {{Hi3DGen}: High-fidelity {3D} Geometry Generation from Images via Normal Bridging},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25050--25061},
}
SC-Captioner: Improving Image Captioning with Self-Correction by Reinforcement Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Lin and Zeng, Xianfang and Li, Kangcong and Yu, Gang and Chen, Tao},
  title     = {{SC-Captioner}: Improving Image Captioning with Self-Correction by Reinforcement Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23145--23155},
}
Aligning Moments in Time using Video Queries-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kumar_2025_ICCV,
  author    = {Kumar, Yogesh and Agarwal, Uday and Gupta, Manish and Mishra, Anand},
  title     = {Aligning Moments in Time using Video Queries},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20215--20225},
}
StreamGS: Online Generalizable Gaussian Splatting Reconstruction for Unposed Image Streams-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Yang and Wang, Jinglu and Chu, Lei and Li, Xiao and Kao, Shiu-Hong and Chen, Ying-Cong and Lu, Yan},
  title     = {{StreamGS}: Online Generalizable {Gaussian} Splatting Reconstruction for Unposed Image Streams},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25841--25850},
}
Ensemble Foreground Management for Unsupervised Object Discovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Ziling and Moemeni, Armaghan and Caleb-Solly, Praminda},
  title     = {Ensemble Foreground Management for Unsupervised Object Discovery},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20268--20279},
}
RESCUE: Crowd Evacuation Simulation via Controlling SDM-United Characters-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Xiaolin and Zhou, Tianyi and Kang, Hongbo and Ma, Jian and Wang, Ziwen and Huang, Jing and Weng, Wenguo and Lai, Yu-Kun and Li, Kun},
  title     = {{RESCUE}: Crowd Evacuation Simulation via Controlling {SDM}-United Characters},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24955--24964},
}
Signs as Tokens: A Retrieval-Enhanced Multilingual Sign Language Generator-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zuo_2025_ICCV,
  author    = {Zuo, Ronglai and Potamias, Rolandos Alexandros and Ververas, Evangelos and Deng, Jiankang and Zafeiriou, Stefanos},
  title     = {Signs as Tokens: A Retrieval-Enhanced Multilingual Sign Language Generator},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23806--23816},
}
GUIOdyssey: A Comprehensive Dataset for Cross-App GUI Navigation on Mobile Devices-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Quanfeng and Shao, Wenqi and Liu, Zitao and Du, Lingxiao and Meng, Fanqing and Li, Boxuan and Chen, Botong and Huang, Siyuan and Zhang, Kaipeng and Luo, Ping},
  title     = {{GUIOdyssey}: A Comprehensive Dataset for Cross-App {GUI} Navigation on Mobile Devices},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22404--22414},
}
TOTP: Transferable Online Pedestrian Trajectory Prediction with Temporal-Adaptive Mamba Latent Diffusion-
[pdf]
[bibtex]@InProceedings{Ren_2025_ICCV,
  author    = {Ren, Ziyang and Wei, Ping and Deng, Shangqi and Tang, Haowen and Li, Jiapeng and Li, Huan},
  title     = {{TOTP}: Transferable Online Pedestrian Trajectory Prediction with Temporal-Adaptive {Mamba} Latent Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26263--26272},
}
Supercharging Floorplan Localization with Semantic Rays-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Grader_2025_ICCV,
  author    = {Grader, Yuval and Averbuch-Elor, Hadar},
  title     = {Supercharging Floorplan Localization with Semantic Rays},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27116--27125},
}
MDP3: A Training-free Approach for List-wise Frame Selection in Video-LLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Hui and Lu, Shiyin and Wang, Huanyu and Chen, Qing-Guo and Xu, Zhao and Luo, Weihua and Zhang, Kaifu and Li, Ming},
  title     = {{MDP3}: A Training-free Approach for List-wise Frame Selection in {Video-LLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24090--24101},
}
Toward Long-Tailed Online Anomaly Detection through Class-Agnostic Concepts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Chiao-An and Peng, Kuan-Chuan and Yeh, Raymond A.},
  title     = {Toward Long-Tailed Online Anomaly Detection through Class-Agnostic Concepts},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23419--23430},
}
DiffTell: A High-Quality Dataset for Describing Image Manipulation Changes-
[pdf]
[supp]
[bibtex]@InProceedings{Di_2025_ICCV,
  author    = {Di, Zonglin and Shi, Jing and Fan, Yifei and Tan, Hao and Black, Alexander and Collomosse, John and Liu, Yang},
  title     = {{DiffTell}: A High-Quality Dataset for Describing Image Manipulation Changes},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24580--24590},
}
TrafficLoc: Localizing Traffic Surveillance Cameras in 3D Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2025_ICCV,
  author    = {Xia, Yan and Lu, Yunxiang and Song, Rui and Dhaouadi, Oussema and Henriques, Jo{\~a}o F. and Cremers, Daniel},
  title     = {{TrafficLoc}: Localizing Traffic Surveillance Cameras in {3D} Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28685--28695},
}
CA-I2P: Channel-Adaptive Registration Network with Global Optimal Selection-
[pdf]
[bibtex]@InProceedings{Cheng_2025_ICCV,
  author    = {Cheng, Zhixin and Deng, Jiacheng and Li, Xinjun and Yin, Xiaotian and Liao, Bohao and Yin, Baoqun and Yang, Wenfei and Zhang, Tianzhu},
  title     = {{CA-I2P}: Channel-Adaptive Registration Network with Global Optimal Selection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27739--27749},
}
Exploiting Vision Language Model for Training-Free 3D Point Cloud OOD Detection via Graph Score Propagation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Tiankai and Li, Yushu and Goodge, Adam and Teng, Fei and Yang, Xulei and Li, Tianrui and Xu, Xun},
  title     = {Exploiting Vision Language Model for Training-Free {3D} Point Cloud {OOD} Detection via Graph Score Propagation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28797--28807},
}
AHCPTQ: Accurate and Hardware-Compatible Post-Training Quantization for Segment Anything Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Wenlun and Zhong, Yunshan and Ando, Shimpei and Yoshioka, Kentaro},
  title     = {{AHCPTQ}: Accurate and Hardware-Compatible Post-Training Quantization for {Segment Anything Model}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22383--22392},
}
WonderTurbo: Generating Interactive 3D World in 0.72 Seconds-
[pdf]
[supp]
[bibtex]@InProceedings{Ni_2025_ICCV,
  author    = {Ni, Chaojun and Wang, Xiaofeng and Zhu, Zheng and Wang, Weijie and Li, Haoyun and Zhao, Guosheng and Li, Jie and Qin, Wenkang and Huang, Guan and Mei, Wenjun},
  title     = {{WonderTurbo}: Generating Interactive {3D} World in 0.72 Seconds},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27423--27434},
}
S3E: Self-Supervised State Estimation for Radar-Inertial System-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Shengpeng and Xie, Yulong and Liao, Qing and Wang, Wei},
  title     = {{S3E}: Self-Supervised State Estimation for Radar-Inertial System},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26686--26695},
}
GenFlow3D: Generative Scene Flow Estimation and Prediction on Point Cloud Sequences-
[pdf]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Hanlin and Weng, Wenming and Zhang, Yueyi and Xiong, Zhiwei},
  title     = {{GenFlow3D}: Generative Scene Flow Estimation and Prediction on Point Cloud Sequences},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27488--27497},
}
PBCAT: Patch-Based Composite Adversarial Training against Physically Realizable Attacks on Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Xiao and Zhu, Yiming and Huang, Yifan and Zhang, Wei and He, Yingzhe and Shi, Jie and Hu, Xiaolin},
  title     = {{PBCAT}: Patch-Based Composite Adversarial Training against Physically Realizable Attacks on Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24456--24466},
}
PossLoss: A Reliable and Sensitive Facial Landmark Detection Loss Function-
[pdf]
[bibtex]@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Qikui},
  title     = {{PossLoss}: A Reliable and Sensitive Facial Landmark Detection Loss Function},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24858--24867},
}
StealthAttack: Robust 3D Gaussian Splatting Poisoning via Density-Guided Illusions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ke_2025_ICCV,
  author    = {Ke, Bo-Hsu and Xie, You-Zhe and Liu, Yu-Lun and Chiu, Wei-Chen},
  title     = {{StealthAttack}: Robust {3D} {Gaussian} Splatting Poisoning via Density-Guided Illusions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27400--27411},
}
LightCity: An Urban Dataset for Outdoor Inverse Rendering and Reconstruction under Multi-illumination Conditions-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Jingjing and Hu, Qirui and Bao, Chong and Zhu, Yuke and Bao, Hujun and Cui, Zhaopeng and Zhang, Guofeng},
  title     = {{LightCity}: An Urban Dataset for Outdoor Inverse Rendering and Reconstruction under Multi-illumination Conditions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26477--26487},
}
Region-aware Anchoring Mechanism for Efficient Referring Visual Grounding-
[pdf]
[supp]
[bibtex]@InProceedings{Ouyang_2025_ICCV,
  author    = {Ouyang, Shuyi and Niu, Ziwei and Wang, Hongyi and Chen, Yen-Wei and Lin, Lanfen},
  title     = {Region-aware Anchoring Mechanism for Efficient Referring Visual Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24192--24202},
}
Hybrid-Tower: Fine-grained Pseudo-query Interaction and Generation for Text-to-Video Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Lan_2025_ICCV,
  author    = {Lan, Bangxiang and Xie, Ruobing and Zhao, Ruixiang and Sun, Xingwu and Kang, Zhanhui and Yang, Gang and Li, Xirong},
  title     = {{Hybrid-Tower}: Fine-grained Pseudo-query Interaction and Generation for Text-to-Video Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24497--24506},
}
Monocular Semantic Scene Completion via Masked Recurrent Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Xuzhi and Wu, Xinran and Wang, Song and Kong, Lingdong and Zhao, Ziping}, title = {Monocular Semantic Scene Completion via Masked Recurrent Networks}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24811-24822} }
TurboReg: TurboClique for Robust and Efficient Point Cloud Registration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2025_ICCV, author = {Yan, Shaocheng and Shi, Pengcheng and Zhao, Zhenjun and Wang, Kaixin and Cao, Kuang and Wu, Ji and Li, Jiayuan}, title = {TurboReg: TurboClique for Robust and Efficient Point Cloud Registration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26371-26381} }
Neural Inverse Rendering for High-Accuracy 3D Measurement of Moving Objects with Fewer Phase-Shifting Patterns-
[pdf]
[supp]
[bibtex]@InProceedings{Urakawa_2025_ICCV, author = {Urakawa, Yuki and Watanabe, Yoshihiro}, title = {Neural Inverse Rendering for High-Accuracy 3D Measurement of Moving Objects with Fewer Phase-Shifting Patterns}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27692-27701} }
Dynamic-VLM: Simple Dynamic Visual Token Compression for VideoLLM-
[pdf]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Han and Nie, Yuxiang and Ye, Yongjie and Wang, Yanjie and Li, Shuai and Yu, Haiyang and Lu, Jinghui and Huang, Can}, title = {Dynamic-VLM: Simple Dynamic Visual Token Compression for VideoLLM}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20812-20823} }
Two Losses, One Goal: Balancing Conflict Gradients for Semi-supervised Semantic Segmentation-
[pdf]
[bibtex]@InProceedings{Sun_2025_ICCV, author = {Sun, Rui and Mai, Huayu and Li, Wangkai and Chen, Yujia and Wang, Yuan}, title = {Two Losses, One Goal: Balancing Conflict Gradients for Semi-supervised Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20357-20367} }
SAM4D: Segment Anything in Camera and LiDAR Streams-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Jianyun and Wang, Song and Ni, Ziqian and Hu, Chunyong and Yang, Sheng and Zhu, Jianke and Li, Qiang}, title = {SAM4D: Segment Anything in Camera and LiDAR Streams}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28535-28545} }
TAB: Transformer Attention Bottlenecks enable User Intervention and Debugging in Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rahmanzadehgervi_2025_ICCV, author = {Rahmanzadehgervi, Pooyan and Nguyen, Hung Huy and Liu, Rosanne and Mai, Long and Nguyen, Anh Totti}, title = {TAB: Transformer Attention Bottlenecks enable User Intervention and Debugging in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22551-22562} }
Feature Extraction and Representation of Pre-training Point Cloud Based on Diffusion Models-
[pdf]
[bibtex]@InProceedings{Qiu_2025_ICCV, author = {Qiu, Chang and Da, Feipeng and Zhang, Zilei}, title = {Feature Extraction and Representation of Pre-training Point Cloud Based on Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26559-26568} }
EmbodiedSplat: Personalized Real-to-Sim-to-Real Navigation with Gaussian Splats from a Mobile Device-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chhablani_2025_ICCV, author = {Chhablani, Gunjan and Ye, Xiaomeng and Irshad, Muhammad Zubair and Kira, Zsolt}, title = {EmbodiedSplat: Personalized Real-to-Sim-to-Real Navigation with Gaussian Splats from a Mobile Device}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25431-25441} }
SynCity: Training-Free Generation of 3D Worlds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Engstler_2025_ICCV, author = {Engstler, Paul and Shtedritski, Aleksandar and Laina, Iro and Rupprecht, Christian and Vedaldi, Andrea}, title = {SynCity: Training-Free Generation of 3D Worlds}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27585-27595} }
Resonance: Learning to Predict Social-Aware Pedestrian Trajectories as Co-Vibrations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wong_2025_ICCV, author = {Wong, Conghao and Zou, Ziqian and Xia, Beihao}, title = {Resonance: Learning to Predict Social-Aware Pedestrian Trajectories as Co-Vibrations}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25788-25799} }
Leaps and Bounds: An Improved Point Cloud Winding Number Formulation for Fast Normal Estimation and Surface Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Koneputugodage_2025_ICCV, author = {Koneputugodage, Chamin Hewa and Campbell, Dylan and Gould, Stephen}, title = {Leaps and Bounds: An Improved Point Cloud Winding Number Formulation for Fast Normal Estimation and Surface Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26116-26125} }
Cross-Category Subjectivity Generalization for Style-Adaptive Sketch Re-ID-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2025_ICCV, author = {Hu, Zechao and Yang, Zhengwei and Li, Hao and Wang, Zheng and Zou, Yixiong}, title = {Cross-Category Subjectivity Generalization for Style-Adaptive Sketch Re-ID}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22644-22653} }
CARL: Causality-guided Architecture Representation Learning for an Interpretable Performance Predictor-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ji_2025_ICCV, author = {Ji, Han and Feng, Yuqi and Fan, Jiahao and Sun, Yanan}, title = {CARL: Causality-guided Architecture Representation Learning for an Interpretable Performance Predictor}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23019-23029} }
An OpenMind for 3D Medical Vision Self-supervised Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wald_2025_ICCV, author = {Wald, Tassilo and Ulrich, Constantin and Suprijadi, Jonathan and Ziegler, Sebastian and Nohel, Michal and Peretzke, Robin and Kohler, Gregor and Maier-Hein, Klaus}, title = {An OpenMind for 3D Medical Vision Self-supervised Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23839-23879} }
Dynamic Dictionary Learning for Remote Sensing Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zou_2025_ICCV, author = {Zou, Xuechao and Li, Yue and Zhang, Shun and Li, Kai and Wang, Shiying and Tao, Pin and Xing, Junliang and Lang, Congyan}, title = {Dynamic Dictionary Learning for Remote Sensing Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22457-22466} }
MaterialMVP: Illumination-Invariant Material Generation via Multi-view PBR Diffusion-
[pdf]
[arXiv]
[bibtex]@InProceedings{He_2025_ICCV, author = {He, Zebin and Yang, Mingxin and Yang, Shuhui and Tang, Yixuan and Wang, Tao and Zhang, Kaihao and Chen, Guanying and Liu, Yuhong and Jiang, Jie and Guo, Chunchao and Luo, Wenhan}, title = {MaterialMVP: Illumination-Invariant Material Generation via Multi-view PBR Diffusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26294-26305} }
LookOut: Real-World Humanoid Egocentric Navigation-
[pdf]
[bibtex]@InProceedings{Pan_2025_ICCV, author = {Pan, Boxiao and Harley, Adam W. and Engelmann, Francis and Liu, C. Karen and Guibas, Leonidas J.}, title = {LookOut: Real-World Humanoid Egocentric Navigation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24977-24988} }
Lightweight Gradient-Aware Upscaling of 3D Gaussian Splatting Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Niedermayr_2025_ICCV, author = {Niedermayr, Simon and Neuhauser, Christoph and Westermann, R{\"u}diger}, title = {Lightweight Gradient-Aware Upscaling of 3D Gaussian Splatting Images}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25862-25871} }
Balancing Conservatism and Aggressiveness: Prototype-Affinity Hybrid Network for Few-Shot Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zou_2025_ICCV, author = {Zou, Tianyu and Xiong, Shengwu and Yao, Ruilin and Rong, Yi}, title = {Balancing Conservatism and Aggressiveness: Prototype-Affinity Hybrid Network for Few-Shot Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20561-20571} }
Large Scene Generation with Cube-Absorb Discrete Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2025_ICCV, author = {Hu, Qianjiang and Hu, Wei}, title = {Large Scene Generation with Cube-Absorb Discrete Diffusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25186-25196} }
MS3D: High-Quality 3D Generation via Multi-Scale Representation Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Luo_2025_ICCV, author = {Luo, Guan and Zhang, Jianfeng}, title = {MS3D: High-Quality 3D Generation via Multi-Scale Representation Modeling}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26336-26348} }
Memory-Efficient 4-bit Preconditioned Stochastic Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Jingyang and Ding, Kuangyu and Toh, Kim-Chuan and Zhou, Pan}, title = {Memory-Efficient 4-bit Preconditioned Stochastic Optimization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22633-22643} }
On the Recovery of Cameras from Fundamental Matrices-
[pdf]
[supp]
[bibtex]@InProceedings{Madhavan_2025_ICCV, author = {Madhavan, Rakshith and Arrigoni, Federica}, title = {On the Recovery of Cameras from Fundamental Matrices}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20934-20943} }
Feather the Throttle: Revisiting Visual Token Pruning for Vision-Language Model Acceleration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Endo_2025_ICCV, author = {Endo, Mark and Wang, Xiaohan and Yeung-Levy, Serena}, title = {Feather the Throttle: Revisiting Visual Token Pruning for Vision-Language Model Acceleration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22826-22835} }
MEGA: Memory-Efficient 4D Gaussian Splatting for Dynamic Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Xinjie and Liu, Zhening and Zhang, Yifan and Ge, Xingtong and He, Dailan and Xu, Tongda and Wang, Yan and Lin, Zehong and Yan, Shuicheng and Zhang, Jun}, title = {MEGA: Memory-Efficient 4D Gaussian Splatting for Dynamic Scenes}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27828-27838} }
Bridging 3D Anomaly Localization and Repair via High-Quality Continuous Geometric Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_ICCV, author = {Zheng, Bozhong and Gan, Jinye and Xu, Xiaohao and Chen, Xintao and Li, Wenqiao and Huang, Xiaonan and Ni, Na and Wu, Yingna}, title = {Bridging 3D Anomaly Localization and Repair via High-Quality Continuous Geometric Representation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27063-27072} }
Foresight in Motion: Reinforcing Trajectory Prediction with Reward Heuristics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pei_2025_ICCV, author = {Pei, Muleilan and Shi, Shaoshuai and Chen, Xuesong and Liu, Xu and Shen, Shaojie}, title = {Foresight in Motion: Reinforcing Trajectory Prediction with Reward Heuristics}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28303-28312} }
Spectral Sensitivity Estimation with an Uncalibrated Diffraction Grating-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Makabe_2025_ICCV, author = {Makabe, Lilika and Santo, Hiroaki and Okura, Fumio and Brown, Michael S. and Matsushita, Yasuyuki}, title = {Spectral Sensitivity Estimation with an Uncalibrated Diffraction Grating}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27252-27261} }
Moment Quantization for Video Temporal Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_ICCV, author = {Sun, Xiaolong and Wang, Le and Zhou, Sanping and Shi, Liushuai and Xia, Kun and Liu, Mengnan and Wang, Yabing and Hua, Gang}, title = {Moment Quantization for Video Temporal Grounding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20137-20146} }
MSQ: Memory-Efficient Bit Sparsification Quantization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_ICCV, author = {Han, Seokho and Yoon, Seoyeon and Kim, Jinhee and Wang, Dongwei and Jeon, Kang Eun and Yang, Huanrui and Ko, Jong Hwan}, title = {MSQ: Memory-Efficient Bit Sparsification Quantization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21885-21894} }
Cycle Consistency as Reward: Learning Image-Text Alignment without Human Preferences-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bahng_2025_ICCV, author = {Bahng, Hyojin and Chan, Caroline and Durand, Fredo and Isola, Phillip}, title = {Cycle Consistency as Reward: Learning Image-Text Alignment without Human Preferences}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22934-22946} }
Vid-Group: Temporal Video Grounding Pretraining from Unlabeled Videos in the Wild-
[pdf]
[supp]
[bibtex]@InProceedings{Bao_2025_ICCV, author = {Bao, Peijun and Kong, Chenqi and Yang, Siyuan and Shao, Zihao and Jiang, Xinghao and Ng, Boon Poh and Er, Meng Hwa and Kot, Alex}, title = {Vid-Group: Temporal Video Grounding Pretraining from Unlabeled Videos in the Wild}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20541-20550} }
BANet: Bilateral Aggregation Network for Mobile Stereo Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Gangwei and Liu, Jiaxin and Wang, Xianqi and Cheng, Junda and Deng, Yong and Zang, Jinliang and Chen, Yurui and Yang, Xin}, title = {BANet: Bilateral Aggregation Network for Mobile Stereo Matching}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28870-28880} }
AIM: Adaptive Inference of Multi-Modal LLMs via Token Merging and Pruning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhong_2025_ICCV, author = {Zhong, Yiwu and Liu, Zhuoming and Li, Yin and Wang, Liwei}, title = {AIM: Adaptive Inference of Multi-Modal LLMs via Token Merging and Pruning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20180-20192} }
G2SF: Geometry-Guided Score Fusion for Multimodal Industrial Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Tao_2025_ICCV, author = {Tao, Chengyu and Cao, Xuanming and Du, Juan}, title = {G2SF: Geometry-Guided Score Fusion for Multimodal Industrial Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20551-20560} }
SA-Occ: Satellite-Assisted 3D Occupancy Prediction in Real World-
[pdf]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Chen and Wang, Zhirui and Sheng, Taowei and Jiang, Yi and Li, Yundu and Cheng, Peirui and Zhang, Luning and Chen, Kaiqiang and Hu, Yanfeng and Yang, Xue and Sun, Xian}, title = {SA-Occ: Satellite-Assisted 3D Occupancy Prediction in Real World}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27021-27030} }
Implicit Counterfactual Learning for Audio-Visual Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zha_2025_ICCV, author = {Zha, Mingfeng and Li, Tianyu and Wang, Guoqing and Wang, Peng and Wu, Yangyang and Yang, Yang and Shen, Heng Tao}, title = {Implicit Counterfactual Learning for Audio-Visual Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22349-22360} }
FlowEdit: Inversion-Free Text-Based Editing Using Pre-Trained Flow Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kulikov_2025_ICCV, author = {Kulikov, Vladimir and Kleiner, Matan and Huberman-Spiegelglas, Inbar and Michaeli, Tomer}, title = {FlowEdit: Inversion-Free Text-Based Editing Using Pre-Trained Flow Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19721-19730} }
CLIPSym: Delving into Symmetry Detection with CLIP-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Tinghan and Rahman, Md Ashiqur and Yeh, Raymond A.}, title = {CLIPSym: Delving into Symmetry Detection with CLIP}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21003-21013} }
MRGen: Segmentation Data Engine For Underrepresented MRI Modalities-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Haoning and Zhao, Ziheng and Zhang, Ya and Wang, Yanfeng and Xie, Weidi}, title = {MRGen: Segmentation Data Engine For Underrepresented MRI Modalities}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19903-19913} }
Accelerate 3D Object Detection Models via Zero-Shot Attention Key Pruning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Lizhen and Bai, Xiuxiu and Jia, Xiaojun and Fang, Jianwu and Pang, Shanmin}, title = {Accelerate 3D Object Detection Models via Zero-Shot Attention Key Pruning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23085-23094} }
Adaptive Learning of High-Value Regions for Semi-Supervised Medical Image Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Lei_2025_ICCV, author = {Lei, Tao and Yang, Ziyao and Wang, Xingwu and Wang, Yi and Wang, Xuan and Sun, Feiman and Nandi, Asoke K.}, title = {Adaptive Learning of High-Value Regions for Semi-Supervised Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21450-21459} }
Learning Neural Scene Representation from iToF Imaging-
[pdf]
[supp]
[bibtex]@InProceedings{Chang_2025_ICCV, author = {Chang, Wenjie and Chang, Hanzhi and Zhang, Yueyi and Yang, Wenfei and Zhang, Tianzhu}, title = {Learning Neural Scene Representation from iToF Imaging}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27937-27946} }
Robust Machine Unlearning for Quantized Neural Networks via Adaptive Gradient Reweighting with Similar Labels-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tong_2025_ICCV, author = {Tong, Yujia and Wang, Yuze and Yuan, Jingling and Hu, Chuang}, title = {Robust Machine Unlearning for Quantized Neural Networks via Adaptive Gradient Reweighting with Similar Labels}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20603-20612} }
SegAnyPET: Universal Promptable Segmentation from Positron Emission Tomography Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Yichi and Xue, Le and Zhang, Wenbo and Li, Lanlan and Liu, Yuchen and Jiang, Chen and Cheng, Yuan and Qi, Yuan}, title = {SegAnyPET: Universal Promptable Segmentation from Positron Emission Tomography Images}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21107-21116} }
ILLUME: Illuminating Your LLMs to See, Draw, and Self-Enhance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Chunwei and Lu, Guansong and Yang, Junwei and Huang, Runhui and Han, Jianhua and Hou, Lu and Zhang, Wei and Xu, Hang}, title = {ILLUME: Illuminating Your LLMs to See, Draw, and Self-Enhance}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21612-21622} }
No Pose at All: Self-Supervised Pose-Free 3D Gaussian Splatting from Sparse Views-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_ICCV, author = {Huang, Ranran and Mikolajczyk, Krystian}, title = {No Pose at All: Self-Supervised Pose-Free 3D Gaussian Splatting from Sparse Views}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27947-27957} }
EmbodiedOcc: Embodied 3D Occupancy Prediction for Vision-based Online Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Yuqi and Zheng, Wenzhao and Zuo, Sicheng and Huang, Yuanhui and Zhou, Jie and Lu, Jiwen}, title = {EmbodiedOcc: Embodied 3D Occupancy Prediction for Vision-based Online Scene Understanding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26360-26370} }
DiST-4D: Disentangled Spatiotemporal Diffusion with Metric Depth for 4D Driving Scene Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2025_ICCV, author = {Guo, Jiazhe and Ding, Yikang and Chen, Xiwu and Chen, Shuo and Li, Bohan and Zou, Yingshuang and Lyu, Xiaoyang and Tan, Feiyang and Qi, Xiaojuan and Li, Zhiheng and Zhao, Hao}, title = {DiST-4D: Disentangled Spatiotemporal Diffusion with Metric Depth for 4D Driving Scene Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27231-27241} }
Enhancing Prompt Generation with Adaptive Refinement for Camouflaged Object Detection-
[pdf]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Xuehan and Ren, Guangyu and Dai, Tianhong and Stathaki, Tania and Liu, Hengyan}, title = {Enhancing Prompt Generation with Adaptive Refinement for Camouflaged Object Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20672-20682} }
Generalizable Non-Line-of-Sight Imaging with Learnable Physical Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_ICCV, author = {Sun, Shida and Li, Yue and Zhang, Yueyi and Xiong, Zhiwei}, title = {Generalizable Non-Line-of-Sight Imaging with Learnable Physical Priors}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25040-25049} }
ReMP-AD: Retrieval-enhanced Multi-modal Prompt Fusion for Few-Shot Industrial Visual Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2025_ICCV, author = {Ma, Hongchi and Yang, Guanglei and Zhao, Debin and Ji, Yanli and Zuo, Wangmeng}, title = {ReMP-AD: Retrieval-enhanced Multi-modal Prompt Fusion for Few-Shot Industrial Visual Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20425-20434} }
TOGA: Temporally Grounded Open-Ended Video QA with Weak Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gupta_2025_ICCV, author = {Gupta, Ayush and Roy, Anirban and Chellappa, Rama and Bastian, Nathaniel D. and Velasquez, Alvaro and Jha, Susmit}, title = {TOGA: Temporally Grounded Open-Ended Video QA with Weak Supervision}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23593-23603} }
MUSE-VL: Modeling Unified VLM through Semantic Discrete Encoding-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2025_ICCV, author = {Xie, Rongchang and Du, Chen and Song, Ping and Liu, Chang}, title = {MUSE-VL: Modeling Unified VLM through Semantic Discrete Encoding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24135-24146} }
Task Vector Quantization for Memory-Efficient Model Merging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_ICCV, author = {Kim, Youngeun and Lee, Seunghwan and Jung, Aecheon and Ryu, Bogon and Hong, Sungeun}, title = {Task Vector Quantization for Memory-Efficient Model Merging}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20105-20115} }
GeoFormer: Geometry Point Encoder for 3D Object Detection with Graph-based Transformer-
[pdf]
[bibtex]@InProceedings{Jin_2025_ICCV, author = {Jin, Xin and Su, Haisheng and Ma, Cong and Liu, Kai and Wu, Wei and Hui, Fei and Yan, Junchi}, title = {GeoFormer: Geometry Point Encoder for 3D Object Detection with Graph-based Transformer}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26879-26889} }
Unraveling the Effects of Synthetic Data on End-to-End Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ge_2025_ICCV, author = {Ge, Junhao and Liu, Zuhong and Fan, Longteng and Jiang, Yifan and Su, Jiaqi and Li, Yiming and Zhang, Zhejun and Chen, Siheng}, title = {Unraveling the Effects of Synthetic Data on End-to-End Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28859-28869} }
NeuFrameQ: Neural Frame Fields for Scalable and Generalizable Anisotropic Quadrangulation-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Ying-Tian and Li, Jiajun and Liu, Yu-Tao and Yu, Xin and Guo, Yuan-Chen and Cao, Yan-Pei and Liang, Ding and Shamir, Ariel and Zhang, Song-Hai}, title = {NeuFrameQ: Neural Frame Fields for Scalable and Generalizable Anisotropic Quadrangulation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28000-28009} }
Keyframe-oriented Vision Token Pruning: Enhancing Efficiency of Large Vision Language Models on Long-Form Video Processing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Yudong and Sun, Jingwei and Lin, Yueqian and Zhang, Jianyi and Zhang, Jingyang and Yin, Ming and Wang, Qinsi and Li, Hai and Chen, Yiran}, title = {Keyframe-oriented Vision Token Pruning: Enhancing Efficiency of Large Vision Language Models on Long-Form Video Processing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20802-20811} }
LongSplat: Robust Unposed 3D Gaussian Splatting for Casual Long Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_ICCV, author = {Lin, Chin-Yang and Sun, Cheng and Yang, Fu-En and Chen, Min-Hung and Lin, Yen-Yu and Liu, Yu-Lun}, title = {LongSplat: Robust Unposed 3D Gaussian Splatting for Casual Long Videos}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27412-27422} }
S3R-GS: Streamlining the Pipeline for Large-Scale Street Scene Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Zheng_2025_ICCV, author = {Zheng, Guangting and Deng, Jiajun and Chu, Xiaomeng and Yuan, Yu and Li, Houqiang and Zhang, Yanyong}, title = {S3R-GS: Streamlining the Pipeline for Large-Scale Street Scene Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25594-25604} }
Beyond One Shot, Beyond One Perspective: Cross-View and Long-Horizon Distillation for Better LiDAR Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Xiang and Kong, Lingdong and Wang, Song and Zhou, Chuanwei and Liu, Qingshan}, title = {Beyond One Shot, Beyond One Perspective: Cross-View and Long-Horizon Distillation for Better LiDAR Representations}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25506-25518} }
GaussianUpdate: Continual 3D Gaussian Splatting Update for Changing Environments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2025_ICCV, author = {Zeng, Lin and Zhao, Boming and Hu, Jiarui and Shen, Xujie and Dang, Ziqiang and Bao, Hujun and Cui, Zhaopeng}, title = {GaussianUpdate: Continual 3D Gaussian Splatting Update for Changing Environments}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25800-25809} }
SiM3D: Single-instance Multiview Multimodal and Multisetup 3D Anomaly Detection Benchmark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Costanzino_2025_ICCV, author = {Costanzino, Alex and Ramirez, Pierluigi Zama and Lella, Luigi and Ragaglia, Matteo and Oliva, Alessandro and Lisanti, Giuseppe and Di Stefano, Luigi}, title = {SiM3D: Single-instance Multiview Multimodal and Multisetup 3D Anomaly Detection Benchmark}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20944-20953} }
Teaching AI the Anatomy Behind the Scan: Addressing Anatomical Flaws in Medical Image Segmentation with Learnable Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeon_2025_ICCV, author = {Jeon, Young Seok and Yang, Hongfei and Fu, Huazhu and Feng, Mengling}, title = {Teaching AI the Anatomy Behind the Scan: Addressing Anatomical Flaws in Medical Image Segmentation with Learnable Prior}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24024-24033} }
WorldScore: A Unified Evaluation Benchmark for World Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Duan_2025_ICCV, author = {Duan, Haoyi and Yu, Hong-Xing and Chen, Sirui and Fei-Fei, Li and Wu, Jiajun}, title = {WorldScore: A Unified Evaluation Benchmark for World Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27713-27724} }
EvaGaussians: Event Stream Assisted Gaussian Splatting from Blurry Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Wangbo and Feng, Chaoran and Li, Jianing and Tang, Jiye and Yang, Jiashu and Tang, Zhenyu and Cao, Meng and Jia, Xu and Yang, Yuchao and Yuan, Li and Tian, Yonghong},
  title     = {{EvaGaussians}: Event Stream Assisted {Gaussian} Splatting from Blurry Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24780--24790},
}
CuMPerLay: Learning Cubical Multiparameter Persistence Vectorizations-
[pdf]
[supp]
[bibtex]@InProceedings{Korkmaz_2025_ICCV,
  author    = {Korkmaz, Caner and Nuwagira, Brighton and Coskunuzer, Baris and Birdal, Tolga},
  title     = {{CuMPerLay}: Learning Cubical Multiparameter Persistence Vectorizations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27084--27094},
}
Recovering Parametric Scenes from Very Few Time-of-Flight Pixels-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sifferman_2025_ICCV,
  author    = {Sifferman, Carter and Li, Yiquan and Li, Yiming and Mu, Fangzhou and Gleicher, Michael and Gupta, Mohit and Li, Yin},
  title     = {Recovering Parametric Scenes from Very Few Time-of-Flight Pixels},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27989--27999},
}
Always Skip Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ji_2025_ICCV,
  author    = {Ji, Yiping and Saratchandran, Hemanth and Moghadam, Peyman and Lucey, Simon},
  title     = {Always Skip Attention},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23115--23123},
}
ExCap3D: Expressive 3D Scene Understanding via Object Captioning with Varying Detail-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yeshwanth_2025_ICCV,
  author    = {Yeshwanth, Chandan and Rozenberszki, D{\'a}vid and Dai, Angela},
  title     = {{ExCap3D}: Expressive {3D} Scene Understanding via Object Captioning with Varying Detail},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21699--21709},
}
LeGrad: An Explainability Method for Vision Transformers via Feature Formation Sensitivity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bousselham_2025_ICCV,
  author    = {Bousselham, Walid and Boggust, Angie and Chaybouti, Sofian and Strobelt, Hendrik and Kuehne, Hilde},
  title     = {{LeGrad}: An Explainability Method for Vision Transformers via Feature Formation Sensitivity},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20336--20345},
}
GaussianFlowOcc: Sparse and Weakly Supervised Occupancy Estimation using Gaussian Splatting and Temporal Flow-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Boeder_2025_ICCV,
  author    = {Boeder, Simon and Gigengack, Fabian and Risse, Benjamin},
  title     = {{GaussianFlowOcc}: Sparse and Weakly Supervised Occupancy Estimation using {Gaussian} Splatting and Temporal Flow},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24943--24954},
}
OD-RASE: Ontology-Driven Risk Assessment and Safety Enhancement for Autonomous Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Shimomura_2025_ICCV,
  author    = {Shimomura, Kota and Nambata, Masaki and Ishikawa, Atsuya and Mimura, Ryota and Inoue, Koki and Yamashita, Takayoshi and Kawabuchi, Takayuki},
  title     = {{OD-RASE}: Ontology-Driven Risk Assessment and Safety Enhancement for Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26167--26177},
}
Serialization based Point Cloud Oversegmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Chenghui and Kwan, Jianlong and Li, Dilong and Chen, Ziyi and Guan, Haiyan},
  title     = {Serialization based Point Cloud Oversegmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25831--25840},
}
Latent Expression Generation for Referring Image Segmentation and Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Seonghoon and Hong, Joonbeom and Lee, Joonseok and Son, Jeany},
  title     = {Latent Expression Generation for Referring Image Segmentation and Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21374--21383},
}
Beyond Single Images: Retrieval Self-Augmented Unsupervised Camouflaged Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Du_2025_ICCV,
  author    = {Du, Ji and Wang, Xin and Hao, Fangwei and Yu, Mingyang and Chen, Chunyuan and Wu, Jiesheng and Wang, Bin and Xu, Jing and Li, Ping},
  title     = {Beyond Single Images: Retrieval Self-Augmented Unsupervised Camouflaged Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22131--22142},
}
GeoSplatting: Towards Geometry Guided Gaussian Splatting for Physically-based Inverse Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2025_ICCV,
  author    = {Ye, Kai and Gao, Chong and Li, Guanbin and Chen, Wenzheng and Chen, Baoquan},
  title     = {{GeoSplatting}: Towards Geometry Guided {Gaussian} Splatting for Physically-based Inverse Rendering},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28991--29000},
}
Temporal-aware Query Routing for Real-time Video Instance Segmentation-
[pdf]
[bibtex]@InProceedings{Cheng_2025_ICCV,
  author    = {Cheng, Zesen and Li, Kehan and Zhao, Yian and Zhang, Hang and Liu, Chang and Chen, Jie},
  title     = {Temporal-aware Query Routing for Real-time Video Instance Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22467--22476},
}
CapeLLM: Support-Free Category-Agnostic Pose Estimation with Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Junho and Chung, Hyungjin and Kim, Byung-Hoon},
  title     = {{CapeLLM}: Support-Free Category-Agnostic Pose Estimation with Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22889--22898},
}
VisHall3D: Monocular Semantic Scene Completion from Reconstructing the Visible Regions to Hallucinating the Invisible Regions-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Haoang and Su, Yuanqi and Zhang, Xiaoning and Gao, Longjun and Xue, Yu and Wang, Le},
  title     = {{VisHall3D}: Monocular Semantic Scene Completion from Reconstructing the Visible Regions to Hallucinating the Invisible Regions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28674--28684},
}
Controllable 3D Outdoor Scene Generation via Scene Graphs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yuheng and Li, Xinke and Zhang, Yuning and Qi, Lu and Li, Xin and Wang, Wenping and Li, Chongshou and Li, Xueting and Yang, Ming-Hsuan},
  title     = {Controllable {3D} Outdoor Scene Generation via Scene Graphs},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28052--28062},
}
UniOcc: A Unified Benchmark for Occupancy Forecasting and Prediction in Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yuping and Huang, Xiangyu and Sun, Xiaokang and Yan, Mingxuan and Xing, Shuo and Tu, Zhengzhong and Li, Jiachen},
  title     = {{UniOcc}: A Unified Benchmark for Occupancy Forecasting and Prediction in Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25560--25570},
}
NGD: Neural Gradient Based Deformation for Monocular Garment Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dasgupta_2025_ICCV,
  author    = {Dasgupta, Soham and Naik, Shanthika and Savalia, Preet and Ingle, Sujay Kumar and Sharma, Avinash},
  title     = {{NGD}: Neural Gradient Based Deformation for Monocular Garment Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25485--25495},
}
U-ViLAR: Uncertainty-Aware Visual Localization for Autonomous Driving via Differentiable Association and Registration-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Xiaofan and Xu, Zhihao and Wu, Chenming and Yang, Zhao and Zhang, Yumeng and Liu, Jiang-Jiang and Yu, Haibao and Ye, Xiaoqing and Wang, Yuan and Li, Shirui and Sun, Xun and Wan, Ji and Wang, Jun},
  title     = {{U-ViLAR}: Uncertainty-Aware Visual Localization for Autonomous Driving via Differentiable Association and Registration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24889--24898},
}
CF3: Compact and Fast 3D Feature Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Hyunjoon and Min, Joonkyu and Park, Jaesik},
  title     = {{CF3}: Compact and Fast {3D} Feature Fields},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27906--27916},
}
Few-Shot Pattern Detection via Template Matching and Regression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jo_2025_ICCV,
  author    = {Jo, Eunchan and Kang, Dahyun and Kim, Sanghyun and Choi, Yunseon and Cho, Minsu},
  title     = {Few-Shot Pattern Detection via Template Matching and Regression},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21578--21588},
}
Leveraging BEV Paradigm for Ground-to-Aerial Image Synthesis-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ye_2025_ICCV,
  author    = {Ye, Junyan and He, Jun and Li, Weijia and Lv, Zhutao and Lin, Yi and Yu, Jinhua and Yang, Haote and He, Conghui},
  title     = {Leveraging {BEV} Paradigm for Ground-to-Aerial Image Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28451--28461},
}
MultiverSeg: Scalable Interactive Segmentation of Biomedical Imaging Datasets with In-Context Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wong_2025_ICCV,
  author    = {Wong, Hallee E. and Ortiz, Jose Javier Gonzalez and Guttag, John and Dalca, Adrian V.},
  title     = {{MultiverSeg}: Scalable Interactive Segmentation of Biomedical Imaging Datasets with In-Context Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20966--20980},
}
MuGS: Multi-Baseline Generalizable Gaussian Splatting Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lou_2025_ICCV,
  author    = {Lou, Yaopeng and Shen, Liao and Liu, Tianqi and Li, Jiaqi and Huang, Zihao and Sun, Huiqiang and Cao, Zhiguo},
  title     = {{MuGS}: Multi-Baseline Generalizable {Gaussian} Splatting Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25583--25593},
}
Aligning Effective Tokens with Video Anomaly in Large Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Yingxian and Liu, Jiahui and Fan, Ruidi and Li, Yanwei and Chang, Chirui and Zhao, Shizhen and Fok, Wilton W. T. and Qi, Xiaojuan and Wu, Yik-Chung},
  title     = {Aligning Effective Tokens with Video Anomaly in Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22695--22706},
}
Benchmarking Burst Super-Resolution for Polarization Images: Noise Dataset and Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hwang_2025_ICCV,
  author    = {Hwang, Inseung and Choi, Kiseok and Ha, Hyunho and Kim, Min H.},
  title     = {Benchmarking Burst Super-Resolution for Polarization Images: Noise Dataset and Analysis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24899--24909},
}
JointDiT: Enhancing RGB-Depth Joint Modeling with Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Byung-Ki_2025_ICCV,
  author    = {Byung-Ki, Kwon and Dai, Qi and Hyoseok, Lee and Luo, Chong and Oh, Tae-Hyun},
  title     = {{JointDiT}: Enhancing {RGB}-Depth Joint Modeling with Diffusion Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25261--25271},
}
Discretized Gaussian Representation for Tomographic Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Shaokai and Lu, Yuxiang and Guo, Yapan and Ji, Wei and Huang, Suizhi and Yang, Fengyu and Sirejiding, Shalayiding and He, Qichen and Tong, Jing and Ji, Yanbiao and Ding, Yue and Lu, Hongtao},
  title     = {Discretized {Gaussian} Representation for Tomographic Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25073--25082},
}
Language Decoupling with Fine-grained Knowledge Guidance for Referring Multi-object Tracking-
[pdf]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Guangyao and Zhuang, Siping and Jian, Yajun and Yan, Yan and Wang, Hanzi},
  title     = {Language Decoupling with Fine-grained Knowledge Guidance for Referring Multi-object Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23626--23635},
}
Referring Expression Comprehension for Small Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Goto_2025_ICCV,
  author    = {Goto, Kanoko and Hirose, Takumi and Ukai, Mahiro and Kurita, Shuhei and Inoue, Nakamasa},
  title     = {Referring Expression Comprehension for Small Objects},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21231--21242},
}
InstaDrive: Instance-Aware Driving World Models for Realistic and Consistent Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Zhuoran and Guo, Xi and Ding, Chenjing and Wang, Chiyu and Wu, Wei and Zhang, Yanyong},
  title     = {{InstaDrive}: Instance-Aware Driving World Models for Realistic and Consistent Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25410--25420},
}
Global-Aware Monocular Semantic Scene Completion with State Space Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Shijie and Cheng, Zhongyao and Li, Rong and Li, Shuai and Gall, Juergen and Xu, Xun and Yang, Xulei},
  title     = {Global-Aware Monocular Semantic Scene Completion with State Space Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25550--25559},
}
3D Gaussian Splatting Driven Multi-View Robust Physical Adversarial Camouflage Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lou_2025_ICCV,
  author    = {Lou, Tianrui and Jia, Xiaojun and Liang, Siyuan and Liang, Jiawei and Zhang, Ming and Xiao, Yanjun and Cao, Xiaochun},
  title     = {{3D} {Gaussian} Splatting Driven Multi-View Robust Physical Adversarial Camouflage Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28752--28762},
}
SuperMat: Physically Consistent PBR Material Estimation at Interactive Rates-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hong_2025_ICCV,
  author    = {Hong, Yijia and Guo, Yuan-Chen and Yi, Ran and Chen, Yulong and Cao, Yan-Pei and Ma, Lizhuang},
  title     = {{SuperMat}: Physically Consistent {PBR} Material Estimation at Interactive Rates},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25083--25093},
}
HiMTok: Learning Hierarchical Mask Tokens for Image Segmentation with Large Multimodal Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Tao and Cheng, Changxu and Wang, Lingfeng and Chen, Senda and Zhao, Wuyue},
  title     = {{HiMTok}: Learning Hierarchical Mask Tokens for Image Segmentation with Large Multimodal Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23267--23278},
}
MixA-Q: Revisiting Activation Sparsity for Vision Transformers from a Mixed-Precision Quantization Perspective-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Weitian and Shubham, Rai and De La Parra, Cecilia and Kumar, Akash},
  title     = {{MixA-Q}: Revisiting Activation Sparsity for Vision Transformers from a Mixed-Precision Quantization Perspective},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22143--22152},
}
EMD: Explicit Motion Modeling for High-Quality Street Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Xiaobao and Wuwu, Qingpo and Zhao, Zhongyu and Wu, Zhuangzhe and Huang, Nan and Lu, Ming and Ma, Ningning and Zhang, Shanghang},
  title     = {{EMD}: Explicit Motion Modeling for High-Quality Street {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28462--28472},
}
Neural Multi-View Self-Calibrated Photometric Stereo without Photometric Stereo Cues-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2025_ICCV,
  author    = {Cao, Xu and Taketomi, Takafumi},
  title     = {Neural Multi-View Self-Calibrated Photometric Stereo without Photometric Stereo Cues},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27552--27562},
}
Multi-Modal Multi-Task Unified Embedding Model (M3T-UEM): A Task-Adaptive Representation Learning Framework-
[pdf]
[supp]
[bibtex]@InProceedings{Sharma_2025_ICCV,
  author    = {Sharma, Rohan and Chen, Changyou and Chang, Feng-Ju and Yun, Seongjun and Xie, Xiaohu and Meng, Rui and Xu, Dehong and Mottini, Alejandro and Cui, Qingjun},
  title     = {Multi-Modal Multi-Task Unified Embedding Model ({M3T-UEM}): A Task-Adaptive Representation Learning Framework},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22783--22793},
}
Towards More Diverse and Challenging Pre-training for Point Cloud Learning: Self-Supervised Cross Reconstruction with Decoupled Views-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Xiangdong and Zhang, Shaofeng and Yan, Junchi},
  title     = {Towards More Diverse and Challenging Pre-training for Point Cloud Learning: Self-Supervised Cross Reconstruction with Decoupled Views},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28696--28706},
}
Baking Gaussian Splatting into Diffusion Denoiser for Fast and Scalable Single-stage Image-to-3D Generation and Reconstruction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Cai_2025_ICCV,
  author    = {Cai, Yuanhao and Zhang, He and Zhang, Kai and Liang, Yixun and Ren, Mengwei and Luan, Fujun and Liu, Qing and Kim, Soo Ye and Zhang, Jianming and Zhang, Zhifei and Zhou, Yuqian and Zhang, Yulun and Yang, Xiaokang and Lin, Zhe and Yuille, Alan},
  title     = {Baking {Gaussian} Splatting into Diffusion Denoiser for Fast and Scalable Single-stage Image-to-{3D} Generation and Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25062--25072},
}
Unsupervised Imaging Inverse Problems with Diffusion Distribution Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Meanti_2025_ICCV,
  author    = {Meanti, Giacomo and Ryckeboer, Thomas and Arbel, Michael and Mairal, Julien},
  title     = {Unsupervised Imaging Inverse Problems with Diffusion Distribution Matching},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28364--28374},
}
Integrating Biological Knowledge for Robust Microscopy Image Profiling on De Novo Cell Lines-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Jiayuan and Pham, Thai-Hoang and Wang, Yuanlong and Zhang, Ping},
  title     = {Integrating Biological Knowledge for Robust Microscopy Image Profiling on De Novo Cell Lines},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22846--22856},
}
OuroMamba: A Data-Free Quantization Framework for Vision Mamba-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ramachandran_2025_ICCV,
  author    = {Ramachandran, Akshat and Lee, Mingyu and Xu, Huan and Kundu, Souvik and Krishna, Tushar},
  title     = {{OuroMamba}: A Data-Free Quantization Framework for Vision {Mamba}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21177--21186},
}
MA-CIR: A Multimodal Arithmetic Benchmark for Composed Image Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Byun_2025_ICCV,
  author    = {Byun, Jaeseok and Jang, Young Kyun and Jeong, Seokhyeon and Kim, Donghyun and Moon, Taesup},
  title     = {{MA-CIR}: A Multimodal Arithmetic Benchmark for Composed Image Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21342--21352},
}
Large-scale Pre-training for Grounded Video Caption Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kazakos_2025_ICCV,
  author    = {Kazakos, Evangelos and Schmid, Cordelia and Sivic, Josef},
  title     = {Large-scale Pre-training for Grounded Video Caption Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24434--24444},
}
SimMLM: A Simple Framework for Multi-modal Learning with Missing Modality-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Sijie and Chen, Chen and Han, Jungong},
  title     = {{SimMLM}: A Simple Framework for Multi-modal Learning with Missing Modality},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24068--24077},
}
LINR-PCGC: Lossless Implicit Neural Representations for Point Cloud Geometry Compression-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Wenjie and Yang, Qi and Xia, Shuting and Huang, He and Xu, Yiling and Li, Zhu},
  title     = {{LINR-PCGC}: Lossless Implicit Neural Representations for Point Cloud Geometry Compression},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28577--28586},
}
RoCo-Sim: Enhancing Roadside Collaborative Perception through Foreground Simulation-
[pdf]
[bibtex]@InProceedings{Du_2025_ICCV,
  author    = {Du, Yuwen and Hu, Anning and Chao, Zichen and Lu, Yifan and Ge, Junhao and Liu, Genjia and Wu, Weitao and Wang, Lanjun and Chen, Siheng},
  title     = {{RoCo-Sim}: Enhancing Roadside Collaborative Perception through Foreground Simulation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26977--26986},
}
UKBOB: One Billion MRI Labeled Masks for Generalizable 3D Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bourigault_2025_ICCV,
  author    = {Bourigault, Emmanuelle and Jamaludin, Amir and Hamdi, Abdullah},
  title     = {{UKBOB}: One Billion {MRI} Labeled Masks for Generalizable {3D} Medical Image Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21600--21611},
}
GaussianOcc: Fully Self-supervised and Efficient 3D Occupancy Estimation with Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gan_2025_ICCV,
  author    = {Gan, Wanshui and Liu, Fang and Xu, Hongbin and Mo, Ningkai and Yokoya, Naoto},
  title     = {{GaussianOcc}: Fully Self-supervised and Efficient {3D} Occupancy Estimation with {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28980--28990},
}
Free-MoRef: Instantly Multiplexing Context Perception Capabilities of Video-MLLMs within Single Inference-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Kuo and Zheng, Quanlong and Xie, Junlin and Zhang, Yanhao and Luo, Jinguo and Lu, Haonan and Lin, Liang and Zhou, Fan and Li, Guanbin},
  title     = {{Free-MoRef}: Instantly Multiplexing Context Perception Capabilities of {Video-MLLMs} within Single Inference},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22499--22508},
}
Liberated-GS: 3D Gaussian Splatting Independent from SfM Point Clouds-
[pdf]
[supp]
[bibtex]@InProceedings{Pan_2025_ICCV,
  author    = {Pan, Weihong and Zhang, Xiaoyu and Zhai, Hongjia and Xiang, Xiaojun and Jiang, Hanqing and Zhang, Guofeng},
  title     = {{Liberated-GS}: {3D} {Gaussian} Splatting Independent from {SfM} Point Clouds},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26675--26685},
}
DecAD: Decoupling Anomalies in Latent Space for Multi-Class Unsupervised Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Xiaolei and Wang, Xiaoyang and Bai, Huihui and Lim, Eng Gee and Xiao, Jimin},
  title     = {{DecAD}: Decoupling Anomalies in Latent Space for Multi-Class Unsupervised Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21568--21577},
}
Kaputt: A Large-Scale Dataset for Visual Defect Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Hofer_2025_ICCV,
  author    = {H{\"o}fer, Sebastian and Henning, Dorian F. and Amiranashvili, Artemij and Morrison, Douglas and Tzes, Mariliza and Posner, Ingmar and Matvienko, Marc and Rennola, Alessandro and Milan, Anton},
  title     = {Kaputt: A Large-Scale Dataset for Visual Defect Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24224--24233},
}
Occupancy Learning with Spatiotemporal Memory-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Leng_2025_ICCV,
  author    = {Leng, Ziyang and Yang, Jiawei and Yi, Wenlong and Zhou, Bolei},
  title     = {Occupancy Learning with Spatiotemporal Memory},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26569--26578},
}
ReferEverything: Towards Segmenting Everything We Can Speak of in Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bagchi_2025_ICCV,
  author    = {Bagchi, Anurag and Bao, Zhipeng and Wang, Yu-Xiong and Tokmakov, Pavel and Hebert, Martial},
  title     = {{ReferEverything}: Towards Segmenting Everything We Can Speak of in Videos},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23221--23231},
}
Make Your Training Flexible: Towards Deployment-Efficient Video Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Chenting and Li, Kunchang and Jiang, Tianxiang and Zeng, Xiangyu and Wang, Yi and Wang, Limin},
  title     = {Make Your Training Flexible: Towards Deployment-Efficient Video Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23880--23891},
}
RadarSplat: Radar Gaussian Splatting for High-Fidelity Data Synthesis and 3D Reconstruction of Autonomous Driving Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kung_2025_ICCV,
  author    = {Kung, Pou-Chun and Harisha, Skanda and Vasudevan, Ram and Eid, Aline and Skinner, Katherine A.},
  title     = {{RadarSplat}: Radar {Gaussian} Splatting for High-Fidelity Data Synthesis and {3D} Reconstruction of Autonomous Driving Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27596--27606},
}
Hierarchical 3D Scene Graphs Construction Outdoors-
[pdf]
[supp]
[bibtex]@InProceedings{Nyffeler_2025_ICCV,
  author    = {Nyffeler, Jon and Tombari, Federico and Barath, Daniel},
  title     = {Hierarchical {3D} Scene Graphs Construction Outdoors},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26817--26826},
}
SIC: Similarity-Based Interpretable Image Classification with Neural Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wolf_2025_ICCV,
  author    = {Wolf, Tom Nuno and Kavak, Emre and Bongratz, Fabian and Wachinger, Christian},
  title     = {{SIC}: Similarity-Based Interpretable Image Classification with Neural Networks},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24276--24285},
}
CoLMDriver: LLM-based Negotiation Benefits Cooperative Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Changxing and Liu, Genjia and Wang, Zijun and Yang, Jinchang and Chen, Siheng},
  title     = {{CoLMDriver}: {LLM}-based Negotiation Benefits Cooperative Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25951--25960},
}
InsideOut: Integrated RGB-Radiative Gaussian Splatting for Comprehensive 3D Object Representation-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Jungmin and Hong, Seonghyuk and Lee, Juyong and Lee, Jaeyoon and Choi, Jongwon},
  title     = {{InsideOut}: Integrated {RGB}-Radiative {Gaussian} Splatting for Comprehensive {3D} Object Representation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25820--25830},
}
Splat-LOAM: Gaussian Splatting LiDAR Odometry and Mapping-
[pdf]
[supp]
[bibtex]@InProceedings{Giacomini_2025_ICCV,
  author    = {Giacomini, Emanuele and Di Giammarino, Luca and De Rebotti, Lorenzo and Grisetti, Giorgio and Oswald, Martin R.},
  title     = {{Splat-LOAM}: {Gaussian} Splatting {LiDAR} Odometry and Mapping},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27630--27639},
}
RTMap: Real-Time Recursive Mapping with Change Detection and Localization-
[pdf]
[arXiv]
[bibtex]@InProceedings{Du_2025_ICCV,
  author    = {Du, Yuheng and Yang, Sheng and Wang, Lingxuan and Hou, Zhenghua and Cai, Chengying and Tan, Zhitao and Chen, Mingxia and Huang, Shi-Sheng and Li, Qiang},
  title     = {{RTMap}: Real-Time Recursive Mapping with Change Detection and Localization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28021--28030},
}
Exploiting Domain Properties in Language-Driven Domain Generalization for Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Jeon_2025_ICCV, author = {Jeon, Seogkyu and Hong, Kibeom and Byun, Hyeran}, title = {Exploiting Domain Properties in Language-Driven Domain Generalization for Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20791-20801} }
OVG-HQ: Online Video Grounding with Hybrid-modal Queries-
[pdf]
[supp]
[bibtex]@InProceedings{Zeng_2025_ICCV, author = {Zeng, Runhao and Mao, Jiaqi and Lai, Minghao and Phan, Minh Hieu and Dong, Yanjie and Wang, Wei and Chen, Qi and Hu, Xiping}, title = {OVG-HQ: Online Video Grounding with Hybrid-modal Queries}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21085-21096} }
ConformalSAM: Unlocking the Potential of Foundational Segmentation Models in Semi-Supervised Semantic Segmentation with Conformal Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Danhui and Liu, Ziquan and Yang, Chuxi and Wang, Dan and Yan, Yan and Xu, Yi and Ji, Xiangyang}, title = {ConformalSAM: Unlocking the Potential of Foundational Segmentation Models in Semi-Supervised Semantic Segmentation with Conformal Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24045-24055} }
V2PE: Improving Multimodal Long-Context Capability of Vision-Language Models with Variable Visual Position Encoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ge_2025_ICCV, author = {Ge, Junqi and Chen, Ziyi and Lin, Jintao and Zhu, Jinguo and Liu, Xihui and Dai, Jifeng and Zhu, Xizhou}, title = {V2PE: Improving Multimodal Long-Context Capability of Vision-Language Models with Variable Visual Position Encoding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21070-21084} }
Multi-modal Segment Anything Model for Camouflaged Scene Segmentation-
[pdf]
[bibtex]@InProceedings{Ren_2025_ICCV, author = {Ren, Guangyu and Liu, Hengyan and Lazarou, Michalis and Stathaki, Tania}, title = {Multi-modal Segment Anything Model for Camouflaged Scene Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19882-19892} }
LLaVA-PruMerge: Adaptive Token Reduction for Efficient Large Multimodal Models-
[pdf]
[supp]
[bibtex]@InProceedings{Shang_2025_ICCV, author = {Shang, Yuzhang and Cai, Mu and Xu, Bingxin and Lee, Yong Jae and Yan, Yan}, title = {LLaVA-PruMerge: Adaptive Token Reduction for Efficient Large Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22857-22867} }
Event-boosted Deformable 3D Gaussians for Dynamic Scene Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Wenhao and Weng, Wenming and Zhang, Yueyi and Xu, Ruikang and Xiong, Zhiwei}, title = {Event-boosted Deformable 3D Gaussians for Dynamic Scene Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28334-28343} }
Stereo Any Video: Temporally Consistent Stereo Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jing_2025_ICCV, author = {Jing, Junpeng and Luo, Weixun and Mao, Ye and Mikolajczyk, Krystian}, title = {Stereo Any Video: Temporally Consistent Stereo Matching}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20836-20846} }
Constraint-Aware Feature Learning for Parametric Point Cloud-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2025_ICCV, author = {Cheng, Xi and Lei, Ruiqi and Huang, Di and Liao, Zhichao and Piao, Fengyuan and Chen, Yan and Feng, Pingfa and Zeng, Long}, title = {Constraint-Aware Feature Learning for Parametric Point Cloud}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28114-28124} }
Revisiting Point Cloud Completion: Are We Ready For The Real-World?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pathak_2025_ICCV, author = {Pathak, Stuti and Kumar, Prashant and Baiju, Dheeraj and Mboga, Nicholus and Steenackers, Gunther and Penne, Rudi}, title = {Revisiting Point Cloud Completion: Are We Ready For The Real-World?}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25388-25398} }
Structure-aware Semantic Discrepancy and Consistency for 3D Medical Image Self-supervised Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2025_ICCV, author = {Pan, Tan and Tan, Zhaorui and Guo, Kaiyu and Xu, Dongli and Xu, Weidi and Jiang, Chen and Guo, Xin and Qi, Yuan and Cheng, Yuan}, title = {Structure-aware Semantic Discrepancy and Consistency for 3D Medical Image Self-supervised Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20257-20267} }
MagShield: Towards Better Robustness in Sparse Inertial Motion Capture Under Magnetic Disturbances-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shao_2025_ICCV, author = {Shao, Yunzhe and Yi, Xinyu and Yin, Lu and Guo, Shihui and Yong, Junhai and Xu, Feng}, title = {MagShield: Towards Better Robustness in Sparse Inertial Motion Capture Under Magnetic Disturbances}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {29021-29030} }
Bias-Resilient Weakly Supervised Semantic Segmentation Using Normalizing Flows-
[pdf]
[supp]
[bibtex]@InProceedings{Qiu_2025_ICCV, author = {Qiu, Xianglin and Wang, Xiaoyang and Zhang, Zhen and Xiao, Jimin}, title = {Bias-Resilient Weakly Supervised Semantic Segmentation Using Normalizing Flows}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21321-21330} }
UAVScenes: A Multi-Modal Dataset for UAVs-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Sijie and Li, Siqi and Zhang, Yawei and Yu, Shangshu and Yuan, Shenghai and She, Rui and Guo, Quanjiang and Zheng, JinXuan and Howe, Ong Kang and Chandra, Leonrich and Srijeyan, Shrivarshann and Sivadas, Aditya and Aggarwal, Toshan and Liu, Heyuan and Zhang, Hongming and Chen, Chujie and Jiang, Junyu and Xie, Lihua and Tay, Wee Peng}, title = {UAVScenes: A Multi-Modal Dataset for UAVs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28946-28958} }
TPG-INR: Target Prior-Guided Implicit 3D CT Reconstruction for Enhanced Sparse-view Imaging-
[pdf]
[bibtex]@InProceedings{Cao_2025_ICCV, author = {Cao, Qinglei and Tang, Ziyao and Tang, Xiaoqin}, title = {TPG-INR: Target Prior-Guided Implicit 3D CT Reconstruction for Enhanced Sparse-view Imaging}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28239-28248} }
S4M: Boosting Semi-Supervised Instance Segmentation with SAM-
[pdf]
[supp]
[bibtex]@InProceedings{Yoon_2025_ICCV, author = {Yoon, Heeji and Shin, Heeseong and Hong, Eunbeen and Choi, Hyunwook and Cho, Hansang and Jeong, Daun and Kim, Seungryong}, title = {S4M: Boosting Semi-Supervised Instance Segmentation with SAM}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20226-20236} }
Debiased Curriculum Adaptation for Safe Transfer Learning in Chest X-ray Classification-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Mingyang and Chen, Xinyang and Shu, Yang and Li, Xiucheng and Guan, Weili and Nie, Liqiang}, title = {Debiased Curriculum Adaptation for Safe Transfer Learning in Chest X-ray Classification}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22610-22619} }
Quadratic Gaussian Splatting: High Quality Surface Reconstruction with Second-order Geometric Primitives-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Ziyu and Huang, Binbin and Jiang, Hanqing and Zhou, Liyang and Xiang, Xiaojun and Shen, Shuhan}, title = {Quadratic Gaussian Splatting: High Quality Surface Reconstruction with Second-order Geometric Primitives}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28260-28270} }
Prompt Guidance and Human Proximal Perception for HOT Prediction with Regional Joint Loss-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Yuxiao and Lei, Yu and Wei, Zhenao and Xue, Weiying and Jiang, Xinyu and Zhuang, Nan and Liu, Qi}, title = {Prompt Guidance and Human Proximal Perception for HOT Prediction with Regional Joint Loss}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23636-23645} }
Towards Omnimodal Expressions and Reasoning in Referring Audio-Visual Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ying_2025_ICCV, author = {Ying, Kaining and Ding, Henghui and Jie, Guangquan and Jiang, Yu-Gang}, title = {Towards Omnimodal Expressions and Reasoning in Referring Audio-Visual Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22575-22585} }
Towards a Universal 3D Medical Multi-modality Generalization via Learning Personalized Invariant Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tan_2025_ICCV, author = {Tan, Zhaorui and Yang, Xi and Pan, Tan and Liu, Tianyi and Jiang, Chen and Guo, Xin and Wang, Qiufeng and Nguyen, Anh and Qi, Yuan and Huang, Kaizhu and Cheng, Yuan}, title = {Towards a Universal 3D Medical Multi-modality Generalization via Learning Personalized Invariant Representation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21895-21905} }
SurfaceSplat: Connecting Surface Reconstruction and Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_ICCV, author = {Gao, Zihui and Bian, Jia-Wang and Lin, Guosheng and Chen, Hao and Shen, Chunhua}, title = {SurfaceSplat: Connecting Surface Reconstruction and Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28525-28534} }
TransiT: Transient Transformer for Non-line-of-sight Videography-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Ruiqian and Shen, Siyuan and Xia, Suan and Wang, Ziheng and Peng, Xingyue and Song, Chengxuan and Zhu, Yingsheng and Wu, Tao and Li, Shiying and Yu, Jingyi}, title = {TransiT: Transient Transformer for Non-line-of-sight Videography}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27542-27551} }
SAFT: Shape and Appearance of Fabrics from Template via Differentiable Physical Simulations from Monocular Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Stotko_2025_ICCV, author = {Stotko, David and Klein, Reinhard}, title = {SAFT: Shape and Appearance of Fabrics from Template via Differentiable Physical Simulations from Monocular Video}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27660-27670} }
AD-GS: Object-Aware B-Spline Gaussian Splatting for Self-Supervised Autonomous Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Jiawei and Deng, Kai and Fan, Zexin and Wang, Shenlong and Xie, Jin and Yang, Jian}, title = {AD-GS: Object-Aware B-Spline Gaussian Splatting for Self-Supervised Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24770-24779} }
HRScene: How Far Are VLMs from Effective High-Resolution Image Understanding?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Yusen and Zheng, Wenliang and Madasu, Aashrith and Shi, Peng and Kamoi, Ryo and Zhou, Hao and Zou, Zhuoyang and Zhao, Shu and Das, Sarkar Snigdha Sarathi and Gupta, Vipul and Lu, Xiaoxin and Zhang, Nan and Zhang, Ranran Haoran and Iyer, Avitej and Lou, Renze and Yin, Wenpeng and Zhang, Rui}, title = {HRScene: How Far Are VLMs from Effective High-Resolution Image Understanding?}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22922-22933} }
Graph Domain Adaptation with Dual-branch Encoder and Two-level Alignment for Whole Slide Image-based Survival Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shou_2025_ICCV, author = {Shou, Yuntao and Cao, Xiangyong and Yan, Peiqiang and Hui, Qiao and Zhao, Qian and Meng, Deyu}, title = {Graph Domain Adaptation with Dual-branch Encoder and Two-level Alignment for Whole Slide Image-based Survival Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19925-19935} }
Free-running vs Synchronous: Single-Photon Lidar for High-flux 3D Imaging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kitichotkul_2025_ICCV, author = {Kitichotkul, Ruangrawee and Bharadwaj, Shashwath and Rapp, Joshua and Ma, Yanting and Mehta, Alexander and Goyal, Vivek K}, title = {Free-running vs Synchronous: Single-Photon Lidar for High-flux 3D Imaging}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25972-25982} }
MobileIE: An Extremely Lightweight and Effective ConvNet for Real-Time Image Enhancement on Mobile Devices-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2025_ICCV, author = {Yan, Hailong and Li, Ao and Zhang, Xiangtao and Liu, Zhe and Shi, Zenglin and Zhu, Ce and Zhang, Le}, title = {MobileIE: An Extremely Lightweight and Effective ConvNet for Real-Time Image Enhancement on Mobile Devices}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21949-21960} }
ACE-G: Improving Generalization of Scene Coordinate Regression Through Query Pre-Training-
[pdf]
[supp]
[bibtex]@InProceedings{Bruns_2025_ICCV, author = {Bruns, Leonard and Barroso-Laguna, Axel and Cavallari, Tommaso and Monszpart, Aron and Munukutla, Sowmya and Prisacariu, Victor Adrian and Brachmann, Eric}, title = {ACE-G: Improving Generalization of Scene Coordinate Regression Through Query Pre-Training}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26751-26761} }
Everything is a Video: Unifying Modalities through Next-Frame Prediction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Hudson_2025_ICCV, author = {Hudson, G. Thomas and Slack, Dean and Winterbottom, Thomas and Sterling, Jamie and Xiao, Chenghao and Shentu, Junjie and Al Moubayed, Noura}, title = {Everything is a Video: Unifying Modalities through Next-Frame Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22004-22013} }
LayerTracer: Cognitive-Aligned Layered SVG Synthesis via Diffusion Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2025_ICCV, author = {Song, Yiren and Chen, Danze and Shou, Mike Zheng}, title = {LayerTracer: Cognitive-Aligned Layered SVG Synthesis via Diffusion Transformer}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19731-19741} }
TopoTTA: Topology-Enhanced Test-Time Adaptation for Tubular Structure Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Jiale and Wang, Wenhan and Li, Shikun and Qu, Xiaolei and Guo, Xin and Liu, Yizhong and Tang, Wenzhong and Lin, Xun and Zheng, Yefeng}, title = {TopoTTA: Topology-Enhanced Test-Time Adaptation for Tubular Structure Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24123-24134} }
Counting Stacked Objects-
[pdf]
[supp]
[bibtex]@InProceedings{Dumery_2025_ICCV, author = {Dumery, Corentin and Ett{\'e}, Noa and Fan, Aoxiang and Li, Ren and Xu, Jingyi and Le, Hieu and Fua, Pascal}, title = {Counting Stacked Objects}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19774-19783} }
Sparfels: Fast Reconstruction from Sparse Unposed Imagery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jena_2025_ICCV, author = {Jena, Shubhendu and Ouasfi, Amine and Younes, Mae and Boukhayma, Adnane}, title = {Sparfels: Fast Reconstruction from Sparse Unposed Imagery}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27476-27487} }
MikuDance: Animating Character Art with Mixed Motion Dynamics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Jiaxu and Zeng, Xianfang and Chen, Xin and Zuo, Wei and Yu, Gang and Tu, Zhigang}, title = {MikuDance: Animating Character Art with Mixed Motion Dynamics}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19689-19699} }
GaussRender: Learning 3D Occupancy with Gaussian Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chambon_2025_ICCV, author = {Chambon, Loick and Zablocki, Eloi and Boulch, Alexandre and Chen, Mickael and Cord, Matthieu}, title = {GaussRender: Learning 3D Occupancy with Gaussian Rendering}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27010-27020} }
UniDxMD: Towards Unified Representation for Cross-Modal Unsupervised Domain Adaptation in 3D Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Liang_2025_ICCV, author = {Liang, Zhengyin and Yin, Hui and Liang, Min and Du, Qianqian and Yang, Ying and Huang, Hua}, title = {UniDxMD: Towards Unified Representation for Cross-Modal Unsupervised Domain Adaptation in 3D Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20346-20356} }
HumanOLAT: A Large-Scale Dataset for Full-Body Human Relighting and Novel-View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Teufel_2025_ICCV, author = {Teufel, Timo and Gera, Pulkit and Zhou, Xilong and Iqbal, Umar and Rao, Pramod and Kautz, Jan and Golyanik, Vladislav and Theobalt, Christian}, title = {HumanOLAT: A Large-Scale Dataset for Full-Body Human Relighting and Novel-View Synthesis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {29131-29141} }
Momentum-GS: Momentum Gaussian Self-Distillation for High-Quality Large Scene Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Fan_2025_ICCV, author = {Fan, Jixuan and Li, Wanhua and Han, Yifei and Dai, Tianru and Tang, Yansong}, title = {Momentum-GS: Momentum Gaussian Self-Distillation for High-Quality Large Scene Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25250-25260} }
SEHDR: Single-Exposure HDR Novel View Synthesis via 3D Gaussian Bracketing-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Yiyu and Wang, Haoyuan and Xu, Ke and Hancke, Gerhard Petrus and Lau, Rynson W. H.}, title = {SEHDR: Single-Exposure HDR Novel View Synthesis via 3D Gaussian Bracketing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26045-26054} }
UPP: Unified Point-Level Prompting for Robust Point Cloud Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ai_2025_ICCV, author = {Ai, Zixiang and Cui, Zhenyu and Peng, Yuxin and Zhou, Jiahuan}, title = {UPP: Unified Point-Level Prompting for Robust Point Cloud Analysis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27359-27368} }
MOSAIC: Generating Consistent, Privacy-Preserving Scenes from Multiple Depth Views in Multi-Room Environments-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Zhixuan and Zhu, Haokun and Chen, Rui and Francis, Jonathan and Hwang, Soonmin and Zhang, Ji and Oh, Jean}, title = {MOSAIC: Generating Consistent, Privacy-Preserving Scenes from Multiple Depth Views in Multi-Room Environments}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27456-27465} }
Towards Video Thinking Test: A Holistic Benchmark for Advanced Video Reasoning and Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Yuanhan and Chew, Yunice and Dong, Yuhao and Leo, Aria and Hu, Bo and Liu, Ziwei}, title = {Towards Video Thinking Test: A Holistic Benchmark for Advanced Video Reasoning and Understanding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20626-20636} }
Decoupled Diffusion Sparks Adaptive Scene Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Yunsong and Ye, Naisheng and Ljungbergh, William and Li, Tianyu and Yang, Jiazhi and Yang, Zetong and Zhu, Hongzi and Petersson, Christoffer and Li, Hongyang}, title = {Decoupled Diffusion Sparks Adaptive Scene Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27760-27770} }
PriorMotion: Generative Class-Agnostic Motion Prediction with Raster-Vector Motion Field Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Qian_2025_ICCV, author = {Qian, Kangan and Miao, Jinyu and Jiao, Xinyu and Luo, Ziang and Fu, Zheng and Shi, Yining and Wang, Yunlong and Jiang, Kun and Yang, Diange}, title = {PriorMotion: Generative Class-Agnostic Motion Prediction with Raster-Vector Motion Field Priors}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27284-27294} }
Scaling Tumor Segmentation: Best Lessons from Real and Synthetic Data-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Qi and Zhou, Xinze and Liu, Chen and Chen, Hao and Li, Wenxuan and Jiang, Zekun and Huang, Ziyan and Zhao, Yuxuan and Yu, Dexin and He, Junjun and Zheng, Yefeng and Shao, Ling and Yuille, Alan and Zhou, Zongwei}, title = {Scaling Tumor Segmentation: Best Lessons from Real and Synthetic Data}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24001-24013} }
MobileViCLIP: An Efficient Video-Text Model for Mobile Devices-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Min and Jia, Zihan and Dai, Zhilin and Guo, Sheng and Wang, Limin}, title = {MobileViCLIP: An Efficient Video-Text Model for Mobile Devices}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20824-20835} }
SignRep: Enhancing Self-Supervised Sign Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wong_2025_ICCV, author = {Wong, Ryan and Camgoz, Necati Cihan and Bowden, Richard}, title = {SignRep: Enhancing Self-Supervised Sign Representations}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22804-22814} }
Visual Relation Diffusion for Human-Object Interaction Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Cao_2025_ICCV, author = {Cao, Ping and Tang, Yepeng and Zhang, Chunjie and Zheng, Xiaolong and Liang, Chao and Wei, Yunchao and Zhao, Yao}, title = {Visual Relation Diffusion for Human-Object Interaction Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23551-23560} }
Egocentric Action-aware Inertial Localization in Point Clouds with Vision-Language Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Mingfang and Yonetani, Ryo and Huang, Yifei and Ouyang, Liangyang and Liu, Ruicong and Sato, Yoichi}, title = {Egocentric Action-aware Inertial Localization in Point Clouds with Vision-Language Guidance}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27209-27219} }
EVT: Efficient View Transformation for Multi-Modal 3D Object Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lee_2025_ICCV, author = {Lee, Yongjin and Jeong, Hyeon-Mun and Jeon, Yurim and Kim, Sanghyun}, title = {EVT: Efficient View Transformation for Multi-Modal 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26632-26642} }
Vision-Language Neural Graph Featurization for Extracting Retinal Lesions-
[pdf]
[bibtex]@InProceedings{Hassan_2025_ICCV, author = {Hassan, Taimur and Sohail, Anabia and Naseer, Muzammal and Werghi, Naoufel}, title = {Vision-Language Neural Graph Featurization for Extracting Retinal Lesions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23700-23709} }
V2XPnP: Vehicle-to-Everything Spatio-Temporal Fusion for Multi-Agent Perception and Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Zewei and Xiang, Hao and Zheng, Zhaoliang and Zhao, Seth Z. and Lei, Mingyue and Zhang, Yun and Cai, Tianhui and Liu, Xinyi and Liu, Johnson and Bajji, Maheswari and Xia, Xin and Huang, Zhiyu and Zhou, Bolei and Ma, Jiaqi}, title = {V2XPnP: Vehicle-to-Everything Spatio-Temporal Fusion for Multi-Agent Perception and Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25399-25409} }
Fix-CLIP: Dual-Branch Hierarchical Contrastive Learning via Synthetic Captions for Better Understanding of Long Text-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Bingchao and Ning, Zhiwei and Ding, Jianyu and Gao, Xuanang and Li, Yin and Jiang, Dongsheng and Yang, Jie and Liu, Wei}, title = {Fix-CLIP: Dual-Branch Hierarchical Contrastive Learning via Synthetic Captions for Better Understanding of Long Text}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20694-20704} }
ETVA: Evaluation of Text-to-Video Alignment via Fine-grained Question Generation and Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guan_2025_ICCV, author = {Guan, Kaisi and Lai, Zhengfeng and Sun, Yuchong and Zhang, Peng and Liu, Wei and Liu, Kieran and Cao, Meng and Song, Ruihua}, title = {ETVA: Evaluation of Text-to-Video Alignment via Fine-grained Question Generation and Answering}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21299-21309} }
Outdoor Monocular SLAM with Global Scale-Consistent 3D Gaussian Pointmaps-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2025_ICCV, author = {Cheng, Chong and Yu, Sicheng and Wang, Zijian and Zhou, Yifan and Wang, Hao}, title = {Outdoor Monocular SLAM with Global Scale-Consistent 3D Gaussian Pointmaps}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26035-26044} }
7DGS: Unified Spatial-Temporal-Angular Gaussian Splatting-
[pdf]
[arXiv]
[bibtex]@InProceedings{Gao_2025_ICCV, author = {Gao, Zhongpai and Planche, Benjamin and Zheng, Meng and Choudhuri, Anwesa and Chen, Terrence and Wu, Ziyan}, title = {7DGS: Unified Spatial-Temporal-Angular Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26316-26325} }
Robust and Efficient 3D Gaussian Splatting for Urban Scene Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2025_ICCV, author = {Yuan, Zhensheng and Huang, Haozhi and Xiong, Zhen and Wang, Di and Yang, Guanghua}, title = {Robust and Efficient 3D Gaussian Splatting for Urban Scene Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26209-26219} }
Iris: Breaking GUI Complexity with Adaptive Focus and Self-Refining-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ge_2025_ICCV, author = {Ge, Zhiqi and Li, Juncheng and Pang, Xinglei and Gao, Minghe and Pan, Kaihang and Lin, Wang and Fei, Hao and Zhang, Wenqiao and Tang, Siliang and Zhuang, Yueting}, title = {Iris: Breaking GUI Complexity with Adaptive Focus and Self-Refining}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24559-24568} }
SciVid: Cross-Domain Evaluation of Video Models in Scientific Applications-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hasson_2025_ICCV, author = {Hasson, Yana and Luc, Pauline and Momeni, Liliane and Ovsjanikov, Maks and Le Moing, Guillaume and Kuznetsova, Alina and Ktena, Ira and Sun, Jennifer J. and Koppula, Skanda and Gokay, Dilara and Heyward, Joseph and Pot, Etienne and Zisserman, Andrew}, title = {SciVid: Cross-Domain Evaluation of Video Models in Scientific Applications}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21800-21811} }
Vision-Language Models Can't See the Obvious-
[pdf]
[bibtex]@InProceedings{Huynh_2025_ICCV, author = {Huynh, Ngoc Dung and Le-Khac, Phuc H and Para, Wamiq Reyaz and Singh, Ankit and Narayan, Sanath}, title = {Vision-Language Models Can't See the Obvious}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24159-24169} }
CountSE: Soft Exemplar Open-set Object Counting-
[pdf]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Shuai and Zhang, Peng and Zhang, Shiwei and Ke, Wei}, title = {CountSE: Soft Exemplar Open-set Object Counting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21536-21546} }
MIORe & VAR-MIORe: Benchmarks to Push the Boundaries of Restoration-
[pdf]
[supp]
[bibtex]@InProceedings{Ciubotariu_2025_ICCV, author = {Ciubotariu, George and Zhou, Zhuyun and Wu, Zongwei and Timofte, Radu}, title = {MIORe \& VAR-MIORe: Benchmarks to Push the Boundaries of Restoration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19784-19793} }
Thermal Polarimetric Multi-view Stereo-
[pdf]
[bibtex]@InProceedings{Kushida_2025_ICCV, author = {Kushida, Takahiro and Tanaka, Kenichiro}, title = {Thermal Polarimetric Multi-view Stereo}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27390-27399} }
SparseVILA: Decoupling Visual Sparsity for Efficient VLM Inference-
[pdf]
[supp]
[bibtex]@InProceedings{Khaki_2025_ICCV, author = {Khaki, Samir and Guo, Junxian and Tang, Jiaming and Yang, Shang and Chen, Yukang and Plataniotis, Konstantinos N. and Lu, Yao and Han, Song and Liu, Zhijian}, title = {SparseVILA: Decoupling Visual Sparsity for Efficient VLM Inference}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23784-23794} }
Relative Illumination Fields: Learning Medium and Light Independent Underwater Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{She_2025_ICCV, author = {She, Mengkun and Seegr\"aber, Felix and Nakath, David and Sch\"ontag, Patricia and K\"oser, Kevin}, title = {Relative Illumination Fields: Learning Medium and Light Independent Underwater Scenes}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {29110-29119} }
Wave-MambaAD: Wavelet-driven State Space Model for Multi-class Unsupervised Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Qiao and Shao, Mingwen and Chen, Xinyuan and Lv, Xiang and Xu, Kai}, title = {Wave-MambaAD: Wavelet-driven State Space Model for Multi-class Unsupervised Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20868-20877} }
Inter2Former: Dynamic Hybrid Attention for Efficient High-Precision Interactive Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_ICCV, author = {Huang, You and Chen, Lichao and Ji, Jiayi and Cao, Liujuan and Zhang, Shengchuan and Ji, Rongrong}, title = {Inter2Former: Dynamic Hybrid Attention for Efficient High-Precision Interactive Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19816-19826} }
KDA: Knowledge Diffusion Alignment with Enhanced Context for Video Temporal Grounding-
[pdf]
[bibtex]@InProceedings{Ran_2025_ICCV, author = {Ran, Ran and Wei, Jiwei and He, Shiyuan and Ma, Zeyu and Zhang, Chaoning and Xie, Ning and Yang, Yang}, title = {KDA: Knowledge Diffusion Alignment with Enhanced Context for Video Temporal Grounding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23311-23320} }
TrackAny3D: Transferring Pretrained 3D Models for Category-unified 3D Point Cloud Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Mengmeng and Wang, Haonan and Li, Yulong and Kong, Xiangjie and Du, Jiaxin and Shen, Guojiang and Xia, Feng}, title = {TrackAny3D: Transferring Pretrained 3D Models for Category-unified 3D Point Cloud Tracking}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28249-28259} }
S2M2: Scalable Stereo Matching Model for Reliable Depth Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Min_2025_ICCV, author = {Min, Junhong and Jeon, Youngpil and Kim, Jimin and Choi, Minyong}, title = {S2M2: Scalable Stereo Matching Model for Reliable Depth Estimation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26729-26739} }
NETracer: A Topology-Aware Iterative Tracing Approach for Tubular Structure Extraction-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Chao and Jiang, Yangbo and Zheng, Nenggan}, title = {NETracer: A Topology-Aware Iterative Tracing Approach for Tubular Structure Extraction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20593-20602} }
Towards Open-World Generation of Stereo Images and Unsupervised Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qiao_2025_ICCV, author = {Qiao, Feng and Xiong, Zhexiao and Xing, Eric and Jacobs, Nathan}, title = {Towards Open-World Generation of Stereo Images and Unsupervised Matching}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26579-26589} }
PolarAnything: Diffusion-based Polarimetric Image Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Kailong and Lyu, Youwei and Guo, Heng and Li, Si and Ma, Zhanyu and Shi, Boxin}, title = {PolarAnything: Diffusion-based Polarimetric Image Synthesis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26466-26476} }
What's Making That Sound Right Now? Video-centric Audio-Visual Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Choi_2025_ICCV, author = {Choi, Hahyeon and Lee, Junhoo and Kwak, Nojun}, title = {What's Making That Sound Right Now? Video-centric Audio-Visual Localization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20095-20104} }
Scheduling Weight Transitions for Quantization-Aware Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_ICCV, author = {Lee, Junghyup and Jeon, Jeimin and Kim, Dohyung and Ham, Bumsub}, title = {Scheduling Weight Transitions for Quantization-Aware Training}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23466-23475} }
Efficient Spiking Point Mamba for Point Cloud Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Peixi and Chai, Bosong and Zheng, Menghua and Li, Wei and Hu, Zhangchi and Chen, Jie and Zhang, Zheyu and Li, Hebei and Sun, Xiaoyan}, title = {Efficient Spiking Point Mamba for Point Cloud Analysis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26393-26403} }
FE-CLIP: Frequency Enhanced CLIP Model for Zero-Shot Anomaly Detection and Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Gong_2025_ICCV, author = {Gong, Tao and Chu, Qi and Liu, Bin and Zhou, Wei and Yu, Nenghai}, title = {FE-CLIP: Frequency Enhanced CLIP Model for Zero-Shot Anomaly Detection and Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21220-21230} }
Alleviating Textual Reliance in Medical Language-guided Segmentation via Prototype-driven Semantic Approximation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2025_ICCV, author = {Ye, Shuchang and Naseem, Usman and Meng, Mingyuan and Kim, Jinman}, title = {Alleviating Textual Reliance in Medical Language-guided Segmentation via Prototype-driven Semantic Approximation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22316-22326} }
CoTMR: Chain-of-Thought Multi-Scale Reasoning for Training-Free Zero-Shot Composed Image Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_ICCV, author = {Sun, Zelong and Jing, Dong and Lu, Zhiwu}, title = {CoTMR: Chain-of-Thought Multi-Scale Reasoning for Training-Free Zero-Shot Composed Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22675-22684} }
BASIC: Boosting Visual Alignment with Intrinsic Refined Embeddings in Multimodal Large Language Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Tang_2025_ICCV, author = {Tang, Jianting and Wang, Yubo and Cao, Haoyu and Xu, Linli}, title = {BASIC: Boosting Visual Alignment with Intrinsic Refined Embeddings in Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20582-20592} }
Long-term Traffic Simulation with Interleaved Autoregressive Motion and Scenario Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Xiuyu and Tan, Shuhan and Kr\"ahenb\"uhl, Philipp}, title = {Long-term Traffic Simulation with Interleaved Autoregressive Motion and Scenario Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25305-25314} }
RhythmGuassian: Repurposing Generalizable Gaussian Model For Remote Physiological Measurement-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2025_ICCV, author = {Lu, Hao and Zhang, Yuting and Tang, Jiaqi and Fu, Bowen and Ge, Wenhang and Wei, Wei and Wu, Kaishun and Chen, Yingcong}, title = {RhythmGuassian: Repurposing Generalizable Gaussian Model For Remote Physiological Measurement}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20780-20790} }
DASH: Detection and Assessment of Systematic Hallucinations of VLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Augustin_2025_ICCV, author = {Augustin, Maximilian and Neuhaus, Yannic and Hein, Matthias}, title = {DASH: Detection and Assessment of Systematic Hallucinations of VLMs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22748-22759} }
On the Provable Importance of Gradients for Autonomous Language-Assisted Image Clustering-
[pdf]
[supp]
[bibtex]@InProceedings{Peng_2025_ICCV, author = {Peng, Bo and Lu, Jie and Zhang, Guangquan and Fang, Zhen}, title = {On the Provable Importance of Gradients for Autonomous Language-Assisted Image Clustering}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19805-19815} }
ZipVL: Accelerating Vision-Language Models through Dynamic Token Sparsity-
[pdf]
[supp]
[bibtex]@InProceedings{He_2025_ICCV, author = {He, Yefei and Chen, Feng and Liu, Jing and Shao, Wenqi and Zhou, Hong and Zhang, Kaipeng and Zhuang, Bohan}, title = {ZipVL: Accelerating Vision-Language Models through Dynamic Token Sparsity}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20477-20486} }
RayletDF: Raylet Distance Fields for Generalizable 3D Surface Reconstruction from Point Clouds or Gaussians-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2025_ICCV, author = {Wei, Shenxing and Li, Jinxi and Yang, Yafei and Zhou, Siyuan and Yang, Bo}, title = {RayletDF: Raylet Distance Fields for Generalizable 3D Surface Reconstruction from Point Clouds or Gaussians}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25616-25626} }
Planar Affine Rectification from Local Change of Scale and Orientation-
[pdf]
[supp]
[bibtex]@InProceedings{Nissan_2025_ICCV, author = {Nissan, Yuval and Pollefeys, Marc and Barath, Daniel}, title = {Planar Affine Rectification from Local Change of Scale and Orientation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27147-27155} }
You Share Beliefs, I Adapt: Progressive Heterogeneous Collaborative Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Si_2025_ICCV, author = {Si, Hao and Javanmardi, Ehsan and Tsukada, Manabu}, title = {You Share Beliefs, I Adapt: Progressive Heterogeneous Collaborative Perception}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27521-27530} }
Advancing Visual Large Language Model for Multi-granular Versatile Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiang_2025_ICCV, author = {Xiang, Wentao and Tan, Haoxian and Zhong, Yujie and Wei, Cong and Li, Dengjie and Yang, Yujiu}, title = {Advancing Visual Large Language Model for Multi-granular Versatile Perception}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22153-22164} }
STD-GS: Exploring Frame-Event Interaction for SpatioTemporal-Disentangled Gaussian Splatting to Reconstruct High-Dynamic Scene-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Hanyu and Wang, Haonan and Liu, Haoyue and Duan, Yuxing and Yan, Luxin and Lee, Gim Hee}, title = {STD-GS: Exploring Frame-Event Interaction for SpatioTemporal-Disentangled Gaussian Splatting to Reconstruct High-Dynamic Scene}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24801-24810} }
C2MIL: Synchronizing Semantic and Topological Causalities in Multiple Instance Learning for Robust and Interpretable Survival Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Cen_2025_ICCV, author = {Cen, Min and Zhuang, Zhenfeng and Zhang, Yuzhe and Zeng, Min and Magnier, Baptiste and Yu, Lequan and Zhang, Hong and Wang, Liansheng}, title = {C2MIL: Synchronizing Semantic and Topological Causalities in Multiple Instance Learning for Robust and Interpretable Survival Analysis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24392-24401} }
Object-level Correlation for Few-Shot Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wen_2025_ICCV, author = {Wen, Chunlin and Zhang, Yu and Fan, Jie and Zhu, Hongyuan and Wei, Xiu-Shen and Wang, Yijun and Kou, Zhiqiang and Sun, Shuzhou}, title = {Object-level Correlation for Few-Shot Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23689-23699} }
CVFusion: Cross-View Fusion of 4D Radar and Camera for 3D Object Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhong_2025_ICCV, author = {Zhong, Hanzhi and Xiang, Zhiyu and Xu, Ruoyu and Fu, Jingyun and Xu, Peng and Wang, Shaohong and Yang, Zhihao and Pu, Tianyu and Liu, Eryun}, title = {CVFusion: Cross-View Fusion of 4D Radar and Camera for 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28188-28197} }
ViT-Linearizer: Distilling Quadratic Knowledge into Linear-Time Vision Models-
[pdf]
[supp]
[bibtex]@InProceedings{Wei_2025_ICCV, author = {Wei, Guoyizhe and Chellappa, Rama}, title = {ViT-Linearizer: Distilling Quadratic Knowledge into Linear-Time Vision Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20737-20747} }
RobustSplat: Decoupling Densification and Dynamics for Transient-Free 3DGS-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fu_2025_ICCV, author = {Fu, Chuanyu and Zhang, Yuqi and Yao, Kunbin and Chen, Guanying and Xiong, Yuan and Huang, Chuan and Cui, Shuguang and Cao, Xiaochun}, title = {RobustSplat: Decoupling Densification and Dynamics for Transient-Free 3DGS}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27126-27136} }
FOLDER: Accelerating Multi-Modal Large Language Models with Enhanced Performance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Haicheng and Yu, Zhemeng and Spadaro, Gabriele and Ju, Chen and Qu\'etu, Victor and Xiao, Shuai and Tartaglione, Enzo}, title = {FOLDER: Accelerating Multi-Modal Large Language Models with Enhanced Performance}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23614-23625} }
Learn2Synth: Learning Optimal Data Synthesis Using Hypergradients for Brain Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_ICCV, author = {Hu, Xiaoling and Zeng, Xiangrui and Puonti, Oula and Iglesias, Juan Eugenio and Fischl, Bruce and Balbastre, Ya\"el}, title = {Learn2Synth: Learning Optimal Data Synthesis Using Hypergradients for Brain Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20368-20378} }
Bolt3D: Generating 3D Scenes in Seconds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Szymanowicz_2025_ICCV, author = {Szymanowicz, Stanislaw and Zhang, Jason Y. and Srinivasan, Pratul and Gao, Ruiqi and Brussee, Arthur and Holynski, Aleksander and Martin-Brualla, Ricardo and Barron, Jonathan T. and Henzler, Philipp}, title = {Bolt3D: Generating 3D Scenes in Seconds}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24846-24857} }
Know Your Attention Maps: Class-specific Token Masking for Weakly Supervised Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hanna_2025_ICCV, author = {Hanna, Jo\"elle and Borth, Damian}, title = {Know Your Attention Maps: Class-specific Token Masking for Weakly Supervised Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23763-23772} }
Reducing Unimodal Bias in Multi-Modal Semantic Segmentation with Multi-Scale Functional Entropy Regularization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_ICCV, author = {Zheng, Xu and Lyu, Yuanhuiyi and Jiang, Lutao and Paudel, Danda Pani and Van Gool, Luc and Hu, Xuming}, title = {Reducing Unimodal Bias in Multi-Modal Semantic Segmentation with Multi-Scale Functional Entropy Regularization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21166-21176} }
ETA: Efficiency through Thinking Ahead, A Dual Approach to Self-Driving with Large Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hamdan_2025_ICCV, author = {Hamdan, Shadi and Sima, Chonghao and Yang, Zetong and Li, Hongyang and Guney, Fatma}, title = {ETA: Efficiency through Thinking Ahead, A Dual Approach to Self-Driving with Large Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26529-26538} }
Understanding Personal Concept in Open-Vocabulary Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2025_ICCV, author = {Park, Sunghyun and Lee, Jungsoo and Borse, Shubhankar and Hayat, Munawar and Choi, Sungha and Hwang, Kyuwoong and Porikli, Fatih}, title = {Understanding Personal Concept in Open-Vocabulary Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19957-19966} }
Fuzzy Contrastive Decoding to Alleviate Object Hallucination in Large Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2025_ICCV, author = {Kim, Jieun and Kim, Jinmyeong and Kim, Yoonji and Cho, Sung-Bae}, title = {Fuzzy Contrastive Decoding to Alleviate Object Hallucination in Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20572-20581} }
Boosting Multimodal Learning via Disentangled Gradient Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wei_2025_ICCV, author = {Wei, Shicai and Luo, Chunbo and Luo, Yang}, title = {Boosting Multimodal Learning via Disentangled Gradient Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22879-22888} }
DisTime: Distribution-based Time Representation for Video Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2025_ICCV, author = {Zeng, Yingsen and Huang, Zepeng and Zhong, Yujie and Feng, Chengjian and Hu, Jie and Ma, Lin and Liu, Yang}, title = {DisTime: Distribution-based Time Representation for Video Large Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21961-21971} }
COIN: Confidence Score-Guided Distillation for Annotation-Free Cell Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jo_2025_ICCV, author = {Jo, Sanghyun and Lee, Seo Jin and Lee, Seungwoo and Hong, Seohyung and Seo, Hyungseok and Kim, Kyungsu}, title = {COIN: Confidence Score-Guided Distillation for Annotation-Free Cell Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20324-20335} }
3D Test-time Adaptation via Graph Spectral Driven Point Shift-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2025_ICCV, author = {Wei, Xin and Yang, Qin and Fang, Yijie and Zhu, Mingrui and Wang, Nannan}, title = {3D Test-time Adaptation via Graph Spectral Driven Point Shift}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26762-26771} }
StruMamba3D: Exploring Structural Mamba for Self-supervised Point Cloud Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Chuxin and Zha, Yixin and Yang, Wenfei and Zhang, Tianzhu}, title = {StruMamba3D: Exploring Structural Mamba for Self-supervised Point Cloud Representation Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28546-28555} }
Easy3D: A Simple Yet Effective Method for 3D Interactive Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Simonelli_2025_ICCV, author = {Simonelli, Andrea and M\"uller, Norman and Kontschieder, Peter}, title = {Easy3D: A Simple Yet Effective Method for 3D Interactive Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24707-24716} }
DynImg: Key Frames with Visual Prompts are Good Representation for Multi-Modal Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bao_2025_ICCV, author = {Bao, Xiaoyi and Xie, Chenwei and Tang, Hao and Weng, Tingyu and Wang, Xiaofeng and Zheng, Yun and Wang, Xingang}, title = {DynImg: Key Frames with Visual Prompts are Good Representation for Multi-Modal Video Understanding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23678-23688} }
Self-Supervised Sparse Sensor Fusion for Long Range Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Palladin_2025_ICCV, author = {Palladin, Edoardo and Brucker, Samuel and Ghilotti, Filippo and Narayanan, Praveen and Bijelic, Mario and Heide, Felix}, title = {Self-Supervised Sparse Sensor Fusion for Long Range Perception}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27498-27509} }
GeoDistill: Geometry-Guided Self-Distillation for Weakly Supervised Cross-View Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tong_2025_ICCV, author = {Tong, Shaowen and Xia, Zimin and Alahi, Alexandre and He, Xuming and Shi, Yujiao}, title = {GeoDistill: Geometry-Guided Self-Distillation for Weakly Supervised Cross-View Localization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25357-25366} }
Leveraging 2D Priors and SDF Guidance for Urban Scene Rendering-
[pdf]
[supp]
[bibtex]@InProceedings{Tourani_2025_ICCV, author = {Tourani, Siddharth and Reddy, Jayaram and Kumbar, Akash and Tourani, Satyajit and Goyal, Nishant and Krishna, Madhava and Reddy, N Dinesh and Khan, Muhammad Haris}, title = {Leveraging 2D Priors and SDF Guidance for Urban Scene Rendering}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {29051-29063} }
SVTRv2: CTC Beats Encoder-Decoder Models in Scene Text Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2025_ICCV, author = {Du, Yongkun and Chen, Zhineng and Xie, Hongtao and Jia, Caiyan and Jiang, Yu-Gang}, title = {SVTRv2: CTC Beats Encoder-Decoder Models in Scene Text Recognition}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20147-20156} }
CNS-Bench: Benchmarking Image Classifier Robustness Under Continuous Nuisance Shifts-
[pdf]
[supp]
[bibtex]@InProceedings{Dunkel_2025_ICCV, author = {D\"unkel, Olaf and Jesslen, Artur and Xie, Jiahao and Theobalt, Christian and Rupprecht, Christian and Kortylewski, Adam}, title = {CNS-Bench: Benchmarking Image Classifier Robustness Under Continuous Nuisance Shifts}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19978-19988} }
Noise2Score3D: Tweedie's Approach for Unsupervised Point Cloud Denoising-
[pdf]
[supp]
[bibtex]@InProceedings{Wei_2025_ICCV, author = {Wei, Xiangbin and Wang, Yuanfeng and Xu, Ao and Zhu, Lingyu and Sun, Dongyong and Li, Keren and Li, Yang and Qin, Qi}, title = {Noise2Score3D: Tweedie's Approach for Unsupervised Point Cloud Denoising}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25993-26003} }
I2-World: Intra-Inter Tokenization for Efficient Dynamic 4D Scene Forecasting-
[pdf]
[bibtex]@InProceedings{Liao_2025_ICCV, author = {Liao, Zhimin and Wei, Ping and Zhang, Ruijie and Chen, Shuaijia and Wang, Haoxuan and Ren, Ziyang}, title = {I2-World: Intra-Inter Tokenization for Efficient Dynamic 4D Scene Forecasting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25810-25819} }
Auto-Vocabulary Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Ulger_2025_ICCV, author = {\"Ulger, Osman and Kulicki, Maksymilian and Asano, Yuki and Oswald, Martin R.}, title = {Auto-Vocabulary Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24266-24275} }
MSA2: Multi-task Framework with Structure-aware and Style-adaptive Character Representation for Open-set Chinese Text Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Yangfu and Zhan, Hongjian and Liu, Qi and Sun, Li and Xiong, Yu-Jie and Lu, Yue}, title = {MSA2: Multi-task Framework with Structure-aware and Style-adaptive Character Representation for Open-set Chinese Text Recognition}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23095-23104} }
PCR-GS: COLMAP-Free 3D Gaussian Splatting via Pose Co-Regularizations-
[pdf]
[supp]
[bibtex]@InProceedings{Wei_2025_ICCV, author = {Wei, Yu and Zhang, Jiahui and Zhang, Xiaoqin and Shao, Ling and Lu, Shijian}, title = {PCR-GS: COLMAP-Free 3D Gaussian Splatting via Pose Co-Regularizations}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26499-26508} }
Fusion Meets Diverse Conditions: A High-diversity Benchmark and Baseline for UAV-based Multimodal Object Detection with Condition Cues-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Chen and Bin, Kangcheng and Hu, Ting and Qi, Jiahao and Liu, Xingyue and Liu, Tianpeng and Liu, Zhen and Liu, Yongxiang and Zhong, Ping}, title = {Fusion Meets Diverse Conditions: A High-diversity Benchmark and Baseline for UAV-based Multimodal Object Detection with Condition Cues}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27958-27967} }
Stealthy Backdoor Attack in Federated Learning via Adaptive Layer-wise Gradient Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Qingqian and Yan, Peishen and Wu, Xiaoyu and Zhang, Jiaru and Song, Tao and Hua, Yang and Wang, Hao and Wang, Liangliang and Guan, Haibing}, title = {Stealthy Backdoor Attack in Federated Learning via Adaptive Layer-wise Gradient Alignment}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {29163-29172} }
DiffPCI: Large Motion Point Cloud frame Interpolation with Diffusion Model-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Tianyu and Jiang, Haobo and Yang, Jian and Xie, Jin}, title = {DiffPCI: Large Motion Point Cloud frame Interpolation with Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27348-27358} }
Auto-Controlled Image Perception in MLLMs via Visual Perception Tokens-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2025_ICCV, author = {Yu, Runpeng and Ma, Xinyin and Wang, Xinchao}, title = {Auto-Controlled Image Perception in MLLMs via Visual Perception Tokens}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21822-21831} }
Addressing Representation Collapse in Vector Quantized Models with One Linear Layer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_ICCV, author = {Zhu, Yongxin and Li, Bocheng and Xin, Yifei and Xia, Zhihua and Xu, Linli}, title = {Addressing Representation Collapse in Vector Quantized Models with One Linear Layer}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22968-22977} }
Borrowing Eyes for the Blind Spot: Overcoming Data Scarcity in Malicious Video Detection via Cross-Domain Retrieval Augmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Hong_2025_ICCV, author = {Hong, Rongpei and Lang, Jian and Zhong, Ting and Zhou, Fan}, title = {Borrowing Eyes for the Blind Spot: Overcoming Data Scarcity in Malicious Video Detection via Cross-Domain Retrieval Augmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22728-22737} }
SuperDec: 3D Scene Decomposition with Superquadrics Primitives-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fedele_2025_ICCV, author = {Fedele, Elisabetta and Sun, Boyang and Guibas, Leonidas and Pollefeys, Marc and Engelmann, Francis}, title = {SuperDec: 3D Scene Decomposition with Superquadrics Primitives}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24625-24635} }
LaRender: Training-Free Occlusion Control in Image Generation via Latent Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhan_2025_ICCV, author = {Zhan, Xiaohang and Liu, Dingming}, title = {LaRender: Training-Free Occlusion Control in Image Generation via Latent Rendering}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19679-19688} }
DONUT: A Decoder-Only Model for Trajectory Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Knoche_2025_ICCV, author = {Knoche, Markus and de Geus, Daan and Leibe, Bastian}, title = {DONUT: A Decoder-Only Model for Trajectory Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {28903--28912} }
Scene Coordinate Reconstruction Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Bian_2025_ICCV, author = {Bian, Wenjing and Barroso-Laguna, Axel and Cavallari, Tommaso and Prisacariu, Victor Adrian and Brachmann, Eric}, title = {Scene Coordinate Reconstruction Priors}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {25765--25776} }
Temporal Overlapping Prediction: A Self-supervised Pre-training Method for LiDAR Moving Object Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Miao_2025_ICCV, author = {Miao, Ziliang and Chen, Runjian and Cai, Yixi and He, Buwei and Zhao, Wenquan and Shao, Wenqi and Zhang, Bo and Zhang, Fu}, title = {Temporal Overlapping Prediction: A Self-supervised Pre-training Method for LiDAR Moving Object Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {26653--26663} }
No More Sibling Rivalry: Debiasing Human-Object Interaction Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Bin and Zhang, Yulin and Zhou, Hong-Yu and Yang, Sibei}, title = {No More Sibling Rivalry: Debiasing Human-Object Interaction Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {22707--22717} }
Temperature in Cosine-based Softmax Loss-
[pdf]
[supp]
[bibtex]@InProceedings{Kobayashi_2025_ICCV, author = {Kobayashi, Takumi}, title = {Temperature in Cosine-based Softmax Loss}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {22199--22208} }
Exploring Probabilistic Modeling Beyond Domain Generalization for Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, I-Hsiang and Chang, Hua-En and Chen, Wei-Ting and Hwang, Jenq-Neng and Kuo, Sy-Yen}, title = {Exploring Probabilistic Modeling Beyond Domain Generalization for Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {21755--21765} }
Learning Robust Stereo Matching in the Wild with Selective Mixture-of-Experts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Yun and Wang, Longguang and Zhang, Chenghao and Zhang, Yongjian and Zhang, Zhanjie and Ma, Ao and Fan, Chenyou and Lam, Tin Lun and Hu, Junjie}, title = {Learning Robust Stereo Matching in the Wild with Selective Mixture-of-Experts}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {21276--21287} }
Token-Efficient VLM: High-Resolution Image Understanding via Dynamic Region Proposal-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2025_ICCV, author = {Jiang, Yitong and Gu, Jinwei and Xue, Tianfan and Cheung, Ka Chun and Molchanov, Pavlo and Yin, Hongxu and Liu, Sifei}, title = {Token-Efficient VLM: High-Resolution Image Understanding via Dynamic Region Proposal}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {24147--24158} }
Robust 3D Object Detection using Probabilistic Point Clouds from Single-Photon LiDARs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Goyal_2025_ICCV, author = {Goyal, Bhavya and Gutierrez-Barragan, Felipe and Lin, Wei and Velten, Andreas and Li, Yin and Gupta, Mohit}, title = {Robust 3D Object Detection using Probabilistic Point Clouds from Single-Photon LiDARs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {28417--28427} }
TimeExpert: An Expert-Guided Video LLM for Video Temporal Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Zuhao and Yu, Yingchen and Zhao, Yunqing and Lu, Shijian and Bai, Song}, title = {TimeExpert: An Expert-Guided Video LLM for Video Temporal Grounding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {24286--24296} }
GS-Occ3D: Scaling Vision-only Occupancy Reconstruction with Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Ye_2025_ICCV, author = {Ye, Baijun and Qin, Minghui and Zhang, Saining and Gong, Moonjun and Zhu, Shaoting and Zhao, Hao and Zhao, Hang}, title = {GS-Occ3D: Scaling Vision-only Occupancy Reconstruction with Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {25925--25937} }
R-LiViT: A LiDAR-Visual-Thermal Dataset Enabling Vulnerable Road User Focused Roadside Perception-
[pdf]
[bibtex]@InProceedings{Mirlach_2025_ICCV, author = {Mirlach, Jonas and Wan, Lei and Wiedholz, Andreas and Keen, Hannan Ejaz and Eich, Andreas}, title = {R-LiViT: A LiDAR-Visual-Thermal Dataset Enabling Vulnerable Road User Focused Roadside Perception}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {28375--28384} }
Spatially-Varying Autofocus-
[pdf]
[supp]
[bibtex]@InProceedings{Qin_2025_ICCV, author = {Qin, Yingsi and Sankaranarayanan, Aswin C. and O'Toole, Matthew}, title = {Spatially-Varying Autofocus}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {24645--24654} }
ZeroStereo: Zero-shot Stereo Matching from Single Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Xianqi and Yang, Hao and Xu, Gangwei and Cheng, Junda and Lin, Min and Deng, Yong and Zang, Jinliang and Chen, Yurui and Yang, Xin}, title = {ZeroStereo: Zero-shot Stereo Matching from Single Images}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {28177--28187} }
Training-Free Industrial Defect Generation with Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Ruyi and Chiu, Yen-Tzu and Chen, Tai-I and Chew, Oscar and Chuang, Yung-Yu and Cheng, Wen-Huang}, title = {Training-Free Industrial Defect Generation with Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {24214--24223} }
Hierarchical Event Memory for Accurate and Low-latency Online Video Temporal Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_ICCV, author = {Zheng, Minghang and Peng, Yuxin and Sun, Benyuan and Yang, Yi and Liu, Yang}, title = {Hierarchical Event Memory for Accurate and Low-latency Online Video Temporal Grounding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {21589--21599} }
Interaction-Merged Motion Planning: Effectively Leveraging Diverse Motion Datasets for Robust Planning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_ICCV, author = {Lee, Giwon and Jeong, Wooseong and Park, Daehee and Jeong, Jaewoo and Yoon, Kuk-Jin}, title = {Interaction-Merged Motion Planning: Effectively Leveraging Diverse Motion Datasets for Robust Planning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {28610--28621} }
DeRIS: Decoupling Perception and Cognition for Enhanced Referring Image Segmentation through Loopback Synergy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dai_2025_ICCV, author = {Dai, Ming and Cheng, Wenxuan and Liu, Jiang-jiang and Yang, Sen and Cai, Wenxiao and Sun, Yanpeng and Yang, Wankou}, title = {DeRIS: Decoupling Perception and Cognition for Enhanced Referring Image Segmentation through Loopback Synergy}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {19936--19946} }
ZIM: Zero-Shot Image Matting for Anything-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_ICCV, author = {Kim, Beomyoung and Shin, Chanyong and Jeong, Joonhyun and Jung, Hyungsik and Lee, Se-Yun and Chun, Sewhan and Hwang, Dong-Hyun and Yu, Joonsang}, title = {ZIM: Zero-Shot Image Matting for Anything}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {23828--23838} }
Skip-Vision: Efficient and Scalable Acceleration of Vision-Language Models via Adaptive Token Skipping-
[pdf]
[supp]
[bibtex]@InProceedings{Zeng_2025_ICCV, author = {Zeng, Weili and Huang, Ziyuan and Ji, Kaixiang and Yan, Yichao}, title = {Skip-Vision: Efficient and Scalable Acceleration of Vision-Language Models via Adaptive Token Skipping}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {21384--21397} }
Enrich and Detect: Video Temporal Grounding with Multimodal LLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Pramanick_2025_ICCV, author = {Pramanick, Shraman and Mavroudi, Effrosyni and Song, Yale and Chellappa, Rama and Torresani, Lorenzo and Afouras, Triantafyllos}, title = {Enrich and Detect: Video Temporal Grounding with Multimodal LLMs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {24297--24308} }
Class Token as Proxy: Optimal Transport-assisted Proxy Learning for Weakly Supervised Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Jian and Dai, Tianhong and Zhang, Bingfeng and Yu, Siyue and Lim, Eng Gee and Xiao, Jimin}, title = {Class Token as Proxy: Optimal Transport-assisted Proxy Learning for Weakly Supervised Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {21645--21654} }
FIND: Few-Shot Anomaly Inspection with Normal-Only Multi-Modal Data-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Yiting and Liu, Fayao and Liao, Jingyi and Tian, Sichao and Foo, Chuan-Sheng and Yang, Xulei}, title = {FIND: Few-Shot Anomaly Inspection with Normal-Only Multi-Modal Data}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {23290--23299} }
MaskSAM: Auto-prompt SAM with Mask Classification for Volumetric Medical Image Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2025_ICCV, author = {Xie, Bin and Tang, Hao and Duan, Bin and Cai, Dawen and Yan, Yan and Agam, Gady}, title = {MaskSAM: Auto-prompt SAM with Mask Classification for Volumetric Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {24423--24433} }
AID: Adapting Image2Video Diffusion Models for Instruction-guided Video Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xing_2025_ICCV, author = {Xing, Zhen and Dai, Qi and Weng, Zejia and Wu, Zuxuan and Jiang, Yu-Gang}, title = {AID: Adapting Image2Video Diffusion Models for Instruction-guided Video Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {21243--21253} }
Splat-based 3D Scene Reconstruction with Extreme Motion-blur-
[pdf]
[supp]
[bibtex]@InProceedings{Jang_2025_ICCV, author = {Jang, Hyeonjoong and Choi, Dongyoung and Kim, Donggun and Kang, Woohyun and Kim, Min H.}, title = {Splat-based 3D Scene Reconstruction with Extreme Motion-blur}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {26425--26434} }
Towards Robustness of Person Search against Corruptions-
[pdf]
[supp]
[bibtex]@InProceedings{Son_2025_ICCV, author = {Son, Woojung and Cho, Yoonki and An, Guoyuan and Lee, Chanmi and Yoon, Sung-Eui}, title = {Towards Robustness of Person Search against Corruptions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {23408--23418} }
INSTINCT: Instance-Level Interaction Architecture for Query-Based Collaborative Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Yunjiang and Li, Lingzhi and Wang, Jin and Ouyang, Yupeng and Yang, Benyuan}, title = {INSTINCT: Instance-Level Interaction Architecture for Query-Based Collaborative Perception}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {25464--25473} }
DriveX: Omni Scene Modeling for Learning Generalizable World Knowledge in Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2025_ICCV, author = {Shi, Chen and Shi, Shaoshuai and Sheng, Kehua and Zhang, Bo and Jiang, Li}, title = {DriveX: Omni Scene Modeling for Learning Generalizable World Knowledge in Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {28599--28609} }
MagicCity: Geometry-Aware 3D City Generation from Satellite Imagery with Multi-View Consistency-
[pdf]
[supp]
[bibtex]@InProceedings{Yao_2025_ICCV, author = {Yao, Xingbo and Wang, Xuanmin and Wu, Hao and Ping, Chengliang and Zhang, Doudou and Xiong, Hui}, title = {MagicCity: Geometry-Aware 3D City Generation from Satellite Imagery with Multi-View Consistency}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {25325--25334} }
RadGPT: Constructing 3D Image-Text Tumor Datasets-
[pdf]
[supp]
[bibtex]@InProceedings{Bassi_2025_ICCV, author = {Bassi, Pedro R. A. S. and Yavuz, Mehmet Can and Hamamci, Ibrahim Ethem and Er, Sezgin and Chen, Xiaoxi and Li, Wenxuan and Menze, Bjoern and Decherchi, Sergio and Cavalli, Andrea and Wang, Kang and Yang, Yang and Yuille, Alan and Zhou, Zongwei}, title = {RadGPT: Constructing 3D Image-Text Tumor Datasets}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {23720--23730} }
LangBridge: Interpreting Image as a Combination of Language Embeddings-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liao_2025_ICCV, author = {Liao, Jiaqi and Niu, Yuwei and Meng, Fanqing and Li, Hao and Tian, Changyao and Du, Yinuo and Xiong, Yuwen and Li, Dianqi and Zhu, Xizhou and Yuan, Li and Dai, Jifeng and Cheng, Yu}, title = {LangBridge: Interpreting Image as a Combination of Language Embeddings}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {23752--23762} }
Quantifying and Narrowing the Unknown: Interactive Text-to-Video Retrieval via Uncertainty Minimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Bingqing and Cao, Zhuo and Du, Heming and Li, Yang and Li, Xue and Liu, Jiajun and Wang, Sen}, title = {Quantifying and Narrowing the Unknown: Interactive Text-to-Video Retrieval via Uncertainty Minimization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {22120--22130} }
GECKO: Gigapixel Vision-Concept Contrastive Pretraining in Histopathology-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kapse_2025_ICCV, author = {Kapse, Saarthak and Pati, Pushpak and Yellapragada, Srikar and Das, Srijan and Gupta, Rajarsi R. and Saltz, Joel and Samaras, Dimitris and Prasanna, Prateek}, title = {GECKO: Gigapixel Vision-Concept Contrastive Pretraining in Histopathology}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {20020--20030} }
Compression of 3D Gaussian Splatting with Optimized Feature Planes and Standard Video Codecs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_ICCV, author = {Lee, Soonbin and Shu, Fangwen and Sanchez, Yago and Schierl, Thomas and Hellge, Cornelius}, title = {Compression of 3D Gaussian Splatting with Optimized Feature Planes and Standard Video Codecs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {25496--25505} }
CULTURE3D: A Large-Scale and Diverse Dataset of Cultural Landmarks and Terrains for Gaussian-Based Scene Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_ICCV, author = {Zheng, Xinyi and Zhang, Steve and Lin, Weizhe and Zhang, Aaron and Mayol-Cuevas, Walterio W. and Liu, Yunze and Shen, Junxiao}, title = {CULTURE3D: A Large-Scale and Diverse Dataset of Cultural Landmarks and Terrains for Gaussian-Based Scene Rendering}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {29064--29074} }
Geo4D: Leveraging Video Generators for Geometric 4D Scene Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_ICCV, author = {Jiang, Zeren and Zheng, Chuanxia and Laina, Iro and Larlus, Diane and Vedaldi, Andrea}, title = {Geo4D: Leveraging Video Generators for Geometric 4D Scene Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {20658--20671} }
REPARO: Compositional 3D Assets Generation with Differentiable 3D Layout Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_ICCV, author = {Han, Haonan and Yang, Rui and Liao, Huan and Xing, Jiankai and Xu, Zunnan and Yu, Xiaoming and Zha, Junwei and Li, Xiu and Li, Wanhua}, title = {REPARO: Compositional 3D Assets Generation with Differentiable 3D Layout Alignment}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {25367--25377} }
SparseMM: Head Sparsity Emerges from Visual Concept Responses in MLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Jiahui and Liu, Zuyan and Rao, Yongming and Lu, Jiwen}, title = {SparseMM: Head Sparsity Emerges from Visual Concept Responses in MLLMs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {23177--23187} }
Focal Plane Visual Feature Generation and Matching on a Pixel Processor Array-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Hongyi and Bose, Laurie and Chen, Jianing and Dudek, Piotr and Mayol-Cuevas, Walterio}, title = {Focal Plane Visual Feature Generation and Matching on a Pixel Processor Array}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {29031--29039} }
Q-Frame: Query-aware Frame Selection and Multi-Resolution Adaptation for Video-LLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Shaojie and Yang, Jiahui and Yin, Jianqin and Luo, Zhenbo and Luan, Jian}, title = {Q-Frame: Query-aware Frame Selection and Multi-Resolution Adaptation for Video-LLMs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {22056--22065} }
Prompt-driven Transferable Adversarial Attack on Person Re-Identification with Attribute-aware Textual Inversion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bian_2025_ICCV, author = {Bian, Yuan and Liu, Min and Yi, Yunqi and Wang, Xueping and Jiang, Shuai and Wang, Yaonan}, title = {Prompt-driven Transferable Adversarial Attack on Person Re-Identification with Attribute-aware Textual Inversion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {22599--22609} }
MDP-Omni: Parameter-free Multimodal Depth Prior-based Sampling for Omnidirectional Stereo Matching-
[pdf]
[bibtex]@InProceedings{Son_2025_ICCV, author = {Son, Eunjin and Jo, HyungGi and Kwon, Wookyong and Lee, Sang Jun}, title = {MDP-Omni: Parameter-free Multimodal Depth Prior-based Sampling for Omnidirectional Stereo Matching}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {26178--26187} }
RoboTron-Sim: Improving Real-World Driving via Simulated Hard-Case-
[pdf]
[supp]
[bibtex]@InProceedings{Xiao_2025_ICCV, author = {Xiao, Baihui and Feng, Chengjian and Huang, Zhijian and Yan, Feng and Zhong, Yujie and Ma, Lin}, title = {RoboTron-Sim: Improving Real-World Driving via Simulated Hard-Case}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {27380--27389} }
Cross-Architecture Distillation Made Simple with Redundancy Suppression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Weijia and Liu, Yuehao and Ran, Wu and Ma, Chao}, title = {Cross-Architecture Distillation Made Simple with Redundancy Suppression}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {23256--23266} }
M2EIT: Multi-Domain Mixture of Experts for Robust Neural Inertial Tracking-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Yan and Xu, Yang and Chen, Changhao and Shi, Zhongchen and Chen, Wei and Xie, Liang and Chen, Hongbo and Yin, Erwei}, title = {M2EIT: Multi-Domain Mixture of Experts for Robust Neural Inertial Tracking}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {28207--28216} }
Identity-aware Language Gaussian Splatting for Open-vocabulary 3D Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Jang_2025_ICCV, author = {Jang, SungMin and Kim, Wonjun}, title = {Identity-aware Language Gaussian Splatting for Open-vocabulary 3D Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {20467--20476} }
Degradation-Modeled Multipath Diffusion for Tunable Metalens Photography-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Jianing and Zhu, Jiayi and Ji, Feiyu and Yang, Xiaokang and Yuan, Xiaoyun}, title = {Degradation-Modeled Multipath Diffusion for Tunable Metalens Photography}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {25914--25924} }
Driving View Synthesis on Free-form Trajectories with Generative Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Zeyu and Pan, Zijie and Yang, Yuankun and Zhu, Xiatian and Zhang, Li}, title = {Driving View Synthesis on Free-form Trajectories with Generative Prior}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {28083--28092} }
StochasticSplats: Stochastic Rasterization for Sorting-Free 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kheradmand_2025_ICCV, author = {Kheradmand, Shakiba and Vicini, Delio and Kopanas, George and Lagun, Dmitry and Yi, Kwang Moo and Matthews, Mark and Tagliasacchi, Andrea}, title = {StochasticSplats: Stochastic Rasterization for Sorting-Free 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {26326--26335} }
Aligning Information Capacity Between Vision and Language via Dense-to-Sparse Feature Distillation for Image-Text Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Yang and Feng, Wentao and Liu, Zhuoyao and Huang, Shudong and Lv, Jiancheng}, title = {Aligning Information Capacity Between Vision and Language via Dense-to-Sparse Feature Distillation for Image-Text Matching}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {21679--21688} }
A Lesson in Splats: Teacher-Guided Diffusion for 3D Gaussian Splats Generation with 2D Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2025_ICCV, author = {Peng, Chensheng and Sobol, Ido and Tomizuka, Masayoshi and Keutzer, Kurt and Xu, Chenfeng and Litany, Or}, title = {A Lesson in Splats: Teacher-Guided Diffusion for 3D Gaussian Splats Generation with 2D Supervision}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {28707--28717} }
Boosting Vision Semantic Density with Anatomy Normality Modeling for Medical Vision-language Pre-training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2025_ICCV, author = {Cao, Weiwei and Zhang, Jianpeng and Shui, Zhongyi and Wang, Sinuo and Chen, Zeli and Li, Xi and Lu, Le and Ye, Xianghua and Zhang, Qi and Liang, Tingbo and Zhang, Ling}, title = {Boosting Vision Semantic Density with Anatomy Normality Modeling for Medical Vision-language Pre-training}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {23041--23050} }
GAP: Gaussianize Any Point Clouds with Text Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Weiqi and Zhou, Junsheng and Geng, Haotian and Zhang, Wenyuan and Liu, Yu-Shen}, title = {GAP: Gaussianize Any Point Clouds with Text Guidance}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {25627--25638} }
Flow4Agent: Long-form Video Understanding via Motion Prior from Optical Flow-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Ruyang and Sun, Shangkun and Tang, Haoran and Gao, Wei and Li, Ge}, title = {Flow4Agent: Long-form Video Understanding via Motion Prior from Optical Flow}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {23817--23827} }
Moto: Latent Motion Token as the Bridging Language for Learning Robot Manipulation from Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Yi and Ge, Yuying and Tang, Weiliang and Li, Yizhuo and Ge, Yixiao and Ding, Mingyu and Shan, Ying and Liu, Xihui}, title = {Moto: Latent Motion Token as the Bridging Language for Learning Robot Manipulation from Videos}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {19752--19763} }
BezierGS: Dynamic Urban Scene Reconstruction with Bezier Curve Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_ICCV, author = {Ma, Zipei and Jiang, Junzhe and Chen, Yurui and Zhang, Li}, title = {BezierGS: Dynamic Urban Scene Reconstruction with Bezier Curve Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {25519--25528} }
RealCam-I2V: Real-World Image-to-Video Generation with Interactive Complex Camera Control-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Teng and Zheng, Guangcong and Jiang, Rui and Zhan, Shuigen and Wu, Tao and Lu, Yehao and Lin, Yining and Deng, Chuanyun and Xiong, Yepan and Chen, Min and Cheng, Lin and Li, Xi}, title = {RealCam-I2V: Real-World Image-to-Video Generation with Interactive Complex Camera Control}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {28785--28796} }
Breaking the Encoder Barrier for Seamless Video-Language Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Handong and Zhang, Yiyuan and Guo, Longteng and Yue, Xiangyu and Liu, Jing}, title = {Breaking the Encoder Barrier for Seamless Video-Language Understanding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {23167--23176} }
Simulating Dual-Pixel Images From Ray Tracing For Depth Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2025_ICCV, author = {He, Fengchen and Zhao, Dayang and Xu, Hao and Quan, Tingwei and Zeng, Shaoqun}, title = {Simulating Dual-Pixel Images From Ray Tracing For Depth Estimation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {26106--26115} }
Training-Free Class Purification for Open-Vocabulary Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Qi and Yang, Lingxiao and Chen, Yun and Zhao, Nailong and Lai, Jianhuang and Shao, Jie and Xie, Xiaohua}, title = {Training-Free Class Purification for Open-Vocabulary Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {23124--23134} }
Generalizable Object Re-Identification via Visual In-Context Prompting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_ICCV, author = {Huang, Zhizhong and Liu, Xiaoming}, title = {Generalizable Object Re-Identification via Visual In-Context Prompting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {22539--22550} }
Pseudo-SD: Pseudo Controlled Stable Diffusion for Semi-Supervised and Cross-Domain Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_ICCV, author = {Zhao, Dong and Zang, Qi and Wang, Shuang and Sebe, Nicu and Zhong, Zhun}, title = {Pseudo-SD: Pseudo Controlled Stable Diffusion for Semi-Supervised and Cross-Domain Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {22393--22403} }
MaTVLM: Hybrid Mamba-Transformer for Efficient Vision-Language Modeling-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Yingyue and Liao, Bencheng and Liu, Wenyu and Wang, Xinggang}, title = {MaTVLM: Hybrid Mamba-Transformer for Efficient Vision-Language Modeling}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {20878--20888} }
HiNeuS: High-fidelity Neural Surface Mitigating Low-texture and Reflective Ambiguity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Yida and Zhang, Xueyang and Zhan, Kun and Jia, Peng and Lang, Xianpeng}, title = {HiNeuS: High-fidelity Neural Surface Mitigating Low-texture and Reflective Ambiguity}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {25746--25755} }
CaptionSmiths: Flexibly Controlling Language Pattern in Image Captioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Saito_2025_ICCV, author = {Saito, Kuniaki and Kim, Donghyun and Park, Kwanyong and Hashimoto, Atsushi and Ushiku, Yoshitaka}, title = {CaptionSmiths: Flexibly Controlling Language Pattern in Image Captioning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {19872--19881} }
CoHD: A Counting-Aware Hierarchical Decoding Framework for Generalized Referring Expression Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2025_ICCV, author = {Luo, Zhuoyan and Wu, Yinghao and Cheng, Tianheng and Liu, Yong and Xiao, Yicheng and Wang, Hongfa and Zhang, Xiao-Ping and Yang, Yujiu}, title = {CoHD: A Counting-Aware Hierarchical Decoding Framework for Generalized Referring Expression Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22685-22694} }
LANGTRAJ: Diffusion Model and Dataset for Language-Conditioned Trajectory Simulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chang_2025_ICCV, author = {Chang, Wei-Jer and Zhan, Wei and Tomizuka, Masayoshi and Chandraker, Manmohan and Pittaluga, Francesco}, title = {LANGTRAJ: Diffusion Model and Dataset for Language-Conditioned Trajectory Simulation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26622-26631} }
Generative Active Learning for Long-tail Trajectory Prediction via Controllable Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2025_ICCV, author = {Park, Daehee and Surana, Monu and Desai, Pranav and Mehta, Ashish and John, Reuben MV and Yoon, Kuk-Jin}, title = {Generative Active Learning for Long-tail Trajectory Prediction via Controllable Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27839-27850} }
Bridging Local Inductive Bias and Long-Range Dependencies with Pixel-Mamba for End-to-end Whole Slide Image Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Qiu_2025_ICCV, author = {Qiu, Zhongwei and Chao, Hanqing and Lin, Tiancheng and Chang, Wanxing and Yang, Zijiang and Jiao, Wenpei and Shen, Yixuan and Zhang, Yunshuo and Yang, Yelin and Liu, Wenbin and Jiang, Hui and Bian, Yun and Yan, Ke and Jin, Dakai and Lu, Le}, title = {Bridging Local Inductive Bias and Long-Range Dependencies with Pixel-Mamba for End-to-end Whole Slide Image Analysis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22738-22747} }
TARS: Traffic-Aware Radar Scene Flow Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Jialong and Braun, Marco and Spata, Dominic and Rottmann, Matthias}, title = {TARS: Traffic-Aware Radar Scene Flow Estimation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26075-26084} }
Unified Multi-Agent Trajectory Modeling with Masked Trajectory Diffusion-
[pdf]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Songru and Shi, Zhenwei and Zou, Zhengxia}, title = {Unified Multi-Agent Trajectory Modeling with Masked Trajectory Diffusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27563-27574} }
ChatReID: Open-ended Interactive Person Retrieval via Hierarchical Progressive Tuning for Vision Language Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Niu_2025_ICCV, author = {Niu, Ke and Yu, Haiyang and Zhao, Mengyang and Fu, Teng and Yi, Siyang and Lu, Wei and Li, Bin and Qian, Xuelin and Xue, Xiangyang}, title = {ChatReID: Open-ended Interactive Person Retrieval via Hierarchical Progressive Tuning for Vision Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24245-24254} }
TAViS: Text-bridged Audio-Visual Segmentation with Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2025_ICCV, author = {Luo, Ziyang and Liu, Nian and Yang, Xuguang and Khan, Salman and Anwer, Rao Muhammad and Cholakkal, Hisham and Khan, Fahad Shahbaz and Han, Junwei}, title = {TAViS: Text-bridged Audio-Visual Segmentation with Foundation Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24014-24023} }
ORION: A Holistic End-to-End Autonomous Driving Framework by Vision-Language Instructed Action Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fu_2025_ICCV, author = {Fu, Haoyu and Zhang, Diankun and Zhao, Zongchuang and Cui, Jianfeng and Liang, Dingkang and Zhang, Chong and Zhang, Dingyuan and Xie, Hongwei and Wang, Bing and Bai, Xiang}, title = {ORION: A Holistic End-to-End Autonomous Driving Framework by Vision-Language Instructed Action Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24823-24834} }
LaCoOT: Layer Collapse through Optimal Transport-
[pdf]
[supp]
[bibtex]@InProceedings{Quetu_2025_ICCV, author = {Qu\'etu, Victor and Liao, Zhu and Hezbri, Nour and Pizzati, Fabio and Tartaglione, Enzo}, title = {LaCoOT: Layer Collapse through Optimal Transport}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20497-20507} }
ReferDINO: Referring Video Object Segmentation with Visual Grounding Foundations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2025_ICCV, author = {Liang, Tianming and Lin, Kun-Yu and Tan, Chaolei and Zhang, Jianguo and Zheng, Wei-Shi and Hu, Jian-Fang}, title = {ReferDINO: Referring Video Object Segmentation with Visual Grounding Foundations}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20009-20019} }
NuiScene: Exploring Efficient Generation of Unbounded Outdoor Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_ICCV, author = {Lee, Han-Hung and Han, Qinghong and Chang, Angel X.}, title = {NuiScene: Exploring Efficient Generation of Unbounded Outdoor Scenes}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26509-26518} }
DM-EFS: Dynamically Multiplexed Expanded Features Set Form for Robust and Efficient Small Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Sharma_2025_ICCV, author = {Sharma, Aashish}, title = {DM-EFS: Dynamically Multiplexed Expanded Features Set Form for Robust and Efficient Small Object Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24569-24579} }
AirCache: Activating Inter-modal Relevancy KV Cache Compression for Efficient Large Vision-Language Model Inference-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_ICCV, author = {Huang, Kai and Zou, Hao and Wang, Bochen and Xi, Ye and Xie, Zhen and Wang, Hao}, title = {AirCache: Activating Inter-modal Relevancy KV Cache Compression for Efficient Large Vision-Language Model Inference}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23958-23967} }
SSVQ: Unleashing the Potential of Vector Quantization with Sign-Splitting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Shuaiting and Deng, Juncan and Wang, Chengxuan and Xu, Kedong and Deng, Rongtao and Gu, Hong and Shen, Haibin and Huang, Kejie}, title = {SSVQ: Unleashing the Potential of Vector Quantization with Sign-Splitting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23710-23719} }
DrivingGPT: Unifying Driving World Modeling and Planning with Multi-modal Autoregressive Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Yuntao and Wang, Yuqi and Zhang, Zhaoxiang}, title = {DrivingGPT: Unifying Driving World Modeling and Planning with Multi-modal Autoregressive Transformers}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26890-26900} }
GS-ID: Illumination Decomposition on Gaussian Splatting via Adaptive Light Aggregation and Diffusion-Guided Material Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Du_2025_ICCV, author = {Du, Kang and Liang, Zhihao and Shen, Yulin and Wang, Zeyu}, title = {GS-ID: Illumination Decomposition on Gaussian Splatting via Adaptive Light Aggregation and Diffusion-Guided Material Priors}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26220-26229} }
4D Gaussian Splatting SLAM-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Yanyan and Fang, Youxu and Zhu, Zunjie and Li, Kunyi and Ding, Yong and Tombari, Federico}, title = {4D Gaussian Splatting SLAM}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25019-25028} }
SteerX: Creating Any Camera-Free 3D and 4D Scenes with Geometric Steering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2025_ICCV, author = {Park, Byeongjun and Go, Hyojun and Nam, Hyelin and Kim, Byung-Hoon and Chung, Hyungjin and Kim, Changick}, title = {SteerX: Creating Any Camera-Free 3D and 4D Scenes with Geometric Steering}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27326-27337} }
SweetTok: Semantic-Aware Spatial-Temporal Tokenizer for Compact Video Discretization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tan_2025_ICCV, author = {Tan, Zhentao and Xue, Ben and Jia, Jian and Wang, Junhao and Ye, Wencai and Shi, Shaoyun and Sun, Mingjie and Wu, Wenjin and Chen, Quan and Jiang, Peng}, title = {SweetTok: Semantic-Aware Spatial-Temporal Tokenizer for Compact Video Discretization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23541-23550} }
D-Attn: Decomposed Attention for Large Vision-and-Language Model-
[pdf]
[supp]
[bibtex]@InProceedings{Kuo_2025_ICCV, author = {Kuo, Chia-Wen and Zhu, Sijie and Chen, Fan and Shen, Xiaohui and Wen, Longyin}, title = {D-Attn: Decomposed Attention for Large Vision-and-Language Model}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23935-23944} }
Gaussian Splatting with Discretized SDF for Relightable Assets-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_ICCV, author = {Zhu, Zuo-Liang and Yang, Jian and Wang, Beibei}, title = {Gaussian Splatting with Discretized SDF for Relightable Assets}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25155-25164} }
AAA-Gaussians: Anti-Aliased and Artifact-Free 3D Gaussian Rendering-
[pdf]
[supp]
[bibtex]@InProceedings{Steiner_2025_ICCV, author = {Steiner, Michael and K\"ohler, Thomas and Radl, Lukas and Windisch, Felix and Schmalstieg, Dieter and Steinberger, Markus}, title = {AAA-Gaussians: Anti-Aliased and Artifact-Free 3D Gaussian Rendering}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27650-27659} }
ResidualViT for Efficient Temporally Dense Video Encoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Soldan_2025_ICCV, author = {Soldan, Mattia and Heilbron, Fabian Caba and Ghanem, Bernard and Sivic, Josef and Russell, Bryan}, title = {ResidualViT for Efficient Temporally Dense Video Encoding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22305-22315} }
HQ-CLIP: Leveraging Large Vision-Language Models to Create High-Quality Image-Text Datasets and CLIP Models-
[pdf]
[supp]
[bibtex]@InProceedings{Wei_2025_ICCV, author = {Wei, Zhixiang and Wang, Guangting and Ma, Xiaoxiao and Mei, Ke and Chen, Huaian and Jin, Yi and Rao, Fengyun}, title = {HQ-CLIP: Leveraging Large Vision-Language Models to Create High-Quality Image-Text Datasets and CLIP Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22447-22456} }
Bridging the Gap Between Ideal and Real-world Evaluation: Benchmarking AI-Generated Image Detection in Challenging Scenarios-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Chunxiao and Wang, Xiaoxiao and Li, Meiling and Miao, Boming and Sun, Peng and Zhang, Yunjian and Ji, Xiangyang and Zhu, Yao}, title = {Bridging the Gap Between Ideal and Real-world Evaluation: Benchmarking AI-Generated Image Detection in Challenging Scenarios}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20379-20389} }
Progressive Test Time Energy Adaptation for Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Xiaoran and Hong, Byung-Woo and Park, Hyoungseob and Pak, Daniel H. and Rickmann, Anne-Marie and Staib, Lawrence H. and Duncan, James S. and Wong, Alex}, title = {Progressive Test Time Energy Adaptation for Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22338-22348} }
MUG: Pseudo Labeling Augmented Audio-Visual Mamba Network for Audio-Visual Video Parsing-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Langyu and Zhu, Bingke and Chen, Yingying and Zhang, Yiyuan and Tang, Ming and Wang, Jinqiao}, title = {MUG: Pseudo Labeling Augmented Audio-Visual Mamba Network for Audio-Visual Video Parsing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20637-20646} }
Mixed Signals: A Diverse Point Cloud Dataset for Heterogeneous LiDAR V2X Collaboration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2025_ICCV, author = {Luo, Katie Z and Dao, Minh-Quan and Liu, Zhenzhen and Campbell, Mark and Chao, Wei-Lun and Weinberger, Kilian Q and Malis, Ezio and Fremont, Vincent and Hariharan, Bharath and Shan, Mao and Worrall, Stewart and Perez, Julie Stephany Berrio}, title = {Mixed Signals: A Diverse Point Cloud Dataset for Heterogeneous LiDAR V2X Collaboration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28763-28773} }
OracleFusion: Assisting the Decipherment of Oracle Bone Script with Structurally Constrained Semantic Typography-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Caoshuo and Ding, Zengmao and Hu, Xiaobin and Li, Bang and Luo, Donghao and Wu, AndyPian and Wang, Chaoyang and Wang, Chengjie and Jin, Taisong and Shu, Seven and Wu, Yunsheng and Liu, Yongge and Ji, Rongrong}, title = {OracleFusion: Assisting the Decipherment of Oracle Bone Script with Structurally Constrained Semantic Typography}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19893-19902} }
Streaming VideoLLMs for Real-Time Procedural Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chatterjee_2025_ICCV, author = {Chatterjee, Dibyadip and Remelli, Edoardo and Song, Yale and Tekin, Bugra and Mittal, Abhay and Bhatnagar, Bharat and Camgoz, Necati Cihan and Hampali, Shreyas and Sauser, Eric and Ma, Shugao and Yao, Angela and Sener, Fadime}, title = {Streaming VideoLLMs for Real-Time Procedural Video Understanding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22586-22598} }
Mixture-of-Scores: Robust Image-Text Data Valuation via Three Lines of Code-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Sitong and Tan, Haoru and Chen, Yukang and Zhang, Shaofeng and Li, Jingyao and Yu, Bei and Qi, Xiaojuan and Jia, Jiaya}, title = {Mixture-of-Scores: Robust Image-Text Data Valuation via Three Lines of Code}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24603-24614} }
DMesh++: An Efficient Differentiable Mesh for Complex Shapes-
[pdf]
[supp]
[bibtex]@InProceedings{Son_2025_ICCV, author = {Son, Sanghyun and Gadelha, Matheus and Zhou, Yang and Fisher, Matthew and Xu, Zexiang and Qiao, Yi-Ling and Lin, Ming C. and Zhou, Yi}, title = {DMesh++: An Efficient Differentiable Mesh for Complex Shapes}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26590-26599} }
Extrapolated Urban View Synthesis Benchmark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_ICCV, author = {Han, Xiangyu and Jia, Zhen and Li, Boyi and Wang, Yan and Ivanovic, Boris and You, Yurong and Liu, Lingjie and Wang, Yue and Pavone, Marco and Feng, Chen and Li, Yiming}, title = {Extrapolated Urban View Synthesis Benchmark}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28718-28728} }
Emulating Self-attention with Convolution for Efficient Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_ICCV, author = {Lee, Dongheon and Yun, Seokju and Ro, Youngmin}, title = {Emulating Self-attention with Convolution for Efficient Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24467-24477} }
CoDa-4DGS: Dynamic Gaussian Splatting with Context and Deformation Awareness for Autonomous Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Song_2025_ICCV, author = {Song, Rui and Liang, Chenwei and Xia, Yan and Zimmer, Walter and Cao, Hu and Caesar, Holger and Festag, Andreas and Knoll, Alois}, title = {CoDa-4DGS: Dynamic Gaussian Splatting with Context and Deformation Awareness for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28031-28041} }
RI3D: Few-Shot Gaussian Splatting With Repair and Inpainting Diffusion Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Paliwal_2025_ICCV, author = {Paliwal, Avinash and Zhou, Xilong and Ye, Wei and Xiong, Jinhui and Ranjan, Rakesh and Kalantari, Nima Khademi}, title = {RI3D: Few-Shot Gaussian Splatting With Repair and Inpainting Diffusion Priors}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25094-25103} }
UniVerse: Unleashing the Scene Prior of Video Diffusion Models for Robust Radiance Field Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2025_ICCV, author = {Cao, Jin and Wu, Hongrui and Feng, Ziyong and Bao, Hujun and Zhou, Xiaowei and Peng, Sida}, title = {UniVerse: Unleashing the Scene Prior of Video Diffusion Models for Robust Radiance Field Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27031-27041} }
From Gallery to Wrist: Realistic 3D Bracelet Insertion in Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_ICCV, author = {Gao, Chenjian and Ding, Lihe and Han, Rui and Huang, Zhanpeng and Wang, Zibin and Xue, Tianfan}, title = {From Gallery to Wrist: Realistic 3D Bracelet Insertion in Videos}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25712-25721} }
Representing 3D Shapes with 64 Latent Vectors for 3D Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cho_2025_ICCV, author = {Cho, In and Yoo, Youngbeom and Jeon, Subin and Kim, Seon Joo}, title = {Representing 3D Shapes with 64 Latent Vectors for 3D Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28556-28566} }
CutS3D: Cutting Semantics in 3D for 2D Unsupervised Instance Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sick_2025_ICCV, author = {Sick, Leon and Engel, Dominik and Hartwig, Sebastian and Hermosilla, Pedro and Ropinski, Timo}, title = {CutS3D: Cutting Semantics in 3D for 2D Unsupervised Instance Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21265-21275} }
Salvaging the Overlooked: Leveraging Class-Aware Contrastive Learning for Multi-Class Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2025_ICCV, author = {Fan, Lei and Huang, Junjie and Di, Donglin and Su, Anyang and Song, Tianyou and Pagnucco, Maurice and Song, Yang}, title = {Salvaging the Overlooked: Leveraging Class-Aware Contrastive Learning for Multi-Class Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21419-21428} }
Disentangling Instance and Scene Contexts for 3D Semantic Scene Completion-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Enyu and Yu, En and Chen, Sijia and Tao, Wenbing}, title = {Disentangling Instance and Scene Contexts for 3D Semantic Scene Completion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26999-27009} }
World4Drive: End-to-End Autonomous Driving via Intention-aware Physical Latent World Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_ICCV, author = {Zheng, Yupeng and Yang, Pengxuan and Xing, Zebin and Zhang, Qichao and Zheng, Yuhang and Gao, Yinfeng and Li, Pengfei and Zhang, Teng and Xia, Zhongpu and Jia, Peng and Lang, XianPeng and Zhao, Dongbin}, title = {World4Drive: End-to-End Autonomous Driving via Intention-aware Physical Latent World Model}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28632-28642} }
Online Language Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Katragadda_2025_ICCV, author = {Katragadda, Saimouli and Wu, Cho-Ying and Guo, Yuliang and Huang, Xinyu and Huang, Guoquan and Ren, Liu}, title = {Online Language Splatting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25882-25892} }
VTimeCoT: Thinking by Drawing for Video Temporal Grounding and Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Jinglei and Guo, Yuanfan and Potamias, Rolandos Alexandros and Deng, Jiankang and Xu, Hang and Ma, Chao}, title = {VTimeCoT: Thinking by Drawing for Video Temporal Grounding and Reasoning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24203-24213} }
RA-BUSSeg: Relation-aware Semi-supervised Breast Ultrasound Image Segmentation via Adjacent Propagation and Cross-layer Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Wanting and Ding, Zhenhui and Chen, Guilian and Wu, Huisi and Qin, Jing}, title = {RA-BUSSeg: Relation-aware Semi-supervised Breast Ultrasound Image Segmentation via Adjacent Propagation and Cross-layer Alignment}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21689-21698} }
Factorized Learning for Temporally Grounded Video-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zeng_2025_ICCV, author = {Zeng, Wenzheng and Gao, Difei and Shou, Mike Zheng and Ng, Hwee Tou}, title = {Factorized Learning for Temporally Grounded Video-Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20683-20693} }
WeaveSeg: Iterative Contrast-weaving and Spectral Feature-refining for Nuclei Instance Segmentation-
[pdf]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Jiajia and Wu, Huisi and Qin, Jing}, title = {WeaveSeg: Iterative Contrast-weaving and Spectral Feature-refining for Nuclei Instance Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21984-21993} }
A Real-world Display Inverse Rendering Dataset-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2025_ICCV, author = {Choi, Seokjun and Chung, Hoon-Gyu and Jeon, Yujin and Nam, Giljoo and Baek, Seung-Hwan}, title = {A Real-world Display Inverse Rendering Dataset}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25272-25283} }
MEH: A Multi-Style Dataset and Toolkit for Advancing Egyptian Hieroglyph Recognition-
[pdf]
[bibtex]@InProceedings{Golyadkin_2025_ICCV, author = {Golyadkin, Maksim and Rubanova, Valeria and Utkov, Aleksandr and Nikolotov, Dmitry and Makarov, Ilya}, title = {MEH: A Multi-Style Dataset and Toolkit for Advancing Egyptian Hieroglyph Recognition}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24488-24496} }
Hi-Gaussian: Hierarchical Gaussians under Normalized Spherical Projection for Single-View 3D Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2025_ICCV, author = {Xie, Binjian and Zhang, Pengju and Wei, Hao and Wu, Yihong}, title = {Hi-Gaussian: Hierarchical Gaussians under Normalized Spherical Projection for Single-View 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28664-28673} }
The Scalability of Simplicity: Empirical Analysis of Vision-Language Learning with a Single Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lei_2025_ICCV, author = {Lei, Weixian and Wang, Jiacong and Wang, Haochen and Li, Xiangtai and Liew, Jun Hao and Feng, Jiashi and Huang, Zilong}, title = {The Scalability of Simplicity: Empirical Analysis of Vision-Language Learning with a Single Transformer}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20758-20769} }
End-to-End Multi-Modal Diffusion Mamba-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2025_ICCV, author = {Lu, Chunhao and Lu, Qiang and Dong, Meichen and Luo, Jake}, title = {End-to-End Multi-Modal Diffusion Mamba}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20529-20540} }
Vector Contrastive Learning For Pixel-Wise Pretraining In Medical Vision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2025_ICCV, author = {He, Yuting and Li, Shuo}, title = {Vector Contrastive Learning For Pixel-Wise Pretraining In Medical Vision}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19827-19837} }
Benefit From Seen: Enhancing Open-Vocabulary Object Detection by Bridging Visual and Textual Co-Occurrence Knowledge-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Yanqi and Niu, Jianwei and Ren, Tao}, title = {Benefit From Seen: Enhancing Open-Vocabulary Object Detection by Bridging Visual and Textual Co-Occurrence Knowledge}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22110-22119} }
SynAD: Enhancing Real-World End-to-End Autonomous Driving Models through Synthetic Data Integration-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2025_ICCV, author = {Kim, Jongsuk and Lee, Jaeyoung and Han, Gyojin and Lee, Dong-Jae and Jeong, Minki and Kim, Junmo}, title = {SynAD: Enhancing Real-World End-to-End Autonomous Driving Models through Synthetic Data Integration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25197-25206} }
Purge-Gate: Backpropagation-Free Test-Time Adaptation for Point Clouds Classification via Token purging-
[pdf]
[supp]
[bibtex]@InProceedings{Yazdanpanah_2025_ICCV, author = {Yazdanpanah, Moslem and Bahri, Ali and Noori, Mehrdad and Dastani, Sahar and Hakim, Gustavo Adolfo Vargas and Osowiechi, David and Ben Ayed, Ismail and Desrosiers, Christian}, title = {Purge-Gate: Backpropagation-Free Test-Time Adaptation for Point Clouds Classification via Token purging}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27640-27649} }
PVChat: Personalized Video Chat with One-Shot Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2025_ICCV, author = {Shi, Yufei and Yan, Weilong and Xu, Gang and Li, Yumeng and Chen, Yucheng and Li, Zhenxi and Yu, Fei and Li, Ming and Yeo, Si Yong}, title = {PVChat: Personalized Video Chat with One-Shot Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23321-23331} }
NeuraLeaf: Neural Parametric Leaf Models with Shape and Deformation Disentanglement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Yang and Mao, Dongni and Santo, Hiroaki and Matsushita, Yasuyuki and Okura, Fumio}, title = {NeuraLeaf: Neural Parametric Leaf Models with Shape and Deformation Disentanglement}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28167-28176} }
Vamba: Understanding Hour-Long Videos with Hybrid Mamba-Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2025_ICCV, author = {Ren, Weiming and Ma, Wentao and Yang, Huan and Wei, Cong and Zhang, Ge and Chen, Wenhu}, title = {Vamba: Understanding Hour-Long Videos with Hybrid Mamba-Transformers}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21197-21208} }
From Trial to Triumph: Advancing Long Video Understanding via Visual Context Sample Scaling and Self-reward Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Suo_2025_ICCV, author = {Suo, Yucheng and Ma, Fan and Zhu, Linchao and Wang, Tianyi and Rao, Fengyun and Yang, Yi}, title = {From Trial to Triumph: Advancing Long Video Understanding via Visual Context Sample Scaling and Self-reward Alignment}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23243-23255} }
GS-LIVM: Real-Time Photo-Realistic LiDAR-Inertial-Visual Mapping with Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2025_ICCV, author = {Xie, Yusen and Huang, Zhenmin and Wu, Jin and Ma, Jun}, title = {GS-LIVM: Real-Time Photo-Realistic LiDAR-Inertial-Visual Mapping with Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26869-26878} }
Self-Calibrating Gaussian Splatting for Large Field-of-View Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2025_ICCV, author = {Deng, Youming and Xian, Wenqi and Yang, Guandao and Guibas, Leonidas and Wetzstein, Gordon and Marschner, Steve and Debevec, Paul}, title = {Self-Calibrating Gaussian Splatting for Large Field-of-View Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25124-25133} }
Automated Model Evaluation for Object Detection via Prediction Consistency and Reliability-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yoo_2025_ICCV, author = {Yoo, Seungju and Kwon, Hyuk and Hwang, Joong-Won and Lee, Kibok}, title = {Automated Model Evaluation for Object Detection via Prediction Consistency and Reliability}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19764-19773} }
Stepping Out of Similar Semantic Space for Open-Vocabulary Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Yong and Wu, Song-Li and Bai, Sule and Wang, Jiahao and Wang, Yitong and Tang, Yansong}, title = {Stepping Out of Similar Semantic Space for Open-Vocabulary Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22664-22674} }
DictAS: A Framework for Class-Generalizable Few-Shot Anomaly Segmentation via Dictionary Lookup-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qu_2025_ICCV, author = {Qu, Zhen and Tao, Xian and Gong, Xinyi and Qu, ShiChen and Zhang, Xiaopei and Wang, Xingang and Shen, Fei and Zhang, Zhengtao and Prasad, Mukesh and Ding, Guiguang}, title = {DictAS: A Framework for Class-Generalizable Few-Shot Anomaly Segmentation via Dictionary Lookup}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20519-20528} }
Incremental Few-Shot Semantic Segmentation via Multi-Level Switchable Visual Prompts-
[pdf]
[supp]
[bibtex]@InProceedings{Wan_2025_ICCV, author = {Wan, Maoxian and Li, Kaige and Geng, Qichuan and Shi, Weimin and Zhou, Zhong}, title = {Incremental Few-Shot Semantic Segmentation via Multi-Level Switchable Visual Prompts}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24113-24122} }
Faster and Better 3D Splatting via Group Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Chengbo and Ma, Guozheng and Xue, Yifei and Lao, Yizhen}, title = {Faster and Better 3D Splatting via Group Training}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27968-27977} }
Flow-MIL: Constructing Highly-expressive Latent Feature Space For Whole Slide Image Classification Using Normalizing Flow-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2025_ICCV, author = {Ma, Yingfan and An, Bohan and Shen, Ao and Yuan, Mingzhi and Duan, Minghong and Wang, Manning}, title = {Flow-MIL: Constructing Highly-expressive Latent Feature Space For Whole Slide Image Classification Using Normalizing Flow}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23561-23570} }
3DRealCar: An In-the-wild RGB-D Car Dataset with 360-degree Views-
[pdf]
[supp]
[bibtex]@InProceedings{Du_2025_ICCV, author = {Du, Xiaobiao and Wang, Yida and Sun, Haiyang and Wu, Zhuojie and Sheng, Hongwei and Wang, Shuyun and Ying, Jiaying and Lu, Ming and Zhu, Tianqing and Zhan, Kun and Yu, Xin}, title = {3DRealCar: An In-the-wild RGB-D Car Dataset with 360-degree Views}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26488-26498} }
The Devil is in the Spurious Correlations: Boosting Moment Retrieval with Dynamic Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Xinyang and Wei, Fanyue and Duan, Lixin and Yao, Angela and Li, Wen}, title = {The Devil is in the Spurious Correlations: Boosting Moment Retrieval with Dynamic Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20981-20990} }
Event-based Visual Vibrometry-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Xinyu and Duan, Peiqi and Xiaokaiti, Yeliduosi and Xu, Chao and Shi, Boxin}, title = {Event-based Visual Vibrometry}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24666-24676} }
Keep Your Friends Close, and Your Enemies Farther: Distance-aware Voxel-wise Contrastive Learning for Semi-supervised Multi-organ Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_ICCV, author = {Zhao, Haochen and Niu, Jianwei and Liu, Xuefeng and Xie, Xiaozheng and Kuang, Li and Yang, Haotian and Dai, Bin and Meng, Hui and Wang, Yong}, title = {Keep Your Friends Close, and Your Enemies Farther: Distance-aware Voxel-wise Contrastive Learning for Semi-supervised Multi-organ Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21832-21842} }
Online Reasoning Video Segmentation with Just-in-Time Digital Twins-
[pdf]
[arXiv]
[bibtex]@InProceedings{Shen_2025_ICCV, author = {Shen, Yiqing and Liu, Bohan and Li, Chenjia and Seenivasan, Lalithkumar and Unberath, Mathias}, title = {Online Reasoning Video Segmentation with Just-in-Time Digital Twins}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24698-24706} }
InterGSEdit: Interactive 3D Gaussian Splatting Editing with 3D Geometry-Consistent Attention Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wen_2025_ICCV, author = {Wen, Minghao and Wu, Shengjie and Wang, Kangkan and Liang, Dong}, title = {InterGSEdit: Interactive 3D Gaussian Splatting Editing with 3D Geometry-Consistent Attention Prior}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26136-26145} }
Ph-GAN: Physics-Inspired GAN for Generating SAR Images Under Limited Data-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Xidan and Zhuang, Yihan and Guo, Qian and Yang, Haodong and Qian, Xuelin and Cheng, Gong and Han, Junwei and Huang, Zhongling}, title = {Ph-GAN: Physics-Inspired GAN for Generating SAR Images Under Limited Data}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {29075-29085} }
Interpretable point cloud classification using multiple instance learning-
[pdf]
[supp]
[bibtex]@InProceedings{De_Vries_2025_ICCV, author = {De Vries, Matt and Naidoo, Reed and Fourkioti, Olga and Dent, Lucas G. and Curry, Nathan and Dunsby, Chris and Bakal, Chris}, title = {Interpretable point cloud classification using multiple instance learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22209-22220} }
Efficient Fine-Tuning of Large Models via Nested Low-Rank Adaptation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Lujun and Lin, Cheng and Li, Dezhi and Huang, You-Liang and Li, Wei and Wu, Tianyu and Zou, Jie and Xue, Wei and Han, Sirui and Guo, Yike}, title = {Efficient Fine-Tuning of Large Models via Nested Low-Rank Adaptation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22252-22262} }
CoSMIC: Continual Self-supervised Learning for Multi-Domain Medical Imaging via Conditional Mutual Information Maximization-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Yihang and Wen, Ying and Yang, Longzhen and He, Lianghua and Shen, Heng Tao}, title = {CoSMIC: Continual Self-supervised Learning for Multi-Domain Medical Imaging via Conditional Mutual Information Maximization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23051-23062} }
Learnable Retrieval Enhanced Visual-Text Alignment and Fusion for Radiology Report Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Qin and Liang, Guoyan and Li, Xindi and Chen, Jingyuan and Wang, Zhe and Yao, Chang and Wu, Sai}, title = {Learnable Retrieval Enhanced Visual-Text Alignment and Fusion for Radiology Report Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22529-22538} }
Revisiting Adversarial Patch Defenses on Object Detectors: Unified Evaluation, Large-Scale Dataset, and New Insights-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_ICCV, author = {Zheng, Junhao and Sun, Jiahao and Lin, Chenhao and Zhao, Zhengyu and Ma, Chen and Zhang, Chong and Wang, Cong and Wang, Qian and Shen, Chao}, title = {Revisiting Adversarial Patch Defenses on Object Detectors: Unified Evaluation, Large-Scale Dataset, and New Insights}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23476-23486} }
4D-Bench: Benchmarking Multi-modal Large Language Models for 4D Object Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2025_ICCV, author = {Zhu, Wenxuan and Li, Bing and Zheng, Cheng and Mai, Jinjie and Chen, Jun and Jiang, Letian and Hamdi, Abdullah and Martinez, Sara Rojas and Lin, Chia-Wen and Elhoseiny, Mohamed and Ghanem, Bernard}, title = {4D-Bench: Benchmarking Multi-modal Large Language Models for 4D Object Understanding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21129-21143} }
How Do Optical Flow and Textual Prompts Collaborate to Assist in Audio-Visual Semantic Segmentation?-
[pdf]
[bibtex]@InProceedings{Lee_2025_ICCV, author = {Lee, Yujian and Gao, Peng and Xu, Yongqi and Fan, Wentao}, title = {How Do Optical Flow and Textual Prompts Collaborate to Assist in Audio-Visual Semantic Segmentation?}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23342-23352} }
Epona: Autoregressive Diffusion World Model for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Kaiwen and Tang, Zhenyu and Hu, Xiaotao and Pan, Xingang and Guo, Xiaoyang and Liu, Yuan and Huang, Jingwei and Yuan, Li and Zhang, Qian and Long, Xiao-Xiao and Cao, Xun and Yin, Wei}, title = {Epona: Autoregressive Diffusion World Model for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27220-27230} }
Visual Surface Wave Elastography: Revealing Subsurface Physical Properties via Visible Surface Waves-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ogren_2025_ICCV, author = {Ogren, Alexander C. and Feng, Berthy T. and Ahn, Jihoon and Bouman, Katherine L. and Daraio, Chiara}, title = {Visual Surface Wave Elastography: Revealing Subsurface Physical Properties via Visible Surface Waves}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26446-26455} }
ViCTr: Vital Consistency Transfer for Pathology Aware Image Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Susladkar_2025_ICCV, author = {Susladkar, Onkar and Deshmukh, Gayatri and Tur, Yalcin and Durak, Gorkem and Bagci, Ulas}, title = {ViCTr: Vital Consistency Transfer for Pathology Aware Image Synthesis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22772-22782} }
When Pixel Difference Patterns Meet ViT: PiDiViT for Few-Shot Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Hongliang and Liu, Yongxiang and Mo, Canyu and Li, Weijie and Peng, Bowen and Liu, Li}, title = {When Pixel Difference Patterns Meet ViT: PiDiViT for Few-Shot Object Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24309-24318} }
From Objects to Events: Unlocking Complex Visual Understanding in Object Detectors via LLM-guided Symbolic Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2025_ICCV, author = {Zeng, Yuhui and Wu, Haoxiang and Nie, Wenjie and Chen, Guangyao and Zheng, Xiawu and Shen, Yunhang and Peng, Jun and Tian, Yonghong and Ji, Rongrong}, title = {From Objects to Events: Unlocking Complex Visual Understanding in Object Detectors via LLM-guided Symbolic Reasoning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24380-24391} }
Pruning All-Rounder: Rethinking and Improving Inference Efficiency for Large Vision Language Models-
[pdf]
[bibtex]@InProceedings{Suo_2025_ICCV, author = {Suo, Wei and Ma, Ji and Sun, Mengyang and Wu, Lin Yuanbo and Wang, Peng and Zhang, Yanning}, title = {Pruning All-Rounder: Rethinking and Improving Inference Efficiency for Large Vision Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20247-20256} }
Discontinuity-aware Normal Integration for Generic Central Camera Models-
[pdf]
[supp]
[bibtex]@InProceedings{Milano_2025_ICCV, author = {Milano, Francesco and L\'opez-Antequera, Manuel and Dhingra, Naina and Siegwart, Roland and Thiel, Robert}, title = {Discontinuity-aware Normal Integration for Generic Central Camera Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26026-26034} }
OmniDiff: A Comprehensive Benchmark for Fine-grained Image Difference Captioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Yuan and Hou, Saihui and Hou, Saijie and Du, Jiabao and Meng, Shibei and Huang, Yongzhen}, title = {OmniDiff: A Comprehensive Benchmark for Fine-grained Image Difference Captioning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21440-21449} }
MAESTRO: Task-Relevant Optimization via Adaptive Feature Enhancement and Suppression for Multi-task 3D Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2025_ICCV, author = {Kang, Changwon and Kim, Jisong and Shin, Hongjae and Park, Junseo and Choi, Jun Won}, title = {MAESTRO: Task-Relevant Optimization via Adaptive Feature Enhancement and Suppression for Multi-task 3D Perception}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28313-28323} }
CasP: Improving Semi-Dense Feature Matching Pipeline Leveraging Cascaded Correspondence Priors for Guidance-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Peiqi and Yu, Lei and Wan, Yi and Pei, Yingying and Liu, Xinyi and Yao, Yongxiang and Zhang, Yingying and Ru, Lixiang and Zhong, Liheng and Chen, Jingdong and Yang, Ming and Zhang, Yongjun}, title = {CasP: Improving Semi-Dense Feature Matching Pipeline Leveraging Cascaded Correspondence Priors for Guidance}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28063-28072} }
ResQ: A Novel Framework to Implement Residual Neural Networks on Analog Rydberg Atom Quantum Computers-
[pdf]
[arXiv]
[bibtex]@InProceedings{DiBrita_2025_ICCV, author = {DiBrita, Nicholas S. and Han, Jason and Patel, Tirthak}, title = {ResQ: A Novel Framework to Implement Residual Neural Networks on Analog Rydberg Atom Quantum Computers}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20085-20094} }
Coordinate-based Speed of Sound Recovery for Aberration-Corrected Photoacoustic Computed Tomography-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Tianao and Cui, Manxiu and Ma, Cheng and Alexander, Emma}, title = {Coordinate-based Speed of Sound Recovery for Aberration-Corrected Photoacoustic Computed Tomography}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27466-27475} }
HarmonySeg: Tubular Structure Segmentation with Deep-Shallow Feature Fusion and Growth-Suppression Balanced Loss-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_ICCV, author = {Huang, Yi and Zhang, Ke and Liu, Wei and Wang, Yuanyuan and Patel, Vishal M. and Lu, Le and Han, Xu and Jin, Dakai and Yan, Ke}, title = {HarmonySeg: Tubular Structure Segmentation with Deep-Shallow Feature Fusion and Growth-Suppression Balanced Loss}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23571-23581} }
Beyond Training: Dynamic Token Merging for Zero-Shot Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Yiming and Zhao, Zhuokai and Chen, Zhaorun and Ding, Zenghui and Yang, Xianjun and Sun, Yining}, title = {Beyond Training: Dynamic Token Merging for Zero-Shot Video Understanding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22046-22055} }
CounterPC: Counterfactual Feature Realignment for Unsupervised Domain Adaptation on Point Clouds-
[pdf]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Feng and Cao, Yichao and Su, Xiu and Niu, Dan and Li, Xuanpeng}, title = {CounterPC: Counterfactual Feature Realignment for Unsupervised Domain Adaptation on Point Clouds}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24760-24769} }
MPG-SAM 2: Adapting SAM 2 with Mask Priors and Global Context for Referring Video Object Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rong_2025_ICCV, author = {Rong, Fu and Lan, Meng and Zhang, Qian and Zhang, Lefei}, title = {MPG-SAM 2: Adapting SAM 2 with Mask Priors and Global Context for Referring Video Object Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23979-23989} }
FreeSplatter: Pose-free Gaussian Splatting for Sparse-view 3D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Jiale and Gao, Shenghua and Shan, Ying}, title = {FreeSplatter: Pose-free Gaussian Splatting for Sparse-view 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25442-25452} }
FreqPDE: Rethinking Positional Depth Embedding for Multi-View 3D Object Detection Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Su_2025_ICCV, author = {Su, Haisheng and Zhang, Junjie and Song, Feixiang and Zhou, Sanping and Wu, Wei and Yan, Junchi and Zheng, Nanning}, title = {FreqPDE: Rethinking Positional Depth Embedding for Multi-View 3D Object Detection Transformers}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28145-28155} }
Hierarchy-Aware Pseudo Word Learning with Text Adaptation for Zero-Shot Composed Image Retrieval-
[pdf]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Zhe and Zhang, Lei and Fu, Zheren and Zhang, Kun and Mao, Zhendong}, title = {Hierarchy-Aware Pseudo Word Learning with Text Adaptation for Zero-Shot Composed Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24319-24329} }
WSI-LLaVA: A Multimodal Large Language Model for Whole Slide Image-
[pdf]
[supp]
[bibtex]@InProceedings{Liang_2025_ICCV, author = {Liang, Yuci and Lyu, Xinheng and Chen, Wenting and Ding, Meidan and Zhang, Jipeng and He, Xiangjian and Wu, Song and Xing, Xiaohan and Yang, Sen and Wang, Xiyue and Shen, Linlin}, title = {WSI-LLaVA: A Multimodal Large Language Model for Whole Slide Image}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22718-22727} }
Enhancing Partially Relevant Video Retrieval with Hyperbolic Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Jun and Wang, Jinpeng and Tan, Chaolei and Lian, Niu and Chen, Long and Wang, Yaowei and Zhang, Min and Xia, Shu-Tao and Chen, Bin}, title = {Enhancing Partially Relevant Video Retrieval with Hyperbolic Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23074-23084} }
Learning Yourself: Class-Incremental Semantic Segmentation with Language-Inspired Bootstrapped Disentanglement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Ruitao and Zhao, Yifan and Li, Jia}, title = {Learning Yourself: Class-Incremental Semantic Segmentation with Language-Inspired Bootstrapped Disentanglement}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21623-21634} }
Towards Fine-grained Interactive Segmentation in Images and Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yao_2025_ICCV, author = {Yao, Yuan and Yang, Qiushi and Cui, Miaomiao and Bo, Liefeng}, title = {Towards Fine-grained Interactive Segmentation in Images and Videos}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22509-22518} }
Puzzle Similarity: A Perceptually-guided Cross-Reference Metric for Artifact Detection in 3D Scene Reconstructions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hermann_2025_ICCV, author = {Hermann, Nicolai and Condor, Jorge and Didyk, Piotr}, title = {Puzzle Similarity: A Perceptually-guided Cross-Reference Metric for Artifact Detection in 3D Scene Reconstructions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28881-28891} }
AlignDiff: Learning Physically-Grounded Camera Alignment via Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_ICCV, author = {Xie, Liuyue and Guo, Jiancong and Cakmakci, Ozan and Araujo, Andre and Jeni, L\'aszl\'o A. and Jia, Zhiheng}, title = {AlignDiff: Learning Physically-Grounded Camera Alignment via Diffusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26901-26911} }
Toward Fair and Accurate Cross-Domain Medical Image Segmentation: A VLM-Driven Active Domain Adaptation Paradigm-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Hongqiu and Chen, Wu and Luo, Xiangde and Xing, Zhaohu and Liu, Lihao and Qin, Jing and Wu, Shaozhi and Zhu, Lei}, title = {Toward Fair and Accurate Cross-Domain Medical Image Segmentation: A VLM-Driven Active Domain Adaptation Paradigm}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24102-24112} }
Scaling Transformer-Based Novel View Synthesis with Models Token Disentanglement and Synthetic Data-
[pdf]
[supp]
[bibtex]@InProceedings{Nair_2025_ICCV, author = {Nair, Nithin Gopalakrishnan and Kaza, Srinivas and Luo, Xuan and Patel, Vishal M. and Lombardi, Stephen and Park, Jungyeon}, title = {Scaling Transformer-Based Novel View Synthesis with Models Token Disentanglement and Synthetic Data}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28567-28576} }
Multi-Schema Proximity Network for Composed Image Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Shi_2025_ICCV, author = {Shi, Jiangming and Yin, Xiangbo and Chen, Yeyun and Zhang, Yachao and Zhang, Zhizhong and Xie, Yuan and Qu, Yanyun}, title = {Multi-Schema Proximity Network for Composed Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19999-20008} }
Harnessing Text-to-Image Diffusion Models for Point Cloud Self-Supervised Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Yiyang and Zhao, Shanshan and Duan, Lunhao and Ding, Changxing and Tao, Dacheng}, title = {Harnessing Text-to-Image Diffusion Models for Point Cloud Self-Supervised Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26156-26166} }
Bridging the Gap between Brain and Machine in Interpreting Visual Semantics: Towards Self-adaptive Brain-to-Text Decoding-
[pdf]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Jiaxuan and Qi, Yu and Wang, Yueming and Pan, Gang}, title = {Bridging the Gap between Brain and Machine in Interpreting Visual Semantics: Towards Self-adaptive Brain-to-Text Decoding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21938-21948} }
Doppler-Aware LiDAR-RADAR Fusion for Weather-Robust 3D Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Chae_2025_ICCV, author = {Chae, Yujeong and Park, Heejun and Kim, Hyeonseong and Yoon, Kuk-Jin}, title = {Doppler-Aware LiDAR-RADAR Fusion for Weather-Robust 3D Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27197-27208} }
VideoOrion: Tokenizing Object Dynamics in Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2025_ICCV, author = {Feng, Yicheng and Li, Yijiang and Zhang, Wanpeng and Zheng, Sipeng and Luo, Hao and Yue, Zihao and Lu, Zongqing}, title = {VideoOrion: Tokenizing Object Dynamics in Videos}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20401-20412} }
Cracking Instance Jigsaw Puzzles: An Alternative to Multiple Instance Learning for Whole Slide Image Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Xiwen and Qiu, Peijie and Zhu, Wenhui and Wang, Hao and Li, Huayu and Dong, Xuanzhao and Sun, Xiaotong and Yu, Xiaobing and Wang, Yalin and Razi, Abolfazl and Sotiras, Aristeidis}, title = {Cracking Instance Jigsaw Puzzles: An Alternative to Multiple Instance Learning for Whole Slide Image Analysis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21353-21363} }
FlowR: Flowing from Sparse to Dense 3D Reconstructions-
[pdf]
[supp]
[bibtex]@InProceedings{Fischer_2025_ICCV, author = {Fischer, Tobias and Bul\`o, Samuel Rota and Yang, Yung-Hsu and Keetha, Nikhil and Porzi, Lorenzo and M\"uller, Norman and Schwarz, Katja and Luiten, Jonathon and Pollefeys, Marc and Kontschieder, Peter}, title = {FlowR: Flowing from Sparse to Dense 3D Reconstructions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27702-27712} }
SPA: Efficient User-Preference Alignment against Uncertainty in Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_ICCV, author = {Zhu, Jiayuan and Wu, Junde and Ouyang, Cheng and Kamnitsas, Konstantinos and Noble, J. Alison}, title = {SPA: Efficient User-Preference Alignment against Uncertainty in Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23731-23740} }
GaussianReg: Rapid 2D/3D Registration for Emergency Surgery via Explicit 3D Modeling with Gaussian Primitives-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2025_ICCV, author = {Yu, Weihao and Guo, Xiaoqing and Liu, Xinyu and Liu, Yifan and Zheng, Hao and Huang, Yawen and Yuan, Yixuan}, title = {GaussianReg: Rapid 2D/3D Registration for Emergency Surgery via Explicit 3D Modeling with Gaussian Primitives}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21482-21491} }
Robustifying Zero-Shot Vision Language Models by Subspaces Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Dong_2025_ICCV, author = {Dong, Junhao and Koniusz, Piotr and Feng, Liaoyuan and Zhang, Yifei and Zhu, Hao and Liu, Weiming and Qu, Xinghua and Ong, Yew-Soon}, title = {Robustifying Zero-Shot Vision Language Models by Subspaces Alignment}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21037-21047} }
GaRe: Relightable 3D Gaussian Splatting for Outdoor Scenes from Unconstrained Photo Collections-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bai_2025_ICCV, author = {Bai, Haiyang and Zhu, Jiaqi and Jiang, Songru and Huang, Wei and Lu, Tao and Li, Yuanqi and Guo, Jie and Fu, Runze and Guo, Yanwen and Chen, Lijun}, title = {GaRe: Relightable 3D Gaussian Splatting for Outdoor Scenes from Unconstrained Photo Collections}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26456-26465} }
LoD-Loc v2: Aerial Visual Localization over Low Level-of-Detail City Models using Explicit Silhouette Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_ICCV, author = {Zhu, Juelin and Peng, Shuaibang and Wang, Long and Tan, Hanlin and Liu, Yu and Zhang, Maojun and Yan, Shen}, title = {LoD-Loc v2: Aerial Visual Localization over Low Level-of-Detail City Models using Explicit Silhouette Alignment}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26610-26621} }
EYE3:Turn Anything into Naked-eye 3D-
[pdf]
[supp]
[bibtex]@InProceedings{Song_2025_ICCV, author = {Song, Yingde and Yang, Zongyuan and Liu, Baolin and Xiong, Yongping and Chen, Sai and Yi, Lan and Zhang, Zhaohe and Yu, Xunbo}, title = {EYE3:Turn Anything into Naked-eye 3D}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27862-27871} }
LV-MAE: Learning Long Video Representations through Masked-Embedding Autoencoders-
[pdf]
[supp]
[bibtex]@InProceedings{Naiman_2025_ICCV, author = {Naiman, Ilan and Ben-Baruch, Emanuel and Anschel, Oron and Shoshan, Alon and Kviatkovsky, Igor and Aggarwal, Manoj and Medioni, Gerard}, title = {LV-MAE: Learning Long Video Representations through Masked-Embedding Autoencoders}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21398-21407} }
STDDNet: Harnessing Mamba for Video Polyp Segmentation via Spatial-aligned Temporal Modeling and Discriminative Dynamic Representation Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Guilian and Wu, Huisi and Qin, Jing}, title = {STDDNet: Harnessing Mamba for Video Polyp Segmentation via Spatial-aligned Temporal Modeling and Discriminative Dynamic Representation Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21364-21373} }
2D Gaussian Splatting-based Sparse-view Transparent Object Depth Reconstruction via Physics Simulation for Scene Update-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Jeongyun and Jeong, Seunghoon and Kim, Giseop and Jeon, Myung-Hwan and Jun, Eunji and Kim, Ayoung},
  title     = {{2D} {Gaussian} Splatting-based Sparse-view Transparent Object Depth Reconstruction via Physics Simulation for Scene Update},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27927--27936},
}
HouseCrafter: Lifting Floorplans to 3D Scenes with 2D Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Yiwen and Nguyen, Hieu T. and Voleti, Vikram and Jampani, Varun and Jiang, Huaizu},
  title     = {{HouseCrafter}: Lifting Floorplans to {3D} Scenes with {2D} Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28440--28450},
}
Leveraging Local Patch Alignment to Seam-cutting for Large Parallax Image Stitching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liao_2025_ICCV,
  author    = {Liao, Tianli and Zhao, Chenyang and Li, Lei and Cao, Heling},
  title     = {Leveraging Local Patch Alignment to Seam-cutting for Large Parallax Image Stitching},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27262--27271},
}
Revisiting Efficient Semantic Segmentation: Learning Offsets for Better Spatial and Class Feature Alignment-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Shi-Chen and Li, Yunheng and Wu, Yu-Huan and Hou, Qibin and Cheng, Ming-Ming},
  title     = {Revisiting Efficient Semantic Segmentation: Learning Offsets for Better Spatial and Class Feature Alignment},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22361--22371},
}
TAD-E2E: A Large-scale End-to-end Autonomous Driving Dataset-
[pdf]
[bibtex]@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Chang and Zhu, Mingxu and Zhang, Zheyuan and Song, Linna and Zhao, Xiao and Luo, Qingliang and Wang, Qi and Guo, Chufan and Su, Kuifeng},
  title     = {{TAD-E2E}: A Large-scale End-to-end Autonomous Driving Dataset},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26600--26609},
}
LVAgent: Long Video Understanding by Multi-Round Dynamical Collaboration of MLLM Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Boyu and Yue, Zhengrong and Chen, Siran and Wang, Zikang and Liu, Yang and Li, Peng and Wang, Yali},
  title     = {{LVAgent}: Long Video Understanding by Multi-Round Dynamical Collaboration of {MLLM} Agents},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20237--20246},
}
Spatio-Spectral Pattern Illumination for Direct and Indirect Separation from a Single Hyperspectral Image-
[pdf]
[supp]
[bibtex]@InProceedings{Ishihara_2025_ICCV,
  author    = {Ishihara, Shin and Sato, Imari},
  title     = {Spatio-Spectral Pattern Illumination for Direct and Indirect Separation from a Single Hyperspectral Image},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26827--26836},
}
Explaining Human Preferences via Metrics for Structured 3D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Langerman_2025_ICCV,
  author    = {Langerman, Jack and Rozumnyi, Denys and Huang, Yuzhong and Mishkin, Dmytro},
  title     = {Explaining Human Preferences via Metrics for Structured {3D} Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26944--26953},
}
VistaDream: Sampling multiview consistent images for single-view scene reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Haiping and Liu, Yuan and Liu, Ziwei and Wang, Wenping and Dong, Zhen and Yang, Bisheng},
  title     = {{VistaDream}: Sampling multiview consistent images for single-view scene reconstruction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26772--26782},
}
LangScene-X: Reconstruct Generalizable 3D Language-Embedded Scenes with TriMap Video Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Fangfu and Li, Hao and Chi, Jiawei and Wang, Hanyang and Yang, Minghui and Wang, Fudong and Duan, Yueqi},
  title     = {{LangScene-X}: Reconstruct Generalizable {3D} Language-Embedded Scenes with {TriMap} Video Diffusion},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {29010--29020},
}
Orchid: Image Latent Diffusion for Joint Appearance and Geometry Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Krishnan_2025_ICCV,
  author    = {Krishnan, Akshay and Yan, Xinchen and Casser, Vincent and Kundu, Abhijit},
  title     = {Orchid: Image Latent Diffusion for Joint Appearance and Geometry Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28217--28227},
}
MonoMVSNet: Monocular Priors Guided Multi-View Stereo Network-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_ICCV,
  author    = {Jiang, Jianfei and Liu, Qiankun and Yu, Haochen and Liu, Hongyuan and Wang, Liyong and Chen, Jiansheng and Ma, Huimin},
  title     = {{MonoMVSNet}: Monocular Priors Guided Multi-View Stereo Network},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27806--27816},
}
Top2Pano: Learning to Generate Indoor Panoramas from Top-Down View-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Zitong and Gautam, Suranjan and Yu, Rui},
  title     = {{Top2Pano}: Learning to Generate Indoor Panoramas from Top-Down View},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28493--28502},
}
Unbiased Missing-modality Multimodal Learning-
[pdf]
[bibtex]@InProceedings{Dai_2025_ICCV,
  author    = {Dai, Ruiting and Li, Chenxi and Yan, Yandong and Mo, Lisi and Qin, Ke and He, Tao},
  title     = {Unbiased Missing-modality Multimodal Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24507--24517},
}
Intermediate Connectors and Geometric Priors for Language-Guided Affordance Segmentation on Unseen Object Categories-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Yicong and Chen, Yiyang and Ma, Zhenyuan and Xiao, Junbin and Wang, Xiang and Yao, Angela},
  title     = {Intermediate Connectors and Geometric Priors for Language-Guided Affordance Segmentation on Unseen Object Categories},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22836--22845},
}
FALCON: Resolving Visual Redundancy and Fragmentation in High-resolution Multimodal Large Language Models via Visual Registers-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Renshan and Shao, Rui and Chen, Gongwei and Zhang, Miao and Zhou, Kaiwen and Guan, Weili and Nie, Liqiang},
  title     = {{FALCON}: Resolving Visual Redundancy and Fragmentation in High-resolution Multimodal Large Language Models via Visual Registers},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23530--23540},
}
Transformer-based Tooth Alignment Prediction with Occlusion and Collision Constraints-
[pdf]
[arXiv]
[bibtex]@InProceedings{Dong_2025_ICCV,
  author    = {Dong, Zhenxing and Chen, Jiazhou},
  title     = {Transformer-based Tooth Alignment Prediction with Occlusion and Collision Constraints},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25145--25154},
}
Perspective-aware 3D Gaussian Inpainting with Multi-view Consistency-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2025_ICCV,
  author    = {Cheng, Yuxin and Huang, Binxiao and Wu, Taiqiang and Zhou, Wenyong and Ding, Chenchen and Liu, Zhengwu and Chesi, Graziano and Wong, Ngai},
  title     = {Perspective-aware {3D} {Gaussian} Inpainting with Multi-view Consistency},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28503--28513},
}
DistillDrive: End-to-End Multi-Mode Autonomous Driving Distillation by Isomorphic Hetero-Source Planning Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Rui and Zhang, Xianghang and Zhao, Runkai and Yan, Huaicheng and Wang, Meng},
  title     = {{DistillDrive}: End-to-End Multi-Mode Autonomous Driving Distillation by Isomorphic Hetero-Source Planning Model},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26188--26197},
}
Wide2Long: Learning Lens Compression and Perspective Adjustment for Wide-Angle to Telephoto Translation-
[pdf]
[supp]
[bibtex]@InProceedings{Banerjee_2025_ICCV,
  author    = {Banerjee, Soumyadipta and Paik, Jiaul H. and Sen, Debashis},
  title     = {{Wide2Long}: Learning Lens Compression and Perspective Adjustment for Wide-Angle to Telephoto Translation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {29001--29009},
}
V2XScenes: A Multiple Challenging Traffic Conditions Dataset for Large-Range Vehicle-Infrastructure Collaborative Perception-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Bowen and Wang, Yafei and Gong, Wei and Chen, Siheng and Liu, Genjia and Xiong, Minhao and Ng, Chin Long},
  title     = {{V2XScenes}: A Multiple Challenging Traffic Conditions Dataset for Large-Range Vehicle-Infrastructure Collaborative Perception},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28385--28395},
}
Is CLIP ideal? No. Can we fix it? Yes!-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2025_ICCV,
  author    = {Kang, Raphi and Song, Yue and Gkioxari, Georgia and Perona, Pietro},
  title     = {Is {CLIP} ideal? No. Can we fix it? Yes!},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22436--22446},
}
LaneDiffusion: Improving Centerline Graph Learning via Prior Injected BEV Feature Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Zijie and Zhang, Weiming and Zhang, Wei and Tan, Xiao and Liu, Hongxing and Wang, Yaowei and Li, Guanbin},
  title     = {{LaneDiffusion}: Improving Centerline Graph Learning via Prior Injected {BEV} Feature Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27052--27062},
}
Height-Fidelity Dense Global Fusion for Multi-modal 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Hanshi and Gao, Jin and Hu, Weiming and Zhang, Zhipeng},
  title     = {Height-Fidelity Dense Global Fusion for Multi-modal {3D} Object Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26664--26674},
}
DIH-CLIP: Unleashing the Diversity of Multi-Head Self-Attention for Training-Free Open-Vocabulary Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Duan_2025_ICCV,
  author    = {Duan, Songsong and Yang, Xi and Wang, Nannan},
  title     = {{DIH-CLIP}: Unleashing the Diversity of Multi-Head Self-Attention for Training-Free Open-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22794--22803},
}
ExploreGS: Explorable 3D Scene Reconstruction with Virtual Camera Samplings and Diffusion Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Minsu and Jeon, Subin and Cho, In and Yoo, Mijin and Kim, Seon Joo},
  title     = {{ExploreGS}: Explorable {3D} Scene Reconstruction with Virtual Camera Samplings and Diffusion Priors},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27042--27051},
}
Prototypes are Balanced Units for Efficient and Effective Partially Relevant Video Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Moon_2025_ICCV,
  author    = {Moon, WonJun and Cho, Cheol-Ho and Jun, Woojin and Kim, Taeoh and Lee, Inwoong and Wee, Dongyoon and Shim, Minho and Heo, Jae-Pil},
  title     = {Prototypes are Balanced Units for Efficient and Effective Partially Relevant Video Retrieval},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21789--21799},
}
SpiLiFormer: Enhancing Spiking Transformers with Lateral Inhibition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Zeqi and Huang, Yanchen and Yu, Yingchao and Zhu, Zizheng and Tang, Junfeng and Yu, Zhaofei and Jin, Yaochu},
  title     = {{SpiLiFormer}: Enhancing Spiking Transformers with Lateral Inhibition},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24539--24548},
}
Seeing the Trees for the Forest: Rethinking Weakly-Supervised Medical Visual Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huy_2025_ICCV,
  author    = {Huy, Ta Duc and Huynh, Duy Anh and Xie, Yutong and Qi, Yuankai and Chen, Qi and Le Nguyen, Phi and Tran, Sen Kim and Phung, Son Lam and van den Hengel, Anton and Liao, Zhibin and To, Minh-Son and Verjans, Johan W. and Phan, Vu Minh Hieu},
  title     = {Seeing the Trees for the Forest: Rethinking Weakly-Supervised Medical Visual Grounding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24445--24455},
}
GroundingSuite: Measuring Complex Multi-Granular Pixel Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Rui and Zhu, Lianghui and Zhang, Yuxuan and Cheng, Tianheng and Liu, Lei and Liu, Heng and Ran, Longjin and Chen, Xiaoxin and Liu, Wenyu and Wang, Xinggang},
  title     = {{GroundingSuite}: Measuring Complex Multi-Granular Pixel Grounding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23105--23114},
}
VideoRFSplat: Direct Scene-Level Text-to-3D Gaussian Splatting Generation with Flexible Pose and Multi-View Joint Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Go_2025_ICCV,
  author    = {Go, Hyojun and Park, Byeongjun and Nam, Hyelin and Kim, Byung-Hoon and Chung, Hyungjin and Kim, Changick},
  title     = {{VideoRFSplat}: Direct Scene-Level Text-to-{3D} {Gaussian} Splatting Generation with Flexible Pose and Multi-View Joint Modeling},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26706--26717},
}
DATA: Domain-And-Time Alignment for High-Quality Feature Fusion in Collaborative Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tian_2025_ICCV,
  author    = {Tian, Chengchang and Ma, Jianwei and Huang, Yan and Chen, Zhanye and Wei, Honghao and Zhang, Hui and Hong, Wei},
  title     = {{DATA}: Domain-And-Time Alignment for High-Quality Feature Fusion in Collaborative Perception},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28643--28652},
}
Bringing RNNs Back to Efficient Open-Ended Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Weili and Song, Enxin and Chai, Wenhao and Wen, Xuexiang and Ye, Tian and Wang, Gaoang},
  title     = {Bringing {RNNs} Back to Efficient Open-Ended Video Understanding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23453--23465},
}
ViM-VQ: Efficient Post-Training Vector Quantization for Visual Mamba-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2025_ICCV,
  author    = {Deng, Juncan and Li, Shuaiting and Wang, Zeyu and Xu, Kedong and Gu, Hong and Huang, Kejie},
  title     = {{ViM-VQ}: Efficient Post-Training Vector Quantization for Visual {Mamba}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24518--24527},
}
EVEv2: Improved Baselines for Encoder-Free Vision-Language Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Diao_2025_ICCV,
  author    = {Diao, Haiwen and Li, Xiaotong and Cui, Yufeng and Wang, Yueze and Deng, Haoge and Pan, Ting and Wang, Wenxuan and Lu, Huchuan and Wang, Xinlong},
  title     = {{EVEv2}: Improved Baselines for Encoder-Free Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21014--21025},
}
End-to-End Driving with Online Trajectory Evaluation via BEV World Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Yingyan and Wang, Yuqi and Liu, Yang and He, Jiawei and Fan, Lue and Zhang, Zhaoxiang},
  title     = {End-to-End Driving with Online Trajectory Evaluation via {BEV} World Model},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27137--27146},
}
MiDSummer: Multi-Guidance Diffusion for Controllable Zero-Shot Immersive Gaussian Splatting Scene Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Anjun and Tomsett, Richard and Gourmet, Valentin and Camplani, Massimo and Kandola, Jas and Xie, Hanting},
  title     = {{MiDSummer}: Multi-Guidance Diffusion for Controllable Zero-Shot Immersive {Gaussian} Splatting Scene Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26793--26805},
}
Global Regulation and Excitation via Attention Tuning for Stereo Matching-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Jiahao and Chen, Xinhong and Jiang, Zhengmin and Zhou, Qian and Li, Yung-Hui and Wang, Jianping},
  title     = {Global Regulation and Excitation via Attention Tuning for Stereo Matching},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25539--25549},
}
Visual Test-time Scaling for GUI Agent Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2025_ICCV,
  author    = {Luo, Tiange and Logeswaran, Lajanugen and Johnson, Justin and Lee, Honglak},
  title     = {Visual Test-time Scaling for {GUI} Agent Grounding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19989--19998},
}
Hydra-NeXt: Robust Closed-Loop Driving with Open-Loop Training-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Zhenxin and Wang, Shihao and Lan, Shiyi and Yu, Zhiding and Wu, Zuxuan and Alvarez, Jose M.},
  title     = {{Hydra-NeXt}: Robust Closed-Loop Driving with Open-Loop Training},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27305--27314},
}
Heatmap Regression without Soft-Argmax for Facial Landmark Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Chiao-An and Yeh, Raymond A.},
  title     = {Heatmap Regression without Soft-Argmax for Facial Landmark Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28729--28739},
}
Neural Shell Texture Splatting: More Details and Fewer Primitives-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Xin and Chen, Anpei and Xiong, Jincheng and Dai, Pinxuan and Shen, Yujun and Xu, Weiwei},
  title     = {Neural Shell Texture Splatting: More Details and Fewer Primitives},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25229--25238},
}
ZeroKey: Point-Level Reasoning and Zero-Shot 3D Keypoint Detection from Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gong_2025_ICCV,
  author    = {Gong, Bingchen and Gomez, Diego and Hamdi, Abdullah and Eldesokey, Abdelrahman and Abdelreheem, Ahmed and Wonka, Peter and Ovsjanikov, Maks},
  title     = {{ZeroKey}: Point-Level Reasoning and Zero-Shot {3D} Keypoint Detection from Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22089--22099},
}
DreamCube: RGB-D Panorama Generation via Multi-plane Synchronization-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Yukun and Zhou, Yanning and Wang, Jianan and Huang, Kaiyi and Liu, Xihui},
  title     = {{DreamCube}: {RGB-D} Panorama Generation via Multi-plane Synchronization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24922--24932},
}
ScanEdit: Hierarchically-Guided Functional 3D Scan Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{El_Amine_Boudjoghra_2025_ICCV,
  author    = {El Amine Boudjoghra, Mohamed and Laptev, Ivan and Dai, Angela},
  title     = {{ScanEdit}: Hierarchically-Guided Functional {3D} Scan Editing},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27105--27115},
}
DeFSS: Image-to-Mask Denoising Learning for Few-shot Segmentation-
[pdf]
[bibtex]@InProceedings{Qin_2025_ICCV,
  author    = {Qin, Zishu and Xu, Junhao and Ge, Weifeng},
  title     = {{DeFSS}: Image-to-Mask Denoising Learning for Few-shot Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22232--22240},
}
Video2BEV: Transforming Drone Videos to BEVs for Video-based Geo-localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ju_2025_ICCV,
  author    = {Ju, Hao and Huang, Shaofei and Liu, Si and Zheng, Zhedong},
  title     = {{Video2BEV}: Transforming Drone Videos to {BEVs} for Video-based Geo-localization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27073--27083},
}
High-Precision 3D Measurement of Complex Textured Surfaces Using Multiple Filtering Approach-
[pdf]
[bibtex]@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Yuchong and Yu, Jian and Gai, Shaoyan and Cai, Zeyu and Da, Feipeng},
  title     = {High-Precision {3D} Measurement of Complex Textured Surfaces Using Multiple Filtering Approach},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25670--25679},
}
RGE-GS: Reward-Guided Expansive Driving Scene Reconstruction via Diffusion Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Du_2025_ICCV,
  author    = {Du, Sicong and Liu, Jiarun and Chen, Qifeng and Chen, Hao-Xiang and Mu, Tai-Jiang and Yang, Sheng},
  title     = {{RGE-GS}: Reward-Guided Expansive Driving Scene Reconstruction via Diffusion Priors},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25756--25764},
}
VIPerson: Flexibly Generating Virtual Identity for Person Re-Identification-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Xiao-Wen and Zhang, Delong and Peng, Yi-Xing and Ouyang, Zhi and Meng, Jingke and Zheng, Wei-Shi},
  title     = {{VIPerson}: Flexibly Generating Virtual Identity for Person Re-Identification},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23374--23384},
}
PRM: Photometric Stereo based Large Reconstruction Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ge_2025_ICCV,
  author    = {Ge, Wenhang and Lin, Jiantao and Shen, Guibao and Feng, Jiawei and Hu, Tao and Xu, Xinli and Chen, Ying-Cong},
  title     = {{PRM}: Photometric Stereo based Large Reconstruction Model},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25009--25018},
}
DriveArena: A Closed-loop Generative Simulation Platform for Autonomous Driving-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Xuemeng and Wen, Licheng and Wei, Tiantian and Ma, Yukai and Mei, Jianbiao and Li, Xin and Lei, Wenjie and Fu, Daocheng and Cai, Pinlong and Dou, Min and He, Liang and Liu, Yong and Shi, Botian and Qiao, Yu},
  title     = {{DriveArena}: A Closed-loop Generative Simulation Platform for Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26933--26943},
}
InvRGB+L: Inverse Rendering of Complex Scenes with Unified Color and LiDAR Reflectance Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Xiaoxue and Chandaka, Bhargav and Lin, Chih-Hao and Zhang, Ya-Qin and Forsyth, David and Zhao, Hao and Wang, Shenlong},
  title     = {{InvRGB+L}: Inverse Rendering of Complex Scenes with Unified Color and {LiDAR} Reflectance Modeling},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27176--27186},
}
Joint Semantic and Rendering Enhancements in 3D Gaussian Modeling with Anisotropic Local Encoding-
[pdf]
[supp]
[bibtex]@InProceedings{He_2025_ICCV,
  author    = {He, Jingming and Li, Chongyi and Wang, Shiqi and Kwong, Sam},
  title     = {Joint Semantic and Rendering Enhancements in {3D} {Gaussian} Modeling with Anisotropic Local Encoding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28354--28363},
}
Unveiling the Invisible: Reasoning Complex Occlusions Amodally with AURA-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV,
  author    = {Li, Zhixuan and Yoon, Hyunse and Lee, Sanghoon and Lin, Weisi},
  title     = {Unveiling the Invisible: Reasoning Complex Occlusions Amodally with {AURA}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21927--21937},
}
Lumina-Image 2.0: A Unified and Efficient Image Generative Framework-
[pdf]
[supp]
[bibtex]@InProceedings{Qin_2025_ICCV,
  author    = {Qin, Qi and Zhuo, Le and Xin, Yi and Du, Ruoyi and Li, Zhen and Fu, Bin and Lu, Yiting and Li, Xinyue and Liu, Dongyang and Zhu, Xiangyang and Beddow, Will and Millon, Erwann and Perez, Victor and Wang, Wenhai and Qiao, Yu and Zhang, Bo and Liu, Xiaohong and Li, Hongsheng and Xu, Chang and Gao, Peng},
  title     = {{Lumina-Image} 2.0: A Unified and Efficient Image Generative Framework},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20031--20042},
}
mmCooper: A Multi-agent Multi-stage Communication-efficient and Collaboration-robust Cooperative Perception Framework-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Bingyi and Teng, Jian and Xue, Hongfei and Wang, Enshu and Zhu, Chuanhui and Wang, Pu and Wu, Libing},
  title     = {{mmCooper}: A Multi-agent Multi-stage Communication-efficient and Collaboration-robust Cooperative Perception Framework},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28396--28406},
}
OCSplats: Observation Completeness Quantification and Label Noise Separation in 3DGS-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ling_2025_ICCV,
  author    = {Ling, Han and Xu, Xian and Sun, Yinghui and Sun, Quansen},
  title     = {{OCSplats}: Observation Completeness Quantification and Label Noise Separation in {3DGS}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25680--25689},
}
Open-Vocabulary HOI Detection with Interaction-aware Prompt and Concept Calibration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lei_2025_ICCV,
  author    = {Lei, Ting and Yin, Shaofeng and Chen, Qingchao and Peng, Yuxin and Liu, Yang},
  title     = {Open-Vocabulary {HOI} Detection with Interaction-aware Prompt and Concept Calibration},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23945--23957},
}
ROVI: A VLM-LLM Re-Captioned Dataset for Open-Vocabulary Instance-Grounded Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2025_ICCV,
  author    = {Peng, Cihang and Hou, Qiming and Ren, Zhong and Zhou, Kun},
  title     = {{ROVI}: A {VLM-LLM} Re-Captioned Dataset for Open-Vocabulary Instance-Grounded Text-to-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20204--20214},
}
A View-consistent Sampling Method for Regularized Training of Neural Radiance Fields-
[pdf]
[arXiv]
[bibtex]@InProceedings{Fan_2025_ICCV,
  author    = {Fan, Aoxiang and Dumery, Corentin and Talabot, Nicolas and Fua, Pascal},
  title     = {A View-consistent Sampling Method for Regularized Training of Neural Radiance Fields},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25961--25971},
}
AdsQA: Towards Advertisement Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Long_2025_ICCV,
  author    = {Long, Xinwei and Tian, Kai and Xu, Peng and Jia, Guoli and Li, Jingxuan and Yang, Sa and Shao, Yihua and Zhang, Kaiyan and Jiang, Che and Xu, Hao and Liu, Yang and Ma, Jiaheng and Zhou, Bowen},
  title     = {{AdsQA}: Towards Advertisement Video Understanding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23396--23407},
}
Fine-grained Abnormality Prompt Learning for Zero-shot Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Jiawen and Ong, Yew-Soon and Shen, Chunhua and Pang, Guansong},
  title     = {Fine-grained Abnormality Prompt Learning for Zero-shot Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22241--22251},
}
Unified Open-World Segmentation with Multi-Modal Prompts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yang and Yin, Yufei and Jing, Chenchen and Zhu, Muzhi and Chen, Hao and Xi, Yuling and Feng, Bo and Wang, Hao and Li, Shiyu and Shen, Chunhua},
  title     = {Unified Open-World Segmentation with Multi-Modal Prompts},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21557--21567},
}
ClaraVid: A Holistic Scene Reconstruction Benchmark From Aerial Perspective With Delentropy-Based Complexity Profiling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Beche_2025_ICCV,
  author    = {Beche, Radu and Nedevschi, Sergiu},
  title     = {{ClaraVid}: A Holistic Scene Reconstruction Benchmark From Aerial Perspective With Delentropy-Based Complexity Profiling},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26015--26025},
}
X2-Gaussian: 4D Radiative Gaussian Splatting for Continuous-time Tomographic Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2025_ICCV, author = {Yu, Weihao and Cai, Yuanhao and Zha, Ruyi and Fan, Zhiwen and Li, Chenxin and Yuan, Yixuan}, title = {X2-Gaussian: 4D Radiative Gaussian Splatting for Continuous-time Tomographic Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24728-24738} }
FrameFusion: Combining Similarity and Importance for Video Token Reduction on Large Vision Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fu_2025_ICCV, author = {Fu, Tianyu and Liu, Tengxuan and Han, Qinghao and Dai, Guohao and Yan, Shengen and Yang, Huazhong and Ning, Xuefei and Wang, Yu}, title = {FrameFusion: Combining Similarity and Importance for Video Token Reduction on Large Vision Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22654-22663} }
AURELIA: Test-time Reasoning Distillation in Audio-Visual LLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chowdhury_2025_ICCV, author = {Chowdhury, Sanjoy and Gani, Hanan and Anand, Nishit and Nag, Sayan and Gao, Ruohan and Elhoseiny, Mohamed and Khan, Salman and Manocha, Dinesh}, title = {AURELIA: Test-time Reasoning Distillation in Audio-Visual LLMs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22899-22910} }
FB-Diff: Fourier Basis-guided Diffusion for Temporal Interpolation of 4D Medical Imaging-
[pdf]
[supp]
[bibtex]@InProceedings{You_2025_ICCV, author = {You, Xin and Yang, Runze and Zhang, Chuyan and Jiang, Zhongliang and Yang, Jie and Navab, Nassir}, title = {FB-Diff: Fourier Basis-guided Diffusion for Temporal Interpolation of 4D Medical Imaging}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28010-28020} }
UNIS: A Unified Framework for Achieving Unbiased Neural Implicit Surfaces in Volume Rendering-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2025_ICCV, author = {Deng, Junkai and Niu, Hanting and Li, Jiaze and Hou, Fei and He, Ying}, title = {UNIS: A Unified Framework for Achieving Unbiased Neural Implicit Surfaces in Volume Rendering}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27671-27680} }
Seeing the Unseen: A Semantic Alignment and Context-Aware Prompt Framework for Open-Vocabulary Camouflaged Object Segmentation-
[pdf]
[bibtex]@InProceedings{Ren_2025_ICCV, author = {Ren, Peng and Bai, Tian and Sun, Jing and Sun, Fuming}, title = {Seeing the Unseen: A Semantic Alignment and Context-Aware Prompt Framework for Open-Vocabulary Camouflaged Object Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23657-23666} }
Test-time Adaptation for Foundation Medical Segmentation Model Without Parametric Updates-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Kecheng and Luo, Xinyu and Qin, Tiexin and Liu, Jie and Liu, Hui and Lee, Victor Ho Fun and Yan, Hong and Li, Haoliang}, title = {Test-time Adaptation for Foundation Medical Segmentation Model Without Parametric Updates}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20075-20084} }
Hierarchy UGP: Hierarchy Unified Gaussian Primitive for Large-Scale Dynamic Scene Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_ICCV, author = {Sun, Hongyang and Yang, Qinglin and Wang, Jiawei and Xu, Zhen and Liu, Chen and Wang, Yida and Zhan, Kun and Bao, Hujun and Zhou, Xiaowei and Peng, Sida}, title = {Hierarchy UGP: Hierarchy Unified Gaussian Primitive for Large-Scale Dynamic Scene Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26252-26262} }
DisCo: Towards Distinct and Coherent Visual Encapsulation in Video MLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_ICCV, author = {Zhao, Jiahe and Zheng, Rongkun and Wang, Yi and Wang, Helin and Zhao, Hengshuang}, title = {DisCo: Towards Distinct and Coherent Visual Encapsulation in Video MLLMs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21710-21720} }
Axis-level Symmetry Detection with Group-Equivariant Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_ICCV, author = {Yu, Wongyun and Seo, Ahyun and Cho, Minsu}, title = {Axis-level Symmetry Detection with Group-Equivariant Representation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24791-24800} }
Referring to Any Person-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_ICCV, author = {Jiang, Qing and Wu, Lin and Zeng, Zhaoyang and Ren, Tianhe and Xiong, Yuda and Chen, Yihao and Qin, Liu and Zhang, Lei}, title = {Referring to Any Person}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21667-21678} }
Statistical Confidence Rescoring for Robust 3D Scene Graph Generation from Multi-View Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yeo_2025_ICCV, author = {Yeo, Qi Xun and Li, Yanyan and Lee, Gim Hee}, title = {Statistical Confidence Rescoring for Robust 3D Scene Graph Generation from Multi-View Images}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24999-25008} }
DASH: 4D Hash Encoding with Self-Supervised Decomposition for Real-Time Dynamic Scene Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Jie and Hu, Zhangchi and Wu, Peixi and Zhu, Huyue and Li, Hebei and Sun, Xiaoyan}, title = {DASH: 4D Hash Encoding with Self-Supervised Decomposition for Real-Time Dynamic Scene Rendering}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26349-26359} }
MamV2XCalib: V2X-based Target-less Infrastructure Camera Calibration with State Space Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_ICCV, author = {Zhu, Yaoye and Wang, Zhe and Wang, Yan}, title = {MamV2XCalib: V2X-based Target-less Infrastructure Camera Calibration with State Space Model}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26696-26705} }
HiP-AD: Hierarchical and Multi-Granularity Planning with Deformable Attention for Autonomous Driving in a Single Decoder-
[pdf]
[supp]
[bibtex]@InProceedings{Tang_2025_ICCV, author = {Tang, Yingqi and Xu, Zhuoran and Meng, Zhaotie and Cheng, Erkang}, title = {HiP-AD: Hierarchical and Multi-Granularity Planning with Deformable Attention for Autonomous Driving in a Single Decoder}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25605-25615} }
Visual Textualization for Image Prompted Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Yongjian and Zhou, Yang and Saiyin, Jiya and Wei, Bingzheng and Xu, Yan}, title = {Visual Textualization for Image Prompted Object Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20900-20910} }
A Token-level Text Image Foundation Model for Document Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guan_2025_ICCV, author = {Guan, Tongkun and Wang, Zining and Fu, Pei and Guo, Zhengtao and Shen, Wei and Zhou, Kai and Yue, Tiezhu and Duan, Chen and Sun, Hao and Jiang, Qianyi and Luo, Junfeng and Yang, Xiaokang}, title = {A Token-level Text Image Foundation Model for Document Understanding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23210-23220} }
NeRF Is a Valuable Assistant for 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2025_ICCV, author = {Fang, Shuangkang and Shen, I-Chao and Igarashi, Takeo and Wang, Yufeng and Wang, ZeSheng and Yang, Yi and Ding, Wenrui and Zhou, Shuchang}, title = {NeRF Is a Valuable Assistant for 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26230-26240} }
Towards Visual Localization Interoperability: Cross-Feature for Collaborative Visual Localization and Mapping-
[pdf]
[supp]
[bibtex]@InProceedings{Jaenal_2025_ICCV, author = {Jaenal, Alberto and Cubero, Paula Carb\'o and Ara\'ujo, Jos\'e and Mateus, Andr\'e}, title = {Towards Visual Localization Interoperability: Cross-Feature for Collaborative Visual Localization and Mapping}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26783-26792} }
PerLDiff: Controllable Street View Synthesis Using Perspective-Layout Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Jinhua and Sheng, Hualian and Cai, Sijia and Deng, Bing and Liang, Qiao and Li, Wen and Fu, Ying and Ye, Jieping and Gu, Shuhang}, title = {PerLDiff: Controllable Street View Synthesis Using Perspective-Layout Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26306-26315} }
VISO: Accelerating In-orbit Object Detection with Language-Guided Mask Learning and Sparse Inference-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Meiqi and Qiu, Han}, title = {VISO: Accelerating In-orbit Object Detection with Language-Guided Mask Learning and Sparse Inference}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23300-23310} }
ATCTrack: Aligning Target-Context Cues with Dynamic Target States for Robust Vision-Language Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2025_ICCV, author = {Feng, Xiaokun and Hu, Shiyu and Li, Xuchen and Zhang, Dailing and Wu, Meiqi and Zhang, Jing and Chen, Xiaotang and Huang, Kaiqi}, title = {ATCTrack: Aligning Target-Context Cues with Dynamic Target States for Robust Vision-Language Tracking}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19850-19861} }
SeaS: Few-shot Industrial Anomaly Image Generation with Separation and Sharing Fine-tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dai_2025_ICCV, author = {Dai, Zhewei and Zeng, Shilei and Liu, Haotian and Li, Xurui and Xue, Feng and Zhou, Yu}, title = {SeaS: Few-shot Industrial Anomaly Image Generation with Separation and Sharing Fine-tuning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23135-23144} }
AMD: Adaptive Momentum and Decoupled Contrastive Learning Framework for Robust Long-Tail Trajectory Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rao_2025_ICCV, author = {Rao, Bin and Liao, Haicheng and Guan, Yanchen and Wang, Chengyue and Wang, Bonan and Zhang, Jiaxun and Li, Zhenning}, title = {AMD: Adaptive Momentum and Decoupled Contrastive Learning Framework for Robust Long-Tail Trajectory Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28849-28858} }
Music Grounding by Short Video-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xin_2025_ICCV, author = {Xin, Zijie and Wang, Minquan and Liu, Jingyu and Chen, Quan and Ma, Ye and Jiang, Peng and Li, Xirong}, title = {Music Grounding by Short Video}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22285-22293} }
When Confidence Fails: Revisiting Pseudo-Label Selection in Semi-supervised Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Pan and Liu, Jinshi}, title = {When Confidence Fails: Revisiting Pseudo-Label Selection in Semi-supervised Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21874-21884} }
CalliReader: Contextualizing Chinese Calligraphy via an Embedding-Aligned Vision-Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2025_ICCV, author = {Luo, Yuxuan and Tang, Jiaqi and Huang, Chenyi and Hao, Feiyang and Lian, Zhouhui}, title = {CalliReader: Contextualizing Chinese Calligraphy via an Embedding-Aligned Vision-Language Model}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23030-23040} }
LOTS of Fashion! Multi-Conditioning for Image Generation via Sketch-Text Pairing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Girella_2025_ICCV, author = {Girella, Federico and Talon, Davide and Liu, Ziyue and Ruan, Zanxi and Wang, Yiming and Cristani, Marco}, title = {LOTS of Fashion! Multi-Conditioning for Image Generation via Sketch-Text Pairing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19711-19720} }
Plug-in Feedback Self-adaptive Attention in CLIP for Training-free Open-Vocabulary Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chi_2025_ICCV, author = {Chi, Zhixiang and Wu, Yanan and Gu, Li and Liu, Huan and Wang, Ziqiang and Zhang, Yang and Wang, Yang and Plataniotis, Konstantinos}, title = {Plug-in Feedback Self-adaptive Attention in CLIP for Training-free Open-Vocabulary Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22815-22825} }
Street Gaussians without 3D Object Tracker-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Ruida and Li, Chengxi and Zhang, Chenyangguang and Liu, Xingyu and Yuan, Haili and Li, Yanyan and Ji, Xiangyang and Lee, Gim Hee}, title = {Street Gaussians without 3D Object Tracker}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25722-25734} }
Principles of Visual Tokens for Efficient Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hao_2025_ICCV, author = {Hao, Xinyue and Li, Gen and Gowda, Shreyank N and Fisher, Robert B. and Huang, Jonathan and Arnab, Anurag and Sevilla-Lara, Laura}, title = {Principles of Visual Tokens for Efficient Video Understanding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21254-21264} }
Similarity Memory Prior is All You Need for Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2025_ICCV, author = {Tang, Hao and Guo, Zhiqing and Wang, Liejun and Liu, Chao}, title = {Similarity Memory Prior is All You Need for Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23009-23018} }
MMGeo: Multimodal Compositional Geo-Localization for UAVs-
[pdf]
[bibtex]@InProceedings{Ji_2025_ICCV, author = {Ji, Yuxiang and He, Boyong and Tan, Zhuoyue and Wu, Liaoni}, title = {MMGeo: Multimodal Compositional Geo-Localization for UAVs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25165-25175} }
InstructSeg: Unifying Instructed Visual Segmentation with Multi-modal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2025_ICCV, author = {Wei, Cong and Zhong, Yujie and Tan, Haoxian and Zeng, Yingsen and Liu, Yong and Wang, Hongfa and Yang, Yujiu}, title = {InstructSeg: Unifying Instructed Visual Segmentation with Multi-modal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20193-20203} }
Beyond Simple Edits: Composed Video Retrieval with Dense Modifications-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Thawakar_2025_ICCV, author = {Thawakar, Omkar and Demidov, Dmitry and Thawkar, Ritesh and Anwer, Rao Muhammad and Shah, Mubarak and Khan, Fahad Shahbaz and Khan, Salman}, title = {Beyond Simple Edits: Composed Video Retrieval with Dense Modifications}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20435-20444} }
Leveraging Prior Knowledge of Diffusion Model for Person Search-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_ICCV, author = {Kim, Giyeol and Yang, Sooyoung and Oh, Jihyong and Kang, Myungjoo and Eom, Chanho}, title = {Leveraging Prior Knowledge of Diffusion Model for Person Search}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20301-20312} }
PS3: A Multimodal Transformer Integrating Pathology Reports with Histology Images and Biological Pathways for Cancer Survival Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Raza_2025_ICCV, author = {Raza, Manahil and Azam, Ayesha and Qaiser, Talha and Rajpoot, Nasir}, title = {PS3: A Multimodal Transformer Integrating Pathology Reports with Histology Images and Biological Pathways for Cancer Survival Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22175-22186} }
Debiasing Trace Guidance: Top-down Trace Distillation and Bottom-up Velocity Alignment for Unsupervised Anomaly Detection-
[pdf]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Xingjian and Chai, Li and Chen, Jiming}, title = {Debiasing Trace Guidance: Top-down Trace Distillation and Bottom-up Velocity Alignment for Unsupervised Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22989-22998} }
EDM: Efficient Deep Feature Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Xi and Rao, Tong and Pan, Cihui}, title = {EDM: Efficient Deep Feature Matching}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26198-26208} }
CorrCLIP: Reconstructing Patch Correlations in CLIP for Open-Vocabulary Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Dengke and Liu, Fagui and Tang, Quan}, title = {CorrCLIP: Reconstructing Patch Correlations in CLIP for Open-Vocabulary Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24677-24687} }
LBM: Latent Bridge Matching for Fast Image-to-Image Translation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chadebec_2025_ICCV, author = {Chadebec, Cl\'ement and Tasar, Onur and Sreetharan, Sanjeev and Aubin, Benjamin}, title = {LBM: Latent Bridge Matching for Fast Image-to-Image Translation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {29086-29098} }
Towards a 3D Transfer-based Black-box Attack via Critical Feature Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pang_2025_ICCV, author = {Pang, Shuchao and Chen, Zhenghan and Zhang, Shen and Lu, Liming and Liang, Siyuan and Du, Anan and Zhou, Yongbin}, title = {Towards a 3D Transfer-based Black-box Attack via Critical Feature Guidance}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26912-26922} }
LLM-Assisted Semantic Guidance for Sparsely Annotated Remote Sensing Object Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liao_2025_ICCV, author = {Liao, Wei and Xu, Chunyan and Wang, Chenxu and Cui, Zhen}, title = {LLM-Assisted Semantic Guidance for Sparsely Annotated Remote Sensing Object Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22519-22528} }
Geometric Alignment and Prior Modulation for View-Guided Point Cloud Completion on Unseen Categories-
[pdf]
[bibtex]@InProceedings{Xiu_2025_ICCV, author = {Xiu, Jingqiao and Li, Yicong and Zhao, Na and Fang, Han and Wang, Xiang and Yao, Angela}, title = {Geometric Alignment and Prior Modulation for View-Guided Point Cloud Completion on Unseen Categories}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27435-27444} }
ODDR: Outlier Detection & Dimension Reduction Based Defense Against Adversarial Patches-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chattopadhyay_2025_ICCV, author = {Chattopadhyay, Nandish and Guesmi, Amira and Hanif, Muhammad Abdullah and Ouni, Bassem and Shafique, Muhammad}, title = {ODDR: Outlier Detection \& Dimension Reduction Based Defense Against Adversarial Patches}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22999-23008} }
Stochastic Gradient Estimation for Higher-Order Differentiable Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Zican and Fischer, Michael and Ritschel, Tobias}, title = {Stochastic Gradient Estimation for Higher-Order Differentiable Rendering}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28198-28206} }
PolGS: Polarimetric Gaussian Splatting for Fast Reflective Surface Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_ICCV, author = {Han, Yufei and Tie, Bowen and Guo, Heng and Lyu, Youwei and Li, Si and Shi, Boxin and Jia, Yunpeng and Ma, Zhanyu}, title = {PolGS: Polarimetric Gaussian Splatting for Fast Reflective Surface Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28073-28082} }
ERNet: Efficient Non-Rigid Registration Network for Point Sequences-
[pdf]
[supp]
[bibtex]@InProceedings{He_2025_ICCV, author = {He, Guangzhao and Xiao, Yuxi and Xu, Zhen and Zhou, Xiaowei and Peng, Sida}, title = {ERNet: Efficient Non-Rigid Registration Network for Point Sequences}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27156-27165} }
Does Your Vision-Language Model Get Lost in the Long Video Sampling Dilemma?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qu_2025_ICCV, author = {Qu, Tianyuan and Tang, Longxiang and Peng, Bohao and Yang, Senqiao and Yu, Bei and Jia, Jiaya}, title = {Does Your Vision-Language Model Get Lost in the Long Video Sampling Dilemma?}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20889-20899} }
Towards Safer and Understandable Driver Intention Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Karuppasamy_2025_ICCV, author = {Karuppasamy, Mukilan and Gangisetty, Shankar and Rai, Shyam Nandan and Masone, Carlo and Jawahar, C V}, title = {Towards Safer and Understandable Driver Intention Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25378-25387} }
Generative Gaussian Splatting: Generating 3D Scenes with Video Diffusion Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Schwarz_2025_ICCV, author = {Schwarz, Katja and M\"uller, Norman and Kontschieder, Peter}, title = {Generative Gaussian Splatting: Generating 3D Scenes with Video Diffusion Priors}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27510-27520} }
AR-1-to-3: Single Image to Consistent 3D Object via Next-View Prediction-
[pdf]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Xuying and Zhou, Yupeng and Wang, Kai and Wang, Yikai and Li, Zhen and Jiao, Shaohui and Zhou, Daquan and Hou, Qibin and Cheng, Ming-Ming}, title = {AR-1-to-3: Single Image to Consistent 3D Object via Next-View Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26273-26283} }
Inverse 3D Microscopy Rendering for Cell Shape Inference with Active Mesh-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ichbiah_2025_ICCV, author = {Ichbiah, Sacha and Sinha, Anshuman and Delbary, Fabrice and Turlier, Herv\'e}, title = {Inverse 3D Microscopy Rendering for Cell Shape Inference with Active Mesh}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26987-26998} }
DC-TTA: Divide-and-Conquer Framework for Test-Time Adaptation of Interactive Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2025_ICCV, author = {Kim, Jihun and Kwon, Hoyong and Kweon, Hyeokjun and Jeong, Wooseong and Yoon, Kuk-Jin}, title = {DC-TTA: Divide-and-Conquer Framework for Test-Time Adaptation of Interactive Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23279-23289} }
NAVER: A Neuro-Symbolic Compositional Automaton for Visual Grounding with Explicit Logic Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2025_ICCV, author = {Cai, Zhixi and Ke, Fucai and Jahangard, Simindokht and de la Banda, Maria Garcia and Haffari, Reza and Stuckey, Peter J. and Rezatofighi, Hamid}, title = {NAVER: A Neuro-Symbolic Compositional Automaton for Visual Grounding with Explicit Logic Reasoning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24078-24089} }
MergeOcc: Bridge the Domain Gap between Different LiDARs for Robust Occupancy Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Zikun and Xu, Shaobing}, title = {MergeOcc: Bridge the Domain Gap between Different LiDARs for Robust Occupancy Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26539-26548} }
AutoScape: Geometry-Consistent Long-Horizon Scene Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Jiacheng and Jiang, Ziyu and Liang, Mingfu and Zhuang, Bingbing and Su, Jong-Chyi and Garg, Sparsh and Wu, Ying and Chandraker, Manmohan}, title = {AutoScape: Geometry-Consistent Long-Horizon Scene Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25700-25711} }
Synchronizing Task Behavior: Aligning Multiple Tasks during Test-Time Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2025_ICCV, author = {Jeong, Wooseong and Cho, Jegyeong and Yoon, Youngho and Yoon, Kuk-Jin}, title = {Synchronizing Task Behavior: Aligning Multiple Tasks during Test-Time Training}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24340-24350} }
FLOSS: Free Lunch in Open-vocabulary Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Benigmim_2025_ICCV, author = {Benigmim, Yasser and Fahes, Mohammad and Vu, Tuan-Hung and Bursuc, Andrei and de Charette, Raoul}, title = {FLOSS: Free Lunch in Open-vocabulary Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21471-21481} }
ToF-Splatting: Dense SLAM using Sparse Time-of-Flight Depth and Multi-Frame Integration-
[pdf]
[supp]
[bibtex]@InProceedings{Conti_2025_ICCV, author = {Conti, Andrea and Poggi, Matteo and Cambareri, Valerio and Oswald, Martin R. and Mattoccia, Stefano}, title = {ToF-Splatting: Dense SLAM using Sparse Time-of-Flight Depth and Multi-Frame Integration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28344-28353} }
Correspondence-Free Fast and Robust Spherical Point Pattern Registration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sarker_2025_ICCV, author = {Sarker, Anik and Asbeck, Alan T.}, title = {Correspondence-Free Fast and Robust Spherical Point Pattern Registration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28156-28166} }
CT-ScanGaze: A Dataset and Baselines for 3D Volumetric Scanpath Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Pham_2025_ICCV, author = {Pham, Trong Thang and Awasthi, Akash and Khan, Saba and Marti, Esteban Duran and Nguyen, Tien-Phat and Vo, Khoa and Tran, Minh and Nguyen, Son and Tran, Cuong and Ikebe, Yuki and Nguyen, Anh Totti and Nguyen, Anh and Deng, Zhigang and Wu, Carol C. and Nguyen, Hien and Le, Ngan}, title = {CT-ScanGaze: A Dataset and Baselines for 3D Volumetric Scanpath Modeling}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21732-21743} }
MOBIUS: Big-to-Mobile Universal Instance Segmentation via Multi-modal Bottleneck Fusion and Calibrated Decoder Pruning-
[pdf]
[supp]
[bibtex]@InProceedings{Segu_2025_ICCV, author = {Segu, Mattia and Gazulla, Marta Tintore and Xian, Yongqin and Van Gool, Luc and Tombari, Federico}, title = {MOBIUS: Big-to-Mobile Universal Instance Segmentation via Multi-modal Bottleneck Fusion and Calibrated Decoder Pruning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20726-20736} }
VoxelKP: A Voxel-based Network Architecture for Human Keypoint Estimation in LiDAR Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2025_ICCV, author = {Shi, Jian and Wonka, Peter}, title = {VoxelKP: A Voxel-based Network Architecture for Human Keypoint Estimation in LiDAR Data}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28282-28291} }
Normal and Abnormal Pathology Knowledge-Augmented Vision-Language Model for Anomaly Detection in Pathology Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2025_ICCV, author = {Song, Jinsol and Wang, Jiamu and Nguyen, Anh Tien and Byeon, Keunho and Ahn, Sangjeong and Lee, Sung Hak and Kwak, Jin Tae}, title = {Normal and Abnormal Pathology Knowledge-Augmented Vision-Language Model for Anomaly Detection in Pathology Images}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22066-22076} }
COME: Dual Structure-Semantic Learning with Collaborative MoE for Universal Lesion Detection Across Heterogeneous Ultrasound Datasets-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Lingyu and Zeng, Yawen and Wang, Yue and Wan, Peng and Ning, Guochen and Liao, Hongen and Zhang, Daoqiang and Chen, Fang}, title = {COME: Dual Structure-Semantic Learning with Collaborative MoE for Universal Lesion Detection Across Heterogeneous Ultrasound Datasets}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21460-21470} }
An Efficient Hybrid Vision Transformer for TinyML Applications-
[pdf]
[bibtex]@InProceedings{Zeng_2025_ICCV, author = {Zeng, Fanhong and Li, Huanan and Guan, Juntao and Fan, Rui and Wu, Tong and Wang, Xilong and Lai, Rui}, title = {An Efficient Hybrid Vision Transformer for TinyML Applications}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19914-19924} }
Object-centric Video Question Answering with Visual Grounding and Referring-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Haochen and Chen, Qirui and Yan, Cilin and Cai, Jiayin and Jiang, Xiaolong and Hu, Yao and Xie, Weidi and Gavves, Stratis}, title = {Object-centric Video Question Answering with Visual Grounding and Referring}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22274-22284} }
AG2aussian: Anchor-Graph Structured Gaussian Splatting for Instance-Level 3D Scene Understanding and Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Zhaonan and Li, Manyi and Tu, Changhe}, title = {AG2aussian: Anchor-Graph Structured Gaussian Splatting for Instance-Level 3D Scene Understanding and Editing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26806-26816} }
Soft Local Completeness: Rethinking Completeness in XAI-
[pdf]
[supp]
[bibtex]@InProceedings{Haddad_2025_ICCV, author = {Haddad, Ziv Weiss and Barkan, Oren and Elisha, Yehonatan and Koenigstein, Noam}, title = {Soft Local Completeness: Rethinking Completeness in XAI}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19794-19804} }
Open-ended Hierarchical Streaming Video Understanding with Vision Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2025_ICCV, author = {Kang, Hyolim and Park, Yunsu and Yoo, Youngbeom and Choi, Yeeun and Kim, Seon Joo}, title = {Open-ended Hierarchical Streaming Video Understanding with Vision Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20715-20725} }
SUB: Benchmarking CBM Generalization via Synthetic Attribute Substitutions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bader_2025_ICCV, author = {Bader, Jessica and Girrbach, Leander and Alaniz, Stephan and Akata, Zeynep}, title = {SUB: Benchmarking CBM Generalization via Synthetic Attribute Substitutions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23188-23198} }
LLaVA-SP: Enhancing Visual Representation with Visual Spatial Tokens for MLLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Lou_2025_ICCV, author = {Lou, Haoran and Fan, Chunxiao and Liu, Ziyan and Wu, Yuexin and Wang, Xinliang}, title = {LLaVA-SP: Enhancing Visual Representation with Visual Spatial Tokens for MLLMs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22014-22024} }
LLM-enhanced Action-aware Multi-modal Prompt Tuning for Image-Text Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tian_2025_ICCV, author = {Tian, Mengxiao and Wu, Xinxiao and Yang, Shuo}, title = {LLM-enhanced Action-aware Multi-modal Prompt Tuning for Image-Text Matching}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20748-20757} }
PointGAC: Geometric-Aware Codebook for Masked Point Modeling-
[pdf]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Abiao and Lv, Chenlei and Fang, Yuming and Zuo, Yifan and Zhang, Jian and Mei, Guofeng}, title = {PointGAC: Geometric-Aware Codebook for Masked Point Modeling}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24989-24998} }
Unbiased Region-Language Alignment for Open-Vocabulary Dense Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Yunheng and Li, Yuxuan and Zeng, Quan-Sheng and Wang, Wenhai and Hou, Qibin and Cheng, Ming-Ming}, title = {Unbiased Region-Language Alignment for Open-Vocabulary Dense Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23795-23805} }
CLIPer: Hierarchically Improving Spatial Representation of CLIP for Open-Vocabulary Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_ICCV, author = {Sun, Lin and Cao, Jiale and Xie, Jin and Jiang, Xiaoheng and Pang, Yanwei}, title = {CLIPer: Hierarchically Improving Spatial Representation of CLIP for Open-Vocabulary Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23199-23209} }
SGAD: Semantic and Geometric-aware Descriptor for Local Feature Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Xiangzeng and Wang, Chi and Shi, Guanglu and Zhang, Xiaodong and Miao, Qiguang and Fan, Miao}, title = {SGAD: Semantic and Geometric-aware Descriptor for Local Feature Matching}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27095-27104} }
RogSplat: Robust Gaussian Splatting via Generative Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Kong_2025_ICCV, author = {Kong, Hanyang and Yang, Xingyi and Wang, Xinchao}, title = {RogSplat: Robust Gaussian Splatting via Generative Priors}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25735-25745} }
CARIM: Caption-Based Autonomous Driving Scene Retrieval via Inclusive Text Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Ki_2025_ICCV, author = {Ki, Minjoo and Kim, Daejung and Kim, Kisung and Kim, Seon Joo and Lee, Jinhan}, title = {CARIM: Caption-Based Autonomous Driving Scene Retrieval via Inclusive Text Matching}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22036-22045} }
Zero-Shot Compositional Video Learning with Coding Rate Reduction-
[pdf]
[supp]
[bibtex]@InProceedings{Jung_2025_ICCV, author = {Jung, Heeseok and Bak, Jun-Hyeon and Jeong, Yujin and Lee, Gyugeun and Ahn, Jinwoo and Kim, Eun-Sol}, title = {Zero-Shot Compositional Video Learning with Coding Rate Reduction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20508-20518} }
Benchmarking Egocentric Visual-Inertial SLAM at City Scale-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Krishnan_2025_ICCV, author = {Krishnan, Anusha and Liu, Shaohui and Sarlin, Paul-Edouard and Gentilhomme, Oscar and Caruso, David and Monge, Maurizio and Newcombe, Richard and Engel, Jakob and Pollefeys, Marc}, title = {Benchmarking Egocentric Visual-Inertial SLAM at City Scale}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25207-25217} }
CATSplat: Context-Aware Transformer with Spatial Guidance for Generalizable 3D Gaussian Splatting from A Single-View Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Roh_2025_ICCV, author = {Roh, Wonseok and Jung, Hwanhee and Kim, Jong Wook and Lee, Seunggwan and Yoo, Innfarn and Lugmayr, Andreas and Chi, Seunggeun and Ramani, Karthik and Kim, Sangpil}, title = {CATSplat: Context-Aware Transformer with Spatial Guidance for Generalizable 3D Gaussian Splatting from A Single-View Image}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28228-28238} }
Learning Normals of Noisy Points by Local Gradient-Aware Surface Filtering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Qing and Feng, Huifang and Gong, Xun and Liu, Yu-Shen}, title = {Learning Normals of Noisy Points by Local Gradient-Aware Surface Filtering}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28828-28838} }
VRBench: A Benchmark for Multi-Step Reasoning in Long Narrative Videos-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yu_2025_ICCV, author = {Yu, Jiashuo and Wu, Yue and Chu, Meng and Ren, Zhifei and Huang, Zizheng and Chu, Pei and Zhang, Ruijie and He, Yinan and Li, Qirui and Li, Songze and Li, Zhenxiang and Tu, Zhongying and He, Conghui and Qiao, Yu and Wang, Yali and Wang, Yi and Wang, Limin}, title = {VRBench: A Benchmark for Multi-Step Reasoning in Long Narrative Videos}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21655-21666} }
How Can Objects Help Video-Language Understanding?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2025_ICCV, author = {Tang, Zitian and Wang, Shijie and Cho, Junho and Yoo, Jaewook and Sun, Chen}, title = {How Can Objects Help Video-Language Understanding?}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21994-22003} }
Conditional Latent Diffusion Models for Zero-Shot Instance Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ulmer_2025_ICCV, author = {Ulmer, Maximilian and Boerdijk, Wout and Triebel, Rudolph and Durner, Maximilian}, title = {Conditional Latent Diffusion Models for Zero-Shot Instance Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24360-24369} }
Stable Diffusion Models are Secretly Good at Visual In-Context Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Oorloff_2025_ICCV, author = {Oorloff, Trevine and Sindagi, Vishwanath and Bandara, Wele Gedara Chaminda and Shafahi, Ali and Ghiasi, Amin and Prakash, Charan and Ardekani, Reza}, title = {Stable Diffusion Models are Secretly Good at Visual In-Context Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23604-23613} }
Task-Specific Zero-shot Quantization-Aware Training for Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Changhao and Chen, Xinrui and Wang, Ji and Zhao, Kang and Chen, Jianfei}, title = {Task-Specific Zero-shot Quantization-Aware Training for Object Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22868-22878} }
Snakes and Ladders: Two Steps Up for VideoMamba-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_ICCV, author = {Lu, Hui and Salah, Albert A. and Poppe, Ronald}, title = {Snakes and Ladders: Two Steps Up for VideoMamba}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24234-24244} }
Generalized Few-Shot Point Cloud Segmentation via LLM-Assisted Hyper-Relation Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Zhaoyang and Wang, Yuan and Xiong, Guoxin and Li, Wangkai and Pan, Yuwen and Zhang, Tianzhu}, title = {Generalized Few-Shot Point Cloud Segmentation via LLM-Assisted Hyper-Relation Matching}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23063-23073} }
AdaptiveAE: An Adaptive Exposure Strategy for HDR Capturing in Dynamic Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Tianyi and Zhang, Fan and Shi, Boxin and Xue, Tianfan and Wang, Yujin}, title = {AdaptiveAE: An Adaptive Exposure Strategy for HDR Capturing in Dynamic Scenes}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25176-25185} }
UINavBench: A Framework for Comprehensive Evaluation of Interactive Digital Agents-
[pdf]
[supp]
[bibtex]@InProceedings{Agrawal_2025_ICCV, author = {Agrawal, Harsh and Schoop, Eldon and Pan, Xinlei and Mahajan, Anuj and Seff, Ari and Feng, Di and Cheng, Ruijia and Teran, Andres Romero Mier Y and Gomez, Esteban and Sundararajan, Abhishek and Huang, Forrest and Swearngin, Amanda and Moorthy, Mohana Prasad Sathya and Nichols, Jeff and Toshev, Alexander}, title = {UINavBench: A Framework for Comprehensive Evaluation of Interactive Digital Agents}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23353-23363} }
CityGS-X: A Scalable Architecture for Efficient and Geometrically Accurate Large-Scale Scene Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2025_ICCV, author = {Gao, Yuanyuan and Li, Hao and Chen, Jiaqi and Zou, Zhengyu and Zhong, Zhihang and Zhang, Dingwen and Sun, Xiao and Han, Junwei}, title = {CityGS-X: A Scalable Architecture for Efficient and Geometrically Accurate Large-Scale Scene Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27187-27196} }
OcRFDet: Object-Centric Radiance Fields for Multi-View 3D Object Detection in Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ji_2025_ICCV, author = {Ji, Mingqian and Zhang, Shanshan and Yang, Jian}, title = {OcRFDet: Object-Centric Radiance Fields for Multi-View 3D Object Detection in Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24933-24942} }
CC-OCR: A Comprehensive and Challenging OCR Benchmark for Evaluating Large Multimodal Models in Literacy-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Zhibo and Tang, Jun and Li, Zhaohai and Wang, Pengfei and Wan, Jianqiang and Zhong, Humen and Liu, Xuejing and Yang, Mingkun and Wang, Peng and Bai, Shuai and Jin, Lianwen and Lin, Junyang}, title = {CC-OCR: A Comprehensive and Challenging OCR Benchmark for Evaluating Large Multimodal Models in Literacy}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21744-21754} }
RCTDistill: Cross-Modal Knowledge Distillation Framework for Radar-Camera 3D Object Detection with Temporal Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bang_2025_ICCV, author = {Bang, Geonho and Seong, Minjae and Kim, Jisong and Baek, Geunju and Oh, Daye and Kim, Junhyung and Koh, Junho and Choi, Jun Won}, title = {RCTDistill: Cross-Modal Knowledge Distillation Framework for Radar-Camera 3D Object Detection with Temporal Fusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25315-25324} }
ViSpeak: Visual Instruction Feedback in Streaming Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fu_2025_ICCV, author = {Fu, Shenghao and Yang, Qize and Li, Yuan-Ming and Peng, Yi-Xing and Lin, Kun-Yu and Wei, Xihan and Hu, Jian-Fang and Xie, Xiaohua and Zheng, Wei-Shi}, title = {ViSpeak: Visual Instruction Feedback in Streaming Videos}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21778-21788} }
VideoAds for Fast-Paced Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Zheyuan and Dou, Wanying and Peng, Linkai and Pan, Hongyi and Bagci, Ulas and Gong, Boqing}, title = {VideoAds for Fast-Paced Video Understanding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21812-21821} }
MGSR: 2D/3D Mutual-boosted Gaussian Splatting for High-fidelity Surface Reconstruction under Various Light Conditions-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Qingyuan and Gong, Yuehu and Yang, Weidong and Li, Jiaze and Luo, Yeqi and Xu, Baixin and Li, Shuhao and Fei, Ben and He, Ying}, title = {MGSR: 2D/3D Mutual-boosted Gaussian Splatting for High-fidelity Surface Reconstruction under Various Light Conditions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27295-27304} }
MIEB: Massive Image Embedding Benchmark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2025_ICCV, author = {Xiao, Chenghao and Chung, Isaac and Kerboua, Imene and Stirling, Jamie and Zhang, Xin and Kardos, M\'arton and Solomatin, Roman and Al Moubayed, Noura and Enevoldsen, Kenneth and Muennighoff, Niklas}, title = {MIEB: Massive Image Embedding Benchmark}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22187-22198} }
SViM3D: Stable Video Material Diffusion for Single Image 3D Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Engelhardt_2025_ICCV, author = {Engelhardt, Andreas and Boss, Mark and Voleti, Vikram and Yao, Chun-Han and Lensch, Hendrik P. A. and Jampani, Varun}, title = {SViM3D: Stable Video Material Diffusion for Single Image 3D Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28428-28439} }
CoMoGaussian: Continuous Motion-Aware Gaussian Splatting from Motion-Blurred Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_ICCV, author = {Lee, Jungho and Kim, Donghyeong and Lee, Dogyoon and Cho, Suhwan and Lee, Minhyeok and Lee, Wonjoon and Kim, Taeoh and Wee, Dongyoon and Lee, Sangyoun}, title = {CoMoGaussian: Continuous Motion-Aware Gaussian Splatting from Motion-Blurred Images}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26415-26424} }
WIPES: Wavelet-based Visual Primitives-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Wenhao and Zhu, Hao and Wu, Delong and Kang, Di and Bao, Linchao and Cao, Xun and Ma, Zhan}, title = {WIPES: Wavelet-based Visual Primitives}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27338-27347} }
Unsupervised Histopathological Image Semantic Segmentation with Overlapping Patches Consistency Constraint-
[pdf]
[bibtex]@InProceedings{Cai_2025_ICCV, author = {Cai, Wentian and Weng, Weizhao and Huang, Zihao and Chen, Yandan and Huang, Siquan and Gao, Ping and Leung, Victor C. M. and Gao, Ying}, title = {Unsupervised Histopathological Image Semantic Segmentation with Overlapping Patches Consistency Constraint}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {23332-23341} }
Diff2I2P: Differentiable Image-to-Point Cloud Registration with Diffusion Prior-
[pdf]
[supp]
[bibtex]@InProceedings{Mu_2025_ICCV, author = {Mu, Juncheng and Ren, Chengwei and Zhang, Weixiang and Pan, Liang and Zhang, Xiao-Ping and Gao, Yue}, title = {Diff2I2P: Differentiable Image-to-Point Cloud Registration with Diffusion Prior}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25777-25787} }
LightSwitch: Multi-view Relighting with Material-guided Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Litman_2025_ICCV, author = {Litman, Yehonathan and De la Torre, Fernando and Tulsiani, Shubham}, title = {LightSwitch: Multi-view Relighting with Material-guided Diffusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27750-27759} }
CanFields: Consolidating Diffeomorphic Flows for Non-Rigid 4D Interpolation from Arbitrary-Length Sequences-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Miaowei and Li, Changjian and Vaxman, Amir}, title = {CanFields: Consolidating Diffeomorphic Flows for Non-Rigid 4D Interpolation from Arbitrary-Length Sequences}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {28587-28598} }
Towards Foundational Models for Single-Chip Radar-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_ICCV, author = {Huang, Tianshu and Prabhakara, Akarsh and Chen, Chuhan and Karhade, Jay and Ramanan, Deva and O'toole, Matthew and Rowe, Anthony}, title = {Towards Foundational Models for Single-Chip Radar}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24655-24665} }
Diffusion Transformer meets Multi-level Wavelet Spectrum for Single Image Super-Resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Du_2025_ICCV, author = {Du, Peng and Li, Hui and Xu, Han and Jeon, Paul Barom and Lee, Dongwook and Ji, Daehyun and Yang, Ran and Zhu, Feng}, title = {Diffusion Transformer meets Multi-level Wavelet Spectrum for Single Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {19700-19710} }
Robust Unfolding Network for HDR Imaging with Modulo Cameras-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Zhile and Ji, Hui}, title = {Robust Unfolding Network for HDR Imaging with Modulo Cameras}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {25218-25228} }
Player-Centric Multimodal Prompt Generation for Large Language Model Based Identity-Aware Basketball Video Captioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xi_2025_ICCV, author = {Xi, Zeyu and Sun, Haoying and Wu, Yaofei and Yan, Junchi and Zhang, Haoran and Wu, Lifang and Wang, Liang and Chen, Changwen}, title = {Player-Centric Multimodal Prompt Generation for Large Language Model Based Identity-Aware Basketball Video Captioning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24330-24339} }
ArchiSet: Benchmarking Editable and Consistent Single-View 3D Reconstruction of Buildings with Specific Window-to-Wall Ratios-
[pdf]
[bibtex]@InProceedings{Yin_2025_ICCV, author = {Yin, Jun and Zeng, Pengyu and Shen, Licheng and Zhang, Miao and Zhong, Jing and Han, Yuxing and Lu, Shuai}, title = {ArchiSet: Benchmarking Editable and Consistent Single-View 3D Reconstruction of Buildings with Specific Window-to-Wall Ratios}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26004-26014} }
HyperGCT: A Dynamic Hyper-GNN-Learned Geometric Constraint for 3D Registration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Xiyu and Ma, Jiayi and Guo, Jianwei and Hu, Wei and Qi, Zhaoshuai and Hui, Fei and Yang, Jiaqi and Zhang, Yanning}, title = {HyperGCT: A Dynamic Hyper-GNN-Learned Geometric Constraint for 3D Registration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24750-24759} }
UnMix-NeRF: Spectral Unmixing Meets Neural Radiance Fields-
[pdf]
[supp]
[bibtex]@InProceedings{Perez_2025_ICCV, author = {Perez, Fabian and Rojas, Sara and Hinojosa, Carlos and Rueda-Chac\'on, Hoover and Ghanem, Bernard}, title = {UnMix-NeRF: Spectral Unmixing Meets Neural Radiance Fields}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {26284-26293} }
Multidimensional Byte Pair Encoding: Shortened Sequences for Improved Visual Data Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Elsner_2025_ICCV, author = {Elsner, Tim and Usinger, Paula and Nehring-Wirxel, Julius and Kobsik, Gregor and Czech, Victor and He, Yanjiang and Lim, Isaak and Kobbelt, Leif}, title = {Multidimensional Byte Pair Encoding: Shortened Sequences for Improved Visual Data Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21331-21341} }
All in One: Visual-Description-Guided Unified Point Cloud Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_ICCV, author = {Han, Zongyan and El Amine Boudjoghra, Mohamed and Dong, Jiahua and Wang, Jinhong and Anwer, Rao Muhammad}, title = {All in One: Visual-Description-Guided Unified Point Cloud Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24835-24845} }
Controllable-LPMoE: Adapting to Challenging Object Segmentation via Dynamic Local Priors from Mixture-of-Experts-
[pdf]
[bibtex]@InProceedings{Sun_2025_ICCV, author = {Sun, Yanguang and Lian, Jiawei and Yang, Jian and Luo, Lei}, title = {Controllable-LPMoE: Adapting to Challenging Object Segmentation via Dynamic Local Priors from Mixture-of-Experts}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22327-22337} }
Frequency-Dynamic Attention Modulation For Dense Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Linwei and Gu, Lin and Fu, Ying}, title = {Frequency-Dynamic Attention Modulation For Dense Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {22620-22632} }
Semantic versus Identity: A Divide-and-Conquer Approach towards Adjustable Medical Image De-Identification-
[pdf]
[arXiv]
[bibtex]@InProceedings{Tian_2025_ICCV, author = {Tian, Yuan and Wang, Shuo and Zhang, Rongzhao and Chen, Zijian and Jiang, Yankai and Li, Chunyi and Zhu, Xiangyang and Yan, Fang and Hu, Qiang and Wang, XiaoSong and Zhai, Guangtao}, title = {Semantic versus Identity: A Divide-and-Conquer Approach towards Adjustable Medical Image De-Identification}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20613-20625} }
Describe Anything: Detailed Localized Image and Video Captioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lian_2025_ICCV, author = {Lian, Long and Ding, Yifan and Ge, Yunhao and Liu, Sifei and Mao, Hanzi and Li, Boyi and Pavone, Marco and Liu, Ming-Yu and Darrell, Trevor and Yala, Adam and Cui, Yin}, title = {Describe Anything: Detailed Localized Image and Video Captioning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21766-21777} }
Point Cloud Self-supervised Learning via 3D to Multi-view Masked Learner-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Zhimin and Chen, Xuewei and Guo, Xiao and Li, Yingwei and Jing, Longlong and Yang, Liang and Li, Bing}, title = {Point Cloud Self-supervised Learning via 3D to Multi-view Masked Learner}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27618-27629} }
Probabilistic Prototype Calibration of Vision-language Models for Generalized Few-shot Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Jie and Shen, Jiayi and Zhou, Pan and Sonke, Jan-Jakob and Gavves, Efstratios}, title = {Probabilistic Prototype Calibration of Vision-language Models for Generalized Few-shot Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21155-21165} }
Triad: Empowering LMM-based Anomaly Detection with Expert-guided Region-of-Interest Tokenizer and Manufacturing Process-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Yuanze and Yuan, Shihao and Wang, Haolin and Li, Qizhang and Liu, Ming and Xu, Chen and Shi, Guangming and Zuo, Wangmeng}, title = {Triad: Empowering LMM-based Anomaly Detection with Expert-guided Region-of-Interest Tokenizer and Manufacturing Process}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {21917-21926} }
When Anchors Meet Cold Diffusion: A Multi-Stage Approach to Lane Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_ICCV, author = {Huang, Bo-Lun and Ni, Zi-Xiang and Huang, Feng-Kai and Shuai, Hong-Han and Cheng, Wen-Huang}, title = {When Anchors Meet Cold Diffusion: A Multi-Stage Approach to Lane Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {27917-27926} }
Breaking Grid Constraints: Dynamic Graph Reconstruction Network for Multi-organ Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Xiao_2025_ICCV, author = {Xiao, Junhao and Wei, Yang and Wang, Jingyu and Wang, Yongchao and Bi, Xiuli and Xiao, Bin}, title = {Breaking Grid Constraints: Dynamic Graph Reconstruction Network for Multi-organ Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {24413-24422} }
Bilateral Collaboration with Large Vision-Language Models for Open Vocabulary Human-Object Interaction Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_ICCV, author = {Hu, Yupeng and Ding, Changxing and Sun, Chang and Huang, Shaoli and Xu, Xiangmin}, title = {Bilateral Collaboration with Large Vision-Language Models for Open Vocabulary Human-Object Interaction Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20126-20136} }
GaSLight: Gaussian Splats for Spatially-Varying Lighting in HDR-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bolduc_2025_ICCV, author = {Bolduc, Christophe and Hold-Geoffroy, Yannick and Lalonde, Jean-Fran\c{c}ois}, title = {GaSLight: Gaussian Splats for Spatially-Varying Lighting in HDR}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {29120-29130} }
ATAS: Any-to-Any Self-Distillation for Enhanced Open-Vocabulary Dense Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yeo_2025_ICCV, author = {Yeo, Juan and Cha, Soonwoo and Song, Jiwoo and Jin, Hyunbin and Kim, Taesup}, title = {ATAS: Any-to-Any Self-Distillation for Enhanced Open-Vocabulary Dense Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = {October}, year = {2025}, pages = {20390-20400} }
Semantic-guided Camera Ray Regression for Visual Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Yesheng and Zhao, Xu}, title = {Semantic-guided Camera Ray Regression for Visual Localization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {25639--25648} }
Multi-View Slot Attention Using Paraphrased Texts for Face Anti-Spoofing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_ICCV, author = {Yu, Jeongmin and Kim, Susang and Lee, Kisu and Kwon, Taekyoung and Shin, Won-Yong and Kim, Ha Young}, title = {Multi-View Slot Attention Using Paraphrased Texts for Face Anti-Spoofing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {21117--21128} }
GCRayDiffusion: Pose-Free Surface Reconstruction via Geometric Consistent Ray Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Li-Heng and Zou, Zi-Xin and Liu, Chang and Jing, Tianjiao and Cao, Yan-Pei and Huang, Shi-Sheng and Fu, Hongbo and Huang, Hua}, title = {{GCRayDiffusion}: Pose-Free Surface Reconstruction via Geometric Consistent Ray Diffusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {25335--25345} }
Continual Multiple Instance Learning with Enhanced Localization for Histopathological Whole Slide Image Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_ICCV, author = {Lee, Byung Hyun and Jeong, Wongi and Han, Woojae and Lee, Kyoungbun and Chun, Se Young}, title = {Continual Multiple Instance Learning with Enhanced Localization for Histopathological Whole Slide Image Analysis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {23232--23242} }
Leveraging Debiased Cross-modal Attention Maps and Code-based Reasoning for Zero-shot Referring Expression Comprehension-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Juntao and Shen, Wen and Wei, Zhihua and Sun, Lijun and Zhang, Hongyun}, title = {Leveraging Debiased Cross-modal Attention Maps and Code-based Reasoning for Zero-shot Referring Expression Comprehension}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {20413--20424} }
Instant GaussianImage: A Generalizable and Self-Adaptive Image Representation via 2D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2025_ICCV, author = {Zeng, Zhaojie and Wang, Yuesong and Guan, Tao and Yang, Chao and Ju, Lili}, title = {Instant {GaussianImage}: A Generalizable and Self-Adaptive Image Representation via {2D} {Gaussian} Splatting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {27896--27905} }
AcZeroTS: Active Learning for Zero-shot Tissue Segmentation in Pathology Images-
[pdf]
[supp]
[bibtex]@InProceedings{Tang_2025_ICCV, author = {Tang, Jiao and Zhou, Junjie and Qian, Bo and Wan, Peng and Zuo, Yingli and Shao, Wei and Zhang, Daoqiang}, title = {{AcZeroTS}: Active Learning for Zero-shot Tissue Segmentation in Pathology Images}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {23508--23518} }
Modeling Saliency Dataset Bias-
[pdf]
[supp]
[bibtex]@InProceedings{Kummerer_2025_ICCV, author = {K{\"u}mmerer, Matthias and Khanuja, Harneet Singh and Bethge, Matthias}, title = {Modeling Saliency Dataset Bias}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {22077--22088} }
MCOP: Multi-UAV Collaborative Occupancy Prediction-
[pdf]
[bibtex]@InProceedings{Lin_2025_ICCV, author = {Lin, Zefu and Chen, Wenbo and Jin, Xiaojuan and Yang, Yuran and Fan, Lue and Zhang, Yixin and Zhang, Yufeng and Zhang, Zhaoxiang}, title = {{MCOP}: Multi-{UAV} Collaborative Occupancy Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {27242--27251} }
Tree Skeletonization from 3D Point Clouds by Denoising Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Marks_2025_ICCV, author = {Marks, Elias Ariel and Nunes, Lucas and Magistri, Federico and Sodano, Matteo and Marcuzzi, Rodrigo and Zimmermann, Lars and Behley, Jens and Stachniss, Cyrill}, title = {Tree Skeletonization from {3D} Point Clouds by Denoising Diffusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {27607--27617} }
From Panels to Prose: Generating Literary Narratives from Comics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sachdeva_2025_ICCV, author = {Sachdeva, Ragav and Zisserman, Andrew}, title = {From Panels to Prose: Generating Literary Narratives from Comics}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {21864--21873} }
VMem: Consistent Interactive Video Scene Generation with Surfel-Indexed View Memory-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Runjia and Torr, Philip and Vedaldi, Andrea and Jakab, Tomas}, title = {{VMem}: Consistent Interactive Video Scene Generation with Surfel-Indexed View Memory}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {25690--25699} }
Learning Null Geodesics for Gravitational Lensing Rendering in General Relativity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_ICCV, author = {Sun, Mingyuan and Fang, Zheng and Wang, Jiaxu and Zhang, Kunyi and Zhang, Qiang and Xu, Renjing}, title = {Learning Null Geodesics for Gravitational Lensing Rendering in General Relativity}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {28473--28482} }
Towards Accurate and Efficient 3D Object Detection for Autonomous Driving: A Mixture of Experts Computing System on Edge-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Linshen and Su, Boyan and Jiang, Junyue and Wu, Guanlin and Guo, Cong and Xu, Ceyu and Yang, Hao Frank}, title = {Towards Accurate and Efficient {3D} Object Detection for Autonomous Driving: A Mixture of Experts Computing System on Edge}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {25903--25913} }
Uncertainty-Driven Expert Control: Enhancing the Reliability of Medical Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2025_ICCV, author = {Liang, Xiao and Wang, Di and Jiao, Zhicheng and Li, Ronghan and Yang, Pengfei and Wang, Quan and Chua, Tat-Seng}, title = {Uncertainty-Driven Expert Control: Enhancing the Reliability of Medical Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {21144--21154} }
Mitigating Geometric Degradation in Fast DownSampling via FastAdapter for Point Cloud Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_ICCV, author = {Sun, Shuofeng and Yan, Haibin}, title = {Mitigating Geometric Degradation in Fast DownSampling via {FastAdapter} for Point Cloud Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {25983--25992} }
Dual-S3D: Hierarchical Dual-Path Selective SSM-CNN for High-Fidelity Implicit Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Luoxi and Shrestha, Pragyan and Zhou, Yu and Xie, Chun and Kitahara, Itaru}, title = {{Dual-S3D}: Hierarchical Dual-Path Selective {SSM-CNN} for High-Fidelity Implicit Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {25104--25113} }
Lidar Waveforms are Worth 40x128x33 Words-
[pdf]
[supp]
[bibtex]@InProceedings{Scheuble_2025_ICCV, author = {Scheuble, Dominik and Holzh{\"u}ter, Hanno and Peters, Steven and Bijelic, Mario and Heide, Felix}, title = {Lidar Waveforms are Worth 40x128x33 Words}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {28913--28924} }
SALAD -- Semantics-Aware Logical Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Fucka_2025_ICCV, author = {Fu{\v{c}}ka, Matic and Zavrtanik, Vitjan and Sko{\v{c}}aj, Danijel}, title = {{SALAD} -- Semantics-Aware Logical Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {21843--21852} }
ARGUS: Hallucination and Omission Evaluation in Video-LLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rawal_2025_ICCV, author = {Rawal, Ruchit and Shirkavand, Reza and Huang, Heng and Somepalli, Gowthami and Goldstein, Tom}, title = {{ARGUS}: Hallucination and Omission Evaluation in Video-{LLMs}}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {20280--20290} }
Group Inertial Poser: Multi-Person Pose and Global Translation from Sparse Inertial Sensors and Ultra-Wideband Ranging-
[pdf]
[bibtex]@InProceedings{Xue_2025_ICCV, author = {Xue, Ying and Jiang, Jiaxi and Armani, Rayan and Hollidt, Dominik and Liao, Yi-Chi and Holz, Christian}, title = {Group Inertial Poser: Multi-Person Pose and Global Translation from Sparse Inertial Sensors and Ultra-Wideband Ranging}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {24910--24921} }
NormalLoc: Visual Localization on Textureless 3D Models using Surface Normals-
[pdf]
[supp]
[bibtex]@InProceedings{Abe_2025_ICCV, author = {Abe, Jiro and Nakano, Gaku and Ogura, Kazumine}, title = {{NormalLoc}: Visual Localization on Textureless {3D} Models using Surface Normals}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {25421--25430} }
Domain-aware Category-level Geometry Learning Segmentation for 3D Point Clouds-
[pdf]
[arXiv]
[bibtex]@InProceedings{He_2025_ICCV, author = {He, Pei and Li, Lingling and Jiao, Licheng and Shang, Ronghua and Liu, Fang and Wang, Shuang and Liu, Xu and Ma, Wenping}, title = {Domain-aware Category-level Geometry Learning Segmentation for {3D} Point Clouds}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {28324--28333} }
Improving SAM for Camouflaged Object Detection via Dual Stream Adapters-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Jiaming and Kong, Linghe and Chen, Guihai}, title = {Improving {SAM} for Camouflaged Object Detection via Dual Stream Adapters}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {21906--21916} }
DiSCO-3D : Discovering and Segmenting Sub-Concepts from Open-vocabulary Queries in NeRF-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Petit_2025_ICCV, author = {Petit, Doriand and Bourgeois, Steve and Gay-Bellile, Vincent and Chabot, Florian and Barthe, Lo{\"\i}c}, title = {{DiSCO-3D} : Discovering and Segmenting Sub-Concepts from Open-vocabulary Queries in {NeRF}}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {20043--20052} }
VCA: Video Curious Agent for Long Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Zeyuan and Chen, Delin and Yu, Xueyang and Shen, Maohao and Gan, Chuang}, title = {{VCA}: Video Curious Agent for Long Video Understanding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {20168--20179} }
ForeSight: Multi-View Streaming Joint Object Detection and Trajectory Forecasting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Papais_2025_ICCV, author = {Papais, Sandro and Wang, Letian and Cheong, Brian and Waslander, Steven L.}, title = {{ForeSight}: Multi-View Streaming Joint Object Detection and Trajectory Forecasting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {25474--25484} }
SG-LDM: Semantic-Guided LiDAR Generation via Latent-Aligned Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Xiang_2025_ICCV, author = {Xiang, Zhengkang and Li, Zizhao and Khodabandeh, Amir and Khoshelham, Kourosh}, title = {{SG-LDM}: Semantic-Guided {LiDAR} Generation via Latent-Aligned Diffusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {24965--24976} }
Dynamic-DINO: Fine-Grained Mixture of Experts Tuning for Real-time Open-Vocabulary Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2025_ICCV, author = {Lu, Yehao and Weng, Minghe and Xiao, Zekang and Jiang, Rui and Su, Wei and Zheng, Guangcong and Lu, Ping and Li, Xi}, title = {{Dynamic-DINO}: Fine-Grained Mixture of Experts Tuning for Real-time Open-Vocabulary Object Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {20847--20856} }
BridgeDepth: Bridging Monocular and Stereo Reasoning with Latent Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guan_2025_ICCV, author = {Guan, Tongfan and Guo, Jiaxin and Wang, Chen and Liu, Yun-Hui}, title = {{BridgeDepth}: Bridging Monocular and Stereo Reasoning with Latent Alignment}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {27681--27691} }
ViLLa: Video Reasoning Segmentation with Large Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_ICCV, author = {Zheng, Rongkun and Qi, Lu and Chen, Xi and Wang, Yi and Wang, Kun and Zhao, Hengshuang}, title = {{ViLLa}: Video Reasoning Segmentation with Large Language Model}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {23667--23677} }
RARE: Refine Any Registration of Pairwise Point Clouds via Zero-Shot Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_ICCV, author = {Zheng, Chengyu and Huang, Jin and Chen, Honghua and Wei, Mingqiang}, title = {{RARE}: Refine Any Registration of Pairwise Point Clouds via Zero-Shot Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {26549--26558} }
IntrinsicControlNet: Cross-distribution Image Generation with Real and Unreal-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2025_ICCV, author = {Lu, Jiayuan and Xie, Rengan and Xie, Zixuan and Wu, Zhizhen and Xi, Dianbing and Ye, Qi and Wang, Rui and Bao, Hujun and Huo, Yuchi}, title = {{IntrinsicControlNet}: Cross-distribution Image Generation with Real and Unreal}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {27315--27325} }
Flash-VStream: Efficient Real-Time Understanding for Long Video Streams-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Haoji and Wang, Yiqin and Tang, Yansong and Liu, Yong and Feng, Jiashi and Jin, Xiaojie}, title = {{Flash-VStream}: Efficient Real-Time Understanding for Long Video Streams}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {21059--21069} }
GroundFlow: A Plug-in Module for Temporal Reasoning on 3D Point Cloud Sequential Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_ICCV, author = {Lin, Zijun and He, Shuting and Tan, Cheston and Wen, Bihan}, title = {{GroundFlow}: A Plug-in Module for Temporal Reasoning on {3D} Point Cloud Sequential Grounding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {28774--28784} }
DAA*: Deep Angular A Star for Image-based Path Planning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_ICCV, author = {Xu, Zhiwei}, title = {{DAA*}: Deep Angular {A} Star for Image-based Path Planning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {25284--25293} }
Perspective-Invariant 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2025_ICCV, author = {Liang, Ao and Kong, Lingdong and Lu, Dongyue and Liu, Youquan and Fang, Jian and Zhao, Huaici and Ooi, Wei Tsang}, title = {Perspective-Invariant {3D} Object Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {27725--27738} }
MixA: A Mixed Attention approach with Stable Lightweight Linear Attention to enhance Efficiency of Vision Transformers at the Edge-
[pdf]
[supp]
[bibtex]@InProceedings{Ahmed_2025_ICCV, author = {Ahmed, Sabbir and Li, Jingtao and Zhuang, Weiming and Chen, Chen and Lyu, Lingjuan}, title = {{MixA}: A Mixed Attention approach with Stable Lightweight Linear Attention to enhance Efficiency of Vision Transformers at the Edge}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {21187--21196} }
LIRA: Inferring Segmentation in Large Multi-modal Models with Local Interleaved Region Assistance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Zhang and Yang, Biao and Liu, Qiang and Zhang, Shuo and Ma, Zhiyin and Yin, Liang and Deng, Linger and Sun, Yabo and Liu, Yuliang and Bai, Xiang}, title = {{LIRA}: Inferring Segmentation in Large Multi-modal Models with Local Interleaved Region Assistance}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {24056--24067} }
Adapting In-Domain Few-Shot Segmentation to New Domains without Source Domain Retraining-
[pdf]
[bibtex]@InProceedings{Fan_2025_ICCV, author = {Fan, Qi and Liu, Kaiqi and Liu, Nian and Cholakkal, Hisham and Anwer, Rao Muhammad and Li, Wenbin and Gao, Yang}, title = {Adapting In-Domain Few-Shot Segmentation to New Domains without Source Domain Retraining}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {21429--21439} }
HUG: Hierarchical Urban Gaussian Splatting with Block-Based Reconstruction for Large-Scale Aerial Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Su_2025_ICCV, author = {Su, Mai and Wang, Zhongtao and Au, Huishan and Li, Yilong and Cao, Xizhe and Pan, Chengwei and Chen, Yisong and Wang, Guoping}, title = {{HUG}: Hierarchical Urban {Gaussian} Splatting with Block-Based Reconstruction for Large-Scale Aerial Scenes}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {28839--28848} }
Seam360GS: Seamless 360° Gaussian Splatting from Real-World Omnidirectional Images-
[pdf]
[supp]
[bibtex]@InProceedings{Shin_2025_ICCV, author = {Shin, Changha and Cho, Woong Oh and Kim, Seon Joo}, title = {{Seam360GS}: Seamless {$360^{\circ}$} {Gaussian} Splatting from Real-World Omnidirectional Images}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {28970--28979} }
Talking to DINO: Bridging Self-Supervised Vision Backbones with Language for Open-Vocabulary Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Barsellotti_2025_ICCV, author = {Barsellotti, Luca and Bianchi, Lorenzo and Messina, Nicola and Carrara, Fabio and Cornia, Marcella and Baraldi, Lorenzo and Falchi, Fabrizio and Cucchiara, Rita}, title = {Talking to {DINO}: Bridging Self-Supervised Vision Backbones with Language for Open-Vocabulary Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {22025--22035} }
ROAR: Reducing Inversion Error in Generative Image Watermarking-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Hanyi and Fang, Han and Wang, Shi-Lin and Chang, Ee-Chien}, title = {{ROAR}: Reducing Inversion Error in Generative Image Watermarking}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {19742--19751} }
Learning Beyond Still Frames: Scaling Vision-Language Models with Video-
[pdf]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Yiyuan and Li, Handong and Liu, Jing and Yue, Xiangyu}, title = {Learning Beyond Still Frames: Scaling Vision-Language Models with Video}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {22425--22435} }
MultiADS: Defect-aware Supervision for Multi-type Anomaly Detection and Segmentation in Zero-Shot Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sadikaj_2025_ICCV, author = {Sadikaj, Ylli and Zhou, Hongkuan and Halilaj, Lavdim and Schmid, Stefan and Staab, Steffen and Plant, Claudia}, title = {{MultiADS}: Defect-aware Supervision for Multi-type Anomaly Detection and Segmentation in Zero-Shot Learning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {22978--22988} }
SEGS-SLAM: Structure-enhanced 3D Gaussian Splatting SLAM with Appearance Embedding-
[pdf]
[supp]
[bibtex]@InProceedings{Wen_2025_ICCV, author = {Wen, Tianci and Liu, Zhiang and Fang, Yongchun}, title = {{SEGS-SLAM}: Structure-enhanced {3D} {Gaussian} Splatting {SLAM} with Appearance Embedding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {28103--28113} }
Images as Noisy Labels: Unleashing the Potential of the Diffusion Model for Open-Vocabulary Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Fan and Wang, Xuanbin and Wang, Xuan and Zhang, Zhaoxiang and Xu, Yuelei}, title = {Images as Noisy Labels: Unleashing the Potential of the Diffusion Model for Open-Vocabulary Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {24255--24265} }
Tile-wise vs. Image-wise: Random-Tile Loss and Training Paradigm for Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Xiaoyu and Pan, Weihong and Xiang, Xiaojun and Zhai, Hongjia and Zhou, Liyang and Jiang, Hanqing and Zhang, Guofeng}, title = {Tile-wise vs. Image-wise: Random-Tile Loss and Training Paradigm for {Gaussian} Splatting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {26923--26932} }
Super Resolved Imaging with Adaptive Optics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Swanson_2025_ICCV, author = {Swanson, Robin and Lin, Esther Y. H. and Lamb, Masen and Sivanandam, Suresh and Kutulakos, Kiriakos N.}, title = {Super Resolved Imaging with Adaptive Optics}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {29142--29152} }
BillBoard Splatting (BBSplat): Learnable Textured Primitives for Novel View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Svitov_2025_ICCV, author = {Svitov, David and Morerio, Pietro and Agapito, Lourdes and Del Bue, Alessio}, title = {{BillBoard} Splatting ({BBSplat}): Learnable Textured Primitives for Novel View Synthesis}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {25029--25039} }
SC-Lane: Slope-aware and Consistent Road Height Estimation Framework for 3D Lane Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2025_ICCV, author = {Park, Chaesong and Seo, Eunbin and Hwang, Jihyeon and Lim, Jongwoo}, title = {{SC-Lane}: Slope-aware and Consistent Road Height Estimation Framework for {3D} Lane Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {28407--28416} }
MedSegFactory: Text-Guided Generation of Medical Image-Mask Pairs-
[pdf]
[arXiv]
[bibtex]@InProceedings{Mao_2025_ICCV, author = {Mao, Jiawei and Wang, Yuhan and Tang, Yucheng and Xu, Daguang and Wang, Kang and Yang, Yang and Zhou, Zongwei and Zhou, Yuyin}, title = {{MedSegFactory}: Text-Guided Generation of Medical Image-Mask Pairs}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {21525--21535} }
ReAL-AD: Towards Human-Like Reasoning in End-to-End Autonomous Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2025_ICCV, author = {Lu, Yuhang and Tu, Jiadong and Ma, Yuexin and Zhu, Xinge}, title = {{ReAL-AD}: Towards Human-Like Reasoning in End-to-End Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {27783--27793} }
ReME: A Data-Centric Framework for Training-Free Open-Vocabulary Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xuan_2025_ICCV, author = {Xuan, Xiwei and Deng, Ziquan and Ma, Kwan-Liu}, title = {{ReME}: A Data-Centric Framework for Training-Free Open-Vocabulary Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {20954--20965} }
A Visual Leap in CLIP Compositionality Reasoning through Generation of Counterfactual Sets-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jia_2025_ICCV, author = {Jia, Zexi and Huang, Chuanwei and Fei, Hongyan and Zhu, Yeshuang and Yuan, Zhiqiang and Deng, Ying and Zhang, Jiapei and Zhang, Jinchao and Zhou, Jie}, title = {A Visual Leap in {CLIP} Compositionality Reasoning through Generation of Counterfactual Sets}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {23498--23507} }
Superpowering Open-Vocabulary Object Detectors for X-ray Vision-
[pdf]
[supp]
[bibtex]@InProceedings{Garcia-Fernandez_2025_ICCV, author = {Garcia-Fernandez, Pablo and Vaquero, Lorenzo and Liu, Mingxuan and Xue, Feng and Cores, Daniel and Sebe, Nicu and Mucientes, Manuel and Ricci, Elisa}, title = {Superpowering Open-Vocabulary Object Detectors for {X-ray} Vision}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {20770--20779} }
LVBench: An Extreme Long Video Understanding Benchmark-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Weihan and He, Zehai and Hong, Wenyi and Cheng, Yean and Zhang, Xiaohan and Qi, Ji and Ding, Ming and Gu, Xiaotao and Huang, Shiyu and Xu, Bin and Dong, Yuxiao and Tang, Jie}, title = {{LVBench}: An Extreme Long Video Understanding Benchmark}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {22958--22967} }
SpatialCrafter: Unleashing the Imagination of Video Diffusion Models for Scene Reconstruction from Limited Observations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Songchun and Xu, Huiyao and Guo, Sitong and Xie, Zhongwei and Bao, Hujun and Xu, Weiwei and Zou, Changqing}, title = {{SpatialCrafter}: Unleashing the Imagination of Video Diffusion Models for Scene Reconstruction from Limited Observations}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {27794--27805} }
Multi-Granular Spatio-Temporal Token Merging for Training-Free Acceleration of Video LLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hyun_2025_ICCV, author = {Hyun, Jeongseok and Hwang, Sukjun and Han, Su Ho and Kim, Taeoh and Lee, Inwoong and Wee, Dongyoon and Lee, Joon-Young and Kim, Seon Joo and Shim, Minho}, title = {Multi-Granular Spatio-Temporal Token Merging for Training-Free Acceleration of Video {LLMs}}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {23990--24000} }
Inverse Image-Based Rendering for Light Field Generation from Single Images-
[pdf]
[supp]
[bibtex]@InProceedings{Jung_2025_ICCV, author = {Jung, Hyunjun and Jeon, Hae-Gon}, title = {Inverse Image-Based Rendering for Light Field Generation from Single Images}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {24739--24749} }
Generalized and Efficient 2D Gaussian Splatting for Arbitrary-scale Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Du and Chen, Liyi and Zhang, Zhengqiang and Zhang, Lei}, title = {Generalized and Efficient {2D} {Gaussian} Splatting for Arbitrary-scale Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {26435--26445} }
Pretend Benign: A Stealthy Adversarial Attack by Exploiting Vulnerabilities in Cooperative Perception-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2025_ICCV, author = {Lin, Hongwei and Pan, Dongyu and Xia, Qiming and Wu, Hai and Wang, Cheng and Shen, Siqi and Wen, Chenglu}, title = {Pretend Benign: A Stealthy Adversarial Attack by Exploiting Vulnerabilities in Cooperative Perception}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {19947--19956} }
ProbMED: A Probabilistic Framework for Medical Multimodal Binding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_ICCV, author = {Gao, Yuan and Kim, Sangwook and You, Jianzhong and McIntosh, Chris}, title = {{ProbMED}: A Probabilistic Framework for Medical Multimodal Binding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {20157--20167} }
QuickSplat: Fast 3D Surface Reconstruction via Learned Gaussian Initialization-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_ICCV, author = {Liu, Yueh-Cheng and H{\"o}llein, Lukas and Nie{\ss}ner, Matthias and Dai, Angela}, title = {{QuickSplat}: Fast {3D} Surface Reconstruction via Learned {Gaussian} Initialization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {27851--27861} }
MetaScope: Optics-Driven Neural Network for Ultra-Micro Metalens Endoscopy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_ICCV, author = {Li, Wuyang and Pan, Wentao and Liu, Xiaoyuan and Luo, Zhendong and Li, Chenxin and Liu, Hengyu and Tsai, Din Ping and Chen, Mu Ku and Yuan, Yixuan}, title = {{MetaScope}: Optics-Driven Neural Network for Ultra-Micro Metalens Endoscopy}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {25938--25950} }
SparseRecon: Neural Implicit Surface Reconstruction from Sparse Views with Feature and Depth Consistencies-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_ICCV, author = {Han, Liang and Zhang, Xu and Song, Haichuan and Shi, Kanle and Liu, Yu-Shen and Han, Zhizhong}, title = {{SparseRecon}: Neural Implicit Surface Reconstruction from Sparse Views with Feature and Depth Consistencies}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {28514--28524} }
Leveraging the Power of MLLMs for Gloss-Free Sign Language Translation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_ICCV, author = {Kim, Jungeun and Jeon, Hyeongwoo and Bae, Jongseong and Kim, Ha Young}, title = {Leveraging the Power of {MLLMs} for Gloss-Free Sign Language Translation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {21048--21058} }
UniMLVG: Unified Framework for Multi-view Long Video Generation with Comprehensive Control Capabilities for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Rui and Wu, Zehuan and Liu, Yichen and Guo, Yuxin and Ni, Jingcheng and Xia, Haifeng and Xia, Siyu}, title = {{UniMLVG}: Unified Framework for Multi-view Long Video Generation with Comprehensive Control Capabilities for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {25453--25463} }
CogCM: Cognition-Inspired Contextual Modeling for Audio-Visual Speech Enhancement-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Feixiang and Yang, Shuang and Shan, Shiguang and Chen, Xilin}, title = {{CogCM}: Cognition-Inspired Contextual Modeling for Audio-Visual Speech Enhancement}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {21408--21418} }
Uncertainty-Aware Diffusion-Guided Refinement of 3D Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bose_2025_ICCV, author = {Bose, Sarosij and Dutta, Arindam and Nag, Sayak and Zhang, Junge and Li, Jiachen and Karydis, Konstantinos and Roy-Chowdhury, Amit K.}, title = {Uncertainty-Aware Diffusion-Guided Refinement of {3D} Scenes}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {28271--28281} }
LLaFEA: Frame-Event Complementary Fusion for Fine-Grained Spatiotemporal Understanding in LMMs-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_ICCV, author = {Zhou, Hanyu and Lee, Gim Hee}, title = {{LLaFEA}: Frame-Event Complementary Fusion for Fine-Grained Spatiotemporal Understanding in {LMMs}}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {22294--22304} }
FastPoint: Accelerating 3D Point Cloud Model Inference via Sample Point Distance Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_ICCV, author = {Lee, Donghyun and Jeong, Dawoon and Lee, Jae W. and Yoon, Hongil}, title = {{FastPoint}: Accelerating {3D} Point Cloud Model Inference via Sample Point Distance Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {25114--25123} }
GSRecon: Efficient Generalizable Gaussian Splatting for Surface Reconstruction from Sparse Views-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Hang and Hui, Le and Qian, Jianjun and Xie, Jin and Yang, Jian}, title = {{GSRecon}: Efficient Generalizable {Gaussian} Splatting for Surface Reconstruction from Sparse Views}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {25346--25356} }
Beyond [cls]: Exploring the True Potential of Masked Image Modeling Representations-
[pdf]
[supp]
[bibtex]@InProceedings{Przewiezlikowski_2025_ICCV, author = {Przewi{\k{e}}{\'z}likowski, Marcin and Balestriero, Randall and Jasi{\'n}ski, Wojciech and {\'S}mieja, Marek and Zieli{\'n}ski, Bartosz}, title = {Beyond [cls]: Exploring the True Potential of Masked Image Modeling Representations}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {23442--23452} }
SpikePack: Enhanced Information Flow in Spiking Neural Networks with High Hardware Compatibility-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shen_2025_ICCV, author = {Shen, Guobin and Li, Jindong and Li, Tenglong and Zhao, Dongcheng and Zeng, Yi}, title = {{SpikePack}: Enhanced Information Flow in Spiking Neural Networks with High Hardware Compatibility}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {23385--23395} }
VFlowOpt: A Token Pruning Framework for LMMs with Visual Information Flow-Guided Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_ICCV, author = {Yang, Sihan and Xu, Runsen and Cui, Chenhang and Wang, Tai and Lin, Dahua and Pang, Jiangmiao}, title = {{VFlowOpt}: A Token Pruning Framework for {LMMs} with Visual Information Flow-Guided Optimization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {23924--23934} }
Text-guided Visual Prompt DINO for Generic Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Guan_2025_ICCV, author = {Guan, Yuchen and Sun, Chong and Fu, Canmiao and Huang, Zhipeng and Yuan, Chun and Li, Chen}, title = {Text-guided Visual Prompt {DINO} for Generic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {21288--21298} }
InfiniCube: Unbounded and Controllable Dynamic 3D Driving Scene Generation with World-Guided Video Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_ICCV, author = {Lu, Yifan and Ren, Xuanchi and Yang, Jiawei and Shen, Tianchang and Wu, Zhangjie and Gao, Jun and Wang, Yue and Chen, Siheng and Chen, Mike and Fidler, Sanja and Huang, Jiahui}, title = {{InfiniCube}: Unbounded and Controllable Dynamic {3D} Driving Scene Generation with World-Guided Video Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {27272--27283} }
CABLD: Contrast-Agnostic Brain Landmark Detection with Consistency-Based Regularization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Salari_2025_ICCV, author = {Salari, Soorena and Harirpoush, Arash and Rivaz, Hassan and Xiao, Yiming}, title = {{CABLD}: Contrast-Agnostic Brain Landmark Detection with Consistency-Based Regularization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {20991--21002} }
A Differentiable Wave Optics Model for End-to-End Computational Imaging System Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ho_2025_ICCV, author = {Ho, Chi-Jui and Belhe, Yash and Rotenberg, Steve and Ramamoorthi, Ravi and Li, Tzu-Mao and Antipa, Nicholas}, title = {A Differentiable Wave Optics Model for End-to-End Computational Imaging System Optimization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {28042--28051} }
DoppDrive: Doppler-Driven Temporal Aggregation for Improved Radar Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Haitman_2025_ICCV, author = {Haitman, Yuval and Bialer, Oded}, title = {{DoppDrive}: {Doppler}-Driven Temporal Aggregation for Improved Radar Object Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {26085--26094} }
YOLOE: Real-Time Seeing Anything-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Ao and Liu, Lihao and Chen, Hui and Lin, Zijia and Han, Jungong and Ding, Guiguang}, title = {{YOLOE}: Real-Time Seeing Anything}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {24591--24602} }
Beyond Text-Visual Attention: Exploiting Visual Cues for Effective Token Pruning in VLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_ICCV, author = {Zhang, Qizhe and Cheng, Aosong and Lu, Ming and Zhang, Renrui and Zhuo, Zhiyong and Cao, Jiajun and Guo, Shaobo and She, Qi and Zhang, Shanghang}, title = {Beyond Text-Visual Attention: Exploiting Visual Cues for Effective Token Pruning in {VLMs}}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {20857--20867} }
Griffon v2: Advancing Multimodal Perception with High-Resolution Scaling and Visual-Language Co-Referring-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhan_2025_ICCV, author = {Zhan, Yufei and Zheng, Shurong and Zhu, Yousong and Zhao, Hongyin and Yang, Fan and Tang, Ming and Wang, Jinqiao}, title = {Griffon v2: Advancing Multimodal Perception with High-Resolution Scaling and Visual-Language Co-Referring}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {22947--22957} }
SDFormer: Vision-based 3D Semantic Scene Completion via SAM-assisted Dual-channel Voxel Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{Xue_2025_ICCV, author = {Xue, Yujie and Pi, Huilong and Zhang, Jiapeng and Qin, Yunchuan and Tang, Zhuo and Li, Kenli and Li, Ruihui}, title = {{SDFormer}: Vision-based {3D} Semantic Scene Completion via {SAM}-assisted Dual-channel Voxel Transformer}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {26837--26847} }
ForestFormer3D: A Unified Framework for End-to-End Segmentation of Forest LiDAR 3D Point Clouds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiang_2025_ICCV, author = {Xiang, Binbin and Wielgosz, Maciej and Puliti, Stefano and Kr{\'a}l, Kamil and Kr{\r{u}}{\v{c}}ek, Martin and Missarov, Azim and Astrup, Rasmus}, title = {{ForestFormer3D}: A Unified Framework for End-to-End Segmentation of Forest {LiDAR} {3D} Point Clouds}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {24717--24727} }
AccidentalGS: 3D Gaussian Splatting from Accidental Camera Motion-
[pdf]
[supp]
[bibtex]@InProceedings{Mao_2025_ICCV, author = {Mao, Mao and Shen, Xujie and Chen, Guyuan and Zhao, Boming and Hu, Jiarui and Bao, Hujun and Cui, Zhaopeng}, title = {{AccidentalGS}: {3D} {Gaussian} Splatting from Accidental Camera Motion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {27445--27455} }
Neuroverse3D: Developing In-Context Learning Universal Model for Neuroimaging in 3D-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_ICCV, author = {Hu, Jiesi and Peng, Hanyang and Yang, Yanwu and Guo, Xutao and Shang, Yang and Shi, Pengcheng and Ye, Chenfei and Ma, Ting}, title = {{Neuroverse3D}: Developing In-Context Learning Universal Model for Neuroimaging in {3D}}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {21721--21731} }
ProSAM: Enhancing the Robustness of SAM-based Visual Reference Segmentation with Probabilistic Prompts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_ICCV, author = {Wang, Xiaoqi and Sebastian, Clint and He, Wenbin and Ren, Liu}, title = {{ProSAM}: Enhancing the Robustness of {SAM}-based Visual Reference Segmentation with Probabilistic Prompts}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {20487--20496} }
Allowing Oscillation Quantization: Overcoming Solution Space Limitation in Low Bit-Width Quantization-
[pdf]
[bibtex]@InProceedings{Xie_2025_ICCV, author = {Xie, Weiying and Meng, Zihan and Ma, Jitao and Guo, Wenjin and Li, Haowei and Qin, Haonan and Fang, Leyuan and Li, Yunsong}, title = {Allowing Oscillation Quantization: Overcoming Solution Space Limitation in Low Bit-Width Quantization}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {24615--24624} }
SAMora: Enhancing SAM through Hierarchical Self-Supervised Pre-Training for Medical Images-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_ICCV, author = {Chen, Shuhang and Yuan, Hangjie and Liu, Pengwei and Gu, Hanxue and Feng, Tao and Ni, Dong}, title = {{SAMora}: Enhancing {SAM} through Hierarchical Self-Supervised Pre-Training for Medical Images}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {21209--21219} }
PathDiff: Histopathology Image Synthesis with Unpaired Text and Mask Conditions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bhosale_2025_ICCV, author = {Bhosale, Mahesh and Wasi, Abdul and Zhai, Yuanhao and Tian, Yunjie and Border, Samuel and Xi, Nan and Sarder, Pinaki and Yuan, Junsong and Doermann, David and Gong, Xuan}, title = {{PathDiff}: Histopathology Image Synthesis with Unpaired Text and Mask Conditions}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {22415--22424} }
GVDepth: Zero-Shot Monocular Depth Estimation for Ground Vehicles based on Probabilistic Cue Fusion-
[pdf]
[supp]
[bibtex]@InProceedings{Koledic_2025_ICCV, author = {Koledi{\'c}, Karlo and Petrovi{\'c}, Luka and Markovi{\'c}, Ivan and Petrovi{\'c}, Ivan}, title = {{GVDepth}: Zero-Shot Monocular Depth Estimation for Ground Vehicles based on Probabilistic Cue Fusion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {26126--26135} }
One Trajectory, One Token: Grounded Video Tokenization via Panoptic Sub-object Trajectory-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_ICCV, author = {Zheng, Chenhao and Zhang, Jieyu and Salehi, Mohammadreza and Gao, Ziqi and Iyengar, Vishnu and Kobori, Norimasa and Kong, Quan and Krishna, Ranjay}, title = {One Trajectory, One Token: Grounded Video Tokenization via Panoptic Sub-object Trajectory}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {23156--23166} }
Kaleidoscopic Background Attack: Disrupting Pose Estimation with Multi-Fold Radial Symmetry Textures-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ding_2025_ICCV, author = {Ding, Xinlong and Yu, Hongwei and Li, Jiawei and Li, Feifan and Shang, Yu and Zou, Bochao and Ma, Huimin and Chen, Jiansheng}, title = {Kaleidoscopic Background Attack: Disrupting Pose Estimation with Multi-Fold Radial Symmetry Textures}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {28483--28492} }
Agreement aware and dissimilarity oriented GLOM-
[pdf]
[supp]
[bibtex]@InProceedings{Zeng_2025_ICCV, author = {Zeng, Ru and Song, Yan and Zhang, Yang and Hu, Yanling and Yu, Hui}, title = {Agreement aware and dissimilarity oriented {GLOM}}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {24351--24359} }
PanoSplatt3R: Leveraging Perspective Pretraining for Generalized Unposed Wide-Baseline Panorama Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2025_ICCV, author = {Ren, Jiahui and Xiang, Mochu and Zhu, Jiajun and Dai, Yuchao}, title = {{PanoSplatt3R}: Leveraging Perspective Pretraining for Generalized Unposed Wide-Baseline Panorama Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {28959--28969} }
SmolDocling: An ultra-compact vision-language model for end-to-end multi-modal document conversion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nassar_2025_ICCV, author = {Nassar, Ahmed and Omenetti, Matteo and Lysak, Maksym and Livathinos, Nikolaos and Auer, Christoph and Morin, Lucas and de Lima, Rafael Teixeira and Kim, Yusik and Gurbuz, A. Said and Dolfi, Michele and Staar, Peter W. J.}, title = {{SmolDocling}: An ultra-compact vision-language model for end-to-end multi-modal document conversion}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {21972--21983} }
Zero-Shot Composed Image Retrieval via Dual-Stream Instruction-Aware Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhong_2025_ICCV, author = {Zhong, Wenliang and Barton, Rob and An, Weizhi and Jiang, Feng and Ma, Hehuan and Guo, Yuzhi and Dan, Abhishek and Sam, Shioulin and Bouyarmane, Karim and Huang, Junzhou}, title = {Zero-Shot Composed Image Retrieval via Dual-Stream Instruction-Aware Distillation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {22221--22231} }
UniGS: Modeling Unitary 3D Gaussians for Novel View Synthesis from Sparse-view Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_ICCV, author = {Wu, Jiamin and Liu, Kenkun and Jiang, Xiaoke and Yao, Yuan and Zhang, Lei}, title = {{UniGS}: Modeling Unitary {3D} {Gaussians} for Novel View Synthesis from Sparse-view Images}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {26241--26251} }
SP2T: Sparse Proxy Attention for Dual-stream Point Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{Wan_2025_ICCV, author = {Wan, Jiaxu and Zhang, Hong and He, Ziqi and Deng, Yangyan and Wang, Qishu and Yuan, Ding and Yang, Yifan}, title = {{SP2T}: Sparse Proxy Attention for Dual-stream Point Transformer}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {27885--27895} }
Back