ICCV 2025 Open Access Repository

Papers

Back
kh: Symmetry Understanding of 3D Shapes via Chirality Disentanglement: Weikang Wang,

Tobias Weißberg,

Nafie El Amrani,

Florian Bernard; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Weikang and Wei{\ss}berg, Tobias and El Amrani, Nafie and Bernard, Florian},
  title     = {kh: Symmetry Understanding of {3D} Shapes via Chirality Disentanglement},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28292--28302},
}
One Polyp Identifies All: One-Shot Polyp Segmentation with SAM via Cascaded Priors and Iterative Prompt Evolution: Xinyu Mao,

Xiaohan Xing,

Fei Meng,

Jianbang Liu,

Fan Bai,

Qiang Nie,

Max Meng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mao_2025_ICCV,
  author    = {Mao, Xinyu and Xing, Xiaohan and Meng, Fei and Liu, Jianbang and Bai, Fan and Nie, Qiang and Meng, Max},
  title     = {One Polyp Identifies All: One-Shot Polyp Segmentation with {SAM} via Cascaded Priors and Iterative Prompt Evolution},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24182--24191},
}
From Gaze to Movement: Predicting Visual Attention for Autonomous Driving Human-Machine Interaction based on Programmatic Imitation Learning: Yexin Huang,

Yongbin Lin,

Lishengsa Yue,

Zhihong Yao,

Jie Wang; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Yexin and Lin, Yongbin and Yue, Lishengsa and Yao, Zhihong and Wang, Jie},
  title     = {From Gaze to Movement: Predicting Visual Attention for Autonomous Driving Human-Machine Interaction based on Programmatic Imitation Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26146--26155},
}
Rethinking Detecting Salient and Camouflaged Objects in Unconstrained Scenes: Zhangjun Zhou,

Yiping Li,

Chunlin Zhong,

Jianuo Huang,

Jialun Pei,

Hua Li,

He Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Zhangjun and Li, Yiping and Zhong, Chunlin and Huang, Jianuo and Pei, Jialun and Li, Hua and Tang, He},
  title     = {Rethinking Detecting Salient and Camouflaged Objects in Unconstrained Scenes},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22372--22382},
}
OccluGaussian: Occlusion-Aware Gaussian Splatting for Large Scene Reconstruction and Rendering: Shiyong Liu,

Xiao Tang,

Zhihao Li,

Yingfan He,

Chongjie Ye,

Jianzhuang Liu,

Binxiao Huang,

Shunbo Zhou,

Xiaofei Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Shiyong and Tang, Xiao and Li, Zhihao and He, Yingfan and Ye, Chongjie and Liu, Jianzhuang and Huang, Binxiao and Zhou, Shunbo and Wu, Xiaofei},
  title     = {{OccluGaussian}: Occlusion-Aware {Gaussian} Splatting for Large Scene Reconstruction and Rendering},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26643--26652},
}
Unsupervised RGB-D Point Cloud Registration for Scenes with Low Overlap and Photometric Inconsistency: Yejun Shou,

Haocheng Wang,

Lingfeng Shen,

Qian Zheng,

Gang Pan,

Yanlong Cao; [pdf] [supp]
[bibtex]
@InProceedings{Shou_2025_ICCV,
  author    = {Shou, Yejun and Wang, Haocheng and Shen, Lingfeng and Zheng, Qian and Pan, Gang and Cao, Yanlong},
  title     = {Unsupervised {RGB-D} Point Cloud Registration for Scenes with Low Overlap and Photometric Inconsistency},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24868--24877},
}
Demeter: A Parametric Model of Crop Plant Morphology from the Real World: Tianhang Cheng,

Albert J. Zhai,

Evan Z. Chen,

Rui Zhou,

Yawen Deng,

Zitong Li,

Kejie Zhao,

Janice Shiu,

Qianyu Zhao,

Yide Xu,

Xinlei Wang,

Yuan Shen,

Sheng Wang,

Lisa Ainsworth,

Kaiyu Guan,

Shenlong Wang; [pdf] [supp]
[bibtex]
@InProceedings{Cheng_2025_ICCV,
  author    = {Cheng, Tianhang and Zhai, Albert J. and Chen, Evan Z. and Zhou, Rui and Deng, Yawen and Li, Zitong and Zhao, Kejie and Shiu, Janice and Zhao, Qianyu and Xu, Yide and Wang, Xinlei and Shen, Yuan and Wang, Sheng and Ainsworth, Lisa and Guan, Kaiyu and Wang, Shenlong},
  title     = {Demeter: A Parametric Model of Crop Plant Morphology from the Real World},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28740--28751},
}
VideoLLaMB: Long Streaming Video Understanding with Recurrent Memory Bridges: Yuxuan Wang,

Yiqi Song,

Cihang Xie,

Yang Liu,

Zilong Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yuxuan and Song, Yiqi and Xie, Cihang and Liu, Yang and Zheng, Zilong},
  title     = {{VideoLLaMB}: Long Streaming Video Understanding with Recurrent Memory Bridges},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24170--24181},
}
HiERO: Understanding the Hierarchy of Human Behavior Enhances Reasoning on Egocentric Videos: Simone Alberto Peirone,

Francesca Pistilli,

Giuseppe Averta; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peirone_2025_ICCV,
  author    = {Peirone, Simone Alberto and Pistilli, Francesca and Averta, Giuseppe},
  title     = {{HiERO}: Understanding the Hierarchy of Human Behavior Enhances Reasoning on Egocentric Videos},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19862--19871},
}
FVGen: Accelerating Novel-View Synthesis with Adversarial Video Diffusion Distillation: Wenbin Teng,

Gonglin Chen,

Haiwei Chen,

Yajie Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Teng_2025_ICCV,
  author    = {Teng, Wenbin and Chen, Gonglin and Chen, Haiwei and Zhao, Yajie},
  title     = {{FVGen}: Accelerating Novel-View Synthesis with Adversarial Video Diffusion Distillation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26095--26105},
}
ReconDreamer++: Harmonizing Generative and Reconstructive Models for Driving Scene Representation: Guosheng Zhao,

Xiaofeng Wang,

Chaojun Ni,

Zheng Zhu,

Wenkang Qin,

Guan Huang,

Xingang Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Guosheng and Wang, Xiaofeng and Ni, Chaojun and Zhu, Zheng and Qin, Wenkang and Huang, Guan and Wang, Xingang},
  title     = {{ReconDreamer++}: Harmonizing Generative and Reconstructive Models for Driving Scene Representation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26718--26728},
}
Mind the Gap: Aligning Vision Foundation Models to Image Feature Matching: Yuhan Liu,

Jingwen Fu,

Yang Wu,

Kangyi Wu,

Pengna Li,

Jiayi Wu,

Sanping Zhou,

Jingmin Xin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yuhan and Fu, Jingwen and Wu, Yang and Wu, Kangyi and Li, Pengna and Wu, Jiayi and Zhou, Sanping and Xin, Jingmin},
  title     = {Mind the Gap: Aligning Vision Foundation Models to Image Feature Matching},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20313--20323},
}
CoStoDet-DDPM: Collaborative Training of Stochastic and Deterministic Models Improves Surgical Workflow Anticipation and Recognition: Kaixiang Yang,

Xin Li,

Qiang Li,

Zhiwei Wang; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Kaixiang and Li, Xin and Li, Qiang and Wang, Zhiwei},
  title     = {{CoStoDet-DDPM}: Collaborative Training of Stochastic and Deterministic Models Improves Surgical Workflow Anticipation and Recognition},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23741--23751},
}
RareCLIP: Rarity-aware Online Zero-shot Industrial Anomaly Detection: Jianfang He,

Min Cao,

Silong Peng,

Qiong Xie; [pdf] [supp]
[bibtex]
@InProceedings{He_2025_ICCV,
  author    = {He, Jianfang and Cao, Min and Peng, Silong and Xie, Qiong},
  title     = {{RareCLIP}: Rarity-aware Online Zero-shot Industrial Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24478--24487},
}
HERMES: A Unified Self-Driving World Model for Simultaneous 3D Scene Understanding and Generation: Xin Zhou,

Dingkang Liang,

Sifan Tu,

Xiwu Chen,

Yikang Ding,

Dingyuan Zhang,

Feiyang Tan,

Hengshuang Zhao,

Xiang Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Xin and Liang, Dingkang and Tu, Sifan and Chen, Xiwu and Ding, Yikang and Zhang, Dingyuan and Tan, Feiyang and Zhao, Hengshuang and Bai, Xiang},
  title     = {{HERMES}: A Unified Self-Driving World Model for Simultaneous {3D} Scene Understanding and Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27817--27827},
}
ArgMatch: Adaptive Refinement Gathering for Efficient Dense Matching: Yuxin Deng,

Kaining Zhang,

Linfeng Tang,

Jiaqi Yang,

Jiayi Ma; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2025_ICCV,
  author    = {Deng, Yuxin and Zhang, Kaining and Tang, Linfeng and Yang, Jiaqi and Ma, Jiayi},
  title     = {{ArgMatch}: Adaptive Refinement Gathering for Efficient Dense Matching},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27369--27379},
}
Free4D: Tuning-free 4D Scene Generation with Spatial-Temporal Consistency: Tianqi Liu,

Zihao Huang,

Zhaoxi Chen,

Guangcong Wang,

Shoukang Hu,

Liao Shen,

Huiqiang Sun,

Zhiguo Cao,

Wei Li,

Ziwei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Tianqi and Huang, Zihao and Chen, Zhaoxi and Wang, Guangcong and Hu, Shoukang and Shen, Liao and Sun, Huiqiang and Cao, Zhiguo and Li, Wei and Liu, Ziwei},
  title     = {{Free4D}: Tuning-free {4D} Scene Generation with Spatial-Temporal Consistency},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25571--25582},
}
Spherical Epipolar Rectification for Deep Two-View Absolute Depth Estimation: Pierre-André Brousseau,

Sébastien Roy; [pdf] [supp]
[bibtex]
@InProceedings{Brousseau_2025_ICCV,
  author    = {Brousseau, Pierre-Andr{\'e} and Roy, S{\'e}bastien},
  title     = {Spherical Epipolar Rectification for Deep Two-View Absolute Depth Estimation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28925--28934},
}
ScenePainter: Semantically Consistent Perpetual 3D Scene Generation with Concept Relation Alignment: Chong Xia,

Shengjun Zhang,

Fangfu Liu,

Chang Liu,

Khodchaphun Hirunyaratsameewong,

Yueqi Duan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2025_ICCV,
  author    = {Xia, Chong and Zhang, Shengjun and Liu, Fangfu and Liu, Chang and Hirunyaratsameewong, Khodchaphun and Duan, Yueqi},
  title     = {{ScenePainter}: Semantically Consistent Perpetual {3D} Scene Generation with Concept Relation Alignment},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28808--28817},
}
ESCNet: Edge-Semantic Collaborative Network for Camouflaged Object Detection: Sheng Ye,

Xin Chen,

Yan Zhang,

Xianming Lin,

Liujuan Cao; [pdf]
[bibtex]
@InProceedings{Ye_2025_ICCV,
  author    = {Ye, Sheng and Chen, Xin and Zhang, Yan and Lin, Xianming and Cao, Liujuan},
  title     = {{ESCNet}: Edge-Semantic Collaborative Network for Camouflaged Object Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20053--20063},
}
PixelStitch: Structure-Preserving Pixel-Wise Bidirectional Warps for Unsupervised Image Stitching: Hengzhe Jin,

Lang Nie,

Chunyu Lin,

Xiaomei Feng,

Yao Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Jin_2025_ICCV,
  author    = {Jin, Hengzhe and Nie, Lang and Lin, Chunyu and Feng, Xiaomei and Zhao, Yao},
  title     = {{PixelStitch}: Structure-Preserving Pixel-Wise Bidirectional Warps for Unsupervised Image Stitching},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28125--28134},
}
VLR-Driver: Large Vision-Language-Reasoning Models for Embodied Autonomous Driving: Fanjie Kong,

Yitong Li,

Weihuang Chen,

Chen Min,

Yizhe Li,

Zhiqiang Gao,

Haoyang Li,

Zhongyu Guo,

Hongbin Sun; [pdf]
[bibtex]
@InProceedings{Kong_2025_ICCV,
  author    = {Kong, Fanjie and Li, Yitong and Chen, Weihuang and Min, Chen and Li, Yizhe and Gao, Zhiqiang and Li, Haoyang and Guo, Zhongyu and Sun, Hongbin},
  title     = {{VLR-Driver}: Large Vision-Language-Reasoning Models for Embodied Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26966--26976},
}
ResGS: Residual Densification of 3D Gaussian for Efficient Detail Recovery: Yanzhe Lyu,

Kai Cheng,

Xin Kang,

Xuejin Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lyu_2025_ICCV,
  author    = {Lyu, Yanzhe and Cheng, Kai and Kang, Xin and Chen, Xuejin},
  title     = {{ResGS}: Residual Densification of {3D} {Gaussian} for Efficient Detail Recovery},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28093--28102},
}
Prior2Former - Evidential Modeling of Mask Transformers for Assumption-Free Open-World Panoptic Segmentation: Sebastian Schmidt,

Julius Koerner,

Dominik Fuchsgruber,

Stefano Gasperini,

Federico Tombari,

Stephan Günnemann; [pdf] [supp]
[bibtex]
@InProceedings{Schmidt_2025_ICCV,
  author    = {Schmidt, Sebastian and Koerner, Julius and Fuchsgruber, Dominik and Gasperini, Stefano and Tombari, Federico and G{\"u}nnemann, Stephan},
  title     = {{Prior2Former} - Evidential Modeling of Mask Transformers for Assumption-Free Open-World Panoptic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23646--23656},
}
Optimal Transport for Brain-Image Alignment: Unveiling Redundancy and Synergy in Neural Information Processing: Yang Xiao,

Wang Lu,

Jie Ji,

Ruimeng Ye,

Gen Li,

Xiaolong Ma,

Bo Hui; [pdf] [arXiv]
[bibtex]
@InProceedings{Xiao_2025_ICCV,
  author    = {Xiao, Yang and Lu, Wang and Ji, Jie and Ye, Ruimeng and Li, Gen and Ma, Xiaolong and Hui, Bo},
  title     = {Optimal Transport for Brain-Image Alignment: Unveiling Redundancy and Synergy in Neural Information Processing},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20445--20455},
}
E-SAM: Training-Free Segment Every Entity Model: Weiming Zhang,

Dingwen Xiao,

Lei Chen,

Lin Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Weiming and Xiao, Dingwen and Chen, Lei and Wang, Lin},
  title     = {{E-SAM}: Training-Free Segment Every Entity Model},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24688--24697},
}
RobuSTereo: Robust Zero-Shot Stereo Matching under Adverse Weather: Yuran Wang,

Yingping Liang,

Yutao Hu,

Ying Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yuran and Liang, Yingping and Hu, Yutao and Fu, Ying},
  title     = {{RobuSTereo}: Robust Zero-Shot Stereo Matching under Adverse Weather},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25134--25144},
}
HyPiDecoder: Hybrid Pixel Decoder for Efficient Segmentation and Detection: Fengzhe Zhou,

Humphrey Shi; [pdf]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Fengzhe and Shi, Humphrey},
  title     = {{HyPiDecoder}: Hybrid Pixel Decoder for Efficient Segmentation and Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22100--22109},
}
Feature Purification Matters: Suppressing Outlier Propagation for Training-Free Open-Vocabulary Semantic Segmentation: Shuo Jin,

Siyue Yu,

Bingfeng Zhang,

Mingjie Sun,

Yi Dong,

Jimin Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Jin_2025_ICCV,
  author    = {Jin, Shuo and Yu, Siyue and Zhang, Bingfeng and Sun, Mingjie and Dong, Yi and Xiao, Jimin},
  title     = {Feature Purification Matters: Suppressing Outlier Propagation for Training-Free Open-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20291--20300},
}
SparseLaneSTP: Leveraging Spatio-Temporal Priors with Sparse Transformers for 3D Lane Detection: Maximilian Pittner,

Joel Janai,

Mario Faigle,

Alexandru Paul Condurache; [pdf] [supp]
[bibtex]
@InProceedings{Pittner_2025_ICCV,
  author    = {Pittner, Maximilian and Janai, Joel and Faigle, Mario and Condurache, Alexandru Paul},
  title     = {{SparseLaneSTP}: Leveraging Spatio-Temporal Priors with Sparse Transformers for {3D} Lane Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {29099--29109},
}
Semantic Causality-Aware Vision-Based 3D Occupancy Prediction: Dubing Chen,

Huan Zheng,

Yucheng Zhou,

Xianfei Li,

Wenlong Liao,

Tao He,

Pai Peng,

Jianbing Shen; [pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Dubing and Zheng, Huan and Zhou, Yucheng and Li, Xianfei and Liao, Wenlong and He, Tao and Peng, Pai and Shen, Jianbing},
  title     = {Semantic Causality-Aware Vision-Based {3D} Occupancy Prediction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24878--24888},
}
OmniSAM: Omnidirectional Segment Anything Model for UDA in Panoramic Semantic Segmentation: Ding Zhong,

Xu Zheng,

Chenfei Liao,

Yuanhuiyi Lyu,

Jialei Chen,

Shengyang Wu,

Linfeng Zhang,

Xuming Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2025_ICCV,
  author    = {Zhong, Ding and Zheng, Xu and Liao, Chenfei and Lyu, Yuanhuiyi and Chen, Jialei and Wu, Shengyang and Zhang, Linfeng and Hu, Xuming},
  title     = {{OmniSAM}: Omnidirectional Segment Anything Model for {UDA} in Panoramic Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23892--23901},
}
Probabilistic Inertial Poser (ProbIP): Uncertainty-aware Human Motion Modeling from Sparse Inertial Sensors: Min Kim,

Younho Jeon,

Sungho Jo; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Min and Jeon, Younho and Jo, Sungho},
  title     = {Probabilistic Inertial Poser ({ProbIP}): Uncertainty-aware Human Motion Modeling from Sparse Inertial Sensors},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25893--25902},
}
RIOcc: Efficient Cross-Modal Fusion Transformer with Collaborative Feature Refinement for 3D Semantic Occupancy Prediction: Baojie Fan,

Xiaotian Li,

Yuhan Zhou,

Yuyu Jiang,

Jiandong Tian,

Huijie Fan; [pdf] [supp]
[bibtex]
@InProceedings{Fan_2025_ICCV,
  author    = {Fan, Baojie and Li, Xiaotian and Zhou, Yuhan and Jiang, Yuyu and Tian, Jiandong and Fan, Huijie},
  title     = {{RIOcc}: Efficient Cross-Modal Fusion Transformer with Collaborative Feature Refinement for {3D} Semantic Occupancy Prediction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25851--25861},
}
Anomaly Detection of Integrated Circuits Package Substrates Using the Large Vision Model SAIC: Dataset Construction, Methodology, and Application: Ruiyun Yu,

Bingyang Guo,

Haoyuan Li; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Ruiyun and Guo, Bingyang and Li, Haoyuan},
  title     = {Anomaly Detection of Integrated Circuits Package Substrates Using the Large Vision Model {SAIC}: Dataset Construction, Methodology, and Application},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22563--22574},
}
Refer to Any Segmentation Mask Group With Vision-Language Prompts: Shengcao Cao,

Zijun Wei,

Jason Kuen,

Kangning Liu,

Lingzhi Zhang,

Jiuxiang Gu,

HyunJoon Jung,

Liang-Yan Gui,

Yu-Xiong Wang; [pdf] [supp]
[bibtex]
@InProceedings{Cao_2025_ICCV,
  author    = {Cao, Shengcao and Wei, Zijun and Kuen, Jason and Liu, Kangning and Zhang, Lingzhi and Gu, Jiuxiang and Jung, HyunJoon and Gui, Liang-Yan and Wang, Yu-Xiong},
  title     = {Refer to Any Segmentation Mask Group With Vision-Language Prompts},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21853--21863},
}
Recover Biological Structure from Sparse-View Diffraction Images with Neural Volumetric Prior: Renzhi He,

Haowen Zhou,

Yubei Chen,

Yi Xue; [pdf] [supp]
[bibtex]
@InProceedings{He_2025_ICCV,
  author    = {He, Renzhi and Zhou, Haowen and Chen, Yubei and Xue, Yi},
  title     = {Recover Biological Structure from Sparse-View Diffraction Images with Neural Volumetric Prior},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27771--27782},
}
SpatialSplat: Efficient Semantic 3D from Sparse Unposed Images: Yu Sheng,

Jiajun Deng,

Xinran Zhang,

Yu Zhang,

Bei Hua,

Yanyong Zhang,

Jianmin Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sheng_2025_ICCV,
  author    = {Sheng, Yu and Deng, Jiajun and Zhang, Xinran and Zhang, Yu and Hua, Bei and Zhang, Yanyong and Ji, Jianmin},
  title     = {{SpatialSplat}: Efficient Semantic {3D} from Sparse Unposed Images},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26404--26414},
}
ConsistentCity: Semantic Flow-guided Occupancy DiT for Temporally Consistent Driving Scene Synthesis: Benjin Zhu,

Xiaogang Wang,

Hongsheng Li; [pdf]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Benjin and Wang, Xiaogang and Li, Hongsheng},
  title     = {{ConsistentCity}: Semantic Flow-guided Occupancy {DiT} for Temporally Consistent Driving Scene Synthesis},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26382--26392},
}
G2PDiffusion: Cross-Species Genotype-to-Phenotype Prediction via Evolutionary Diffusion: Mengdi Liu,

Zhangyang Gao,

Hong Chang,

Stan Z. Li,

Shiguang Shan,

Xilin Chen; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Mengdi and Gao, Zhangyang and Chang, Hong and Li, Stan Z. and Shan, Shiguang and Chen, Xilin},
  title     = {{G2PDiffusion}: Cross-Species Genotype-to-Phenotype Prediction via Evolutionary Diffusion},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20705--20714},
}
PathFinder: A Multi-Modal Multi-Agent System for Medical Diagnostic Decision-Making Applied to Histopathology: Fatemeh Ghezloo,

Mehmet Saygin Seyfioglu,

Rustin Soraki,

Wisdom O. Ikezogwo,

Beibin Li,

Tejoram Vivekanandan,

Joann G. Elmore,

Ranjay Krishna,

Linda Shapiro; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ghezloo_2025_ICCV,
  author    = {Ghezloo, Fatemeh and Seyfioglu, Mehmet Saygin and Soraki, Rustin and Ikezogwo, Wisdom O. and Li, Beibin and Vivekanandan, Tejoram and Elmore, Joann G. and Krishna, Ranjay and Shapiro, Linda},
  title     = {{PathFinder}: A Multi-Modal Multi-Agent System for Medical Diagnostic Decision-Making Applied to Histopathology},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23431--23441},
}
MolParser: End-to-end Visual Recognition of Molecule Structures in the Wild: Xi Fang,

Jiankun Wang,

Xiaochen Cai,

Shangqian Chen,

Shuwen Yang,

Haoyi Tao,

Nan Wang,

Lin Yao,

Linfeng Zhang,

Guolin Ke; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2025_ICCV,
  author    = {Fang, Xi and Wang, Jiankun and Cai, Xiaochen and Chen, Shangqian and Yang, Shuwen and Tao, Haoyi and Wang, Nan and Yao, Lin and Zhang, Linfeng and Ke, Guolin},
  title     = {{MolParser}: End-to-end Visual Recognition of Molecule Structures in the Wild},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24528--24538},
}
VideoMiner: Iteratively Grounding Key Frames of Hour-Long Videos via Tree-based Group Relative Policy Optimization: Xinye Cao,

Hongcan Guo,

Jiawen Qian,

Guoshun Nan,

Chao Wang,

Yuqi Pan,

Tianhao Hou,

Xiaojuan Wang,

Yutong Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2025_ICCV,
  author    = {Cao, Xinye and Guo, Hongcan and Qian, Jiawen and Nan, Guoshun and Wang, Chao and Pan, Yuqi and Hou, Tianhao and Wang, Xiaojuan and Gao, Yutong},
  title     = {{VideoMiner}: Iteratively Grounding Key Frames of Hour-Long Videos via Tree-based Group Relative Policy Optimization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23773--23783},
}
FROSS: Faster-Than-Real-Time Online 3D Semantic Scene Graph Generation from RGB-D Images: Hao-Yu Hou,

Chun-Yi Lee,

Motoharu Sonogashira,

Yasutomo Kawanishi; [pdf] [supp]
[bibtex]
@InProceedings{Hou_2025_ICCV,
  author    = {Hou, Hao-Yu and Lee, Chun-Yi and Sonogashira, Motoharu and Kawanishi, Yasutomo},
  title     = {{FROSS}: Faster-Than-Real-Time Online {3D} Semantic Scene Graph Generation from {RGB-D} Images},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28818--28827},
}
HVPUNet: Hybrid-Voxel Point-cloud Upsampling Network: Juhyung Ha,

Vibhas Kumar Vats,

Soon-heung Jung,

Alimoor Reza,

David J. Crandall; [pdf]
[bibtex]
@InProceedings{Ha_2025_ICCV,
  author    = {Ha, Juhyung and Vats, Vibhas Kumar and Jung, Soon-heung and Reza, Alimoor and Crandall, David J.},
  title     = {{HVPUNet}: Hybrid-Voxel Point-cloud Upsampling Network},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {29153--29162},
}
Growing a Twig to Accelerate Large Vision-Language Models: Zhenwei Shao,

Mingyang Wang,

Zhou Yu,

Wenwen Pan,

Yan Yang,

Tao Wei,

Hongyuan Zhang,

Ning Mao,

Wei Chen,

Jun Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shao_2025_ICCV,
  author    = {Shao, Zhenwei and Wang, Mingyang and Yu, Zhou and Pan, Wenwen and Yang, Yan and Wei, Tao and Zhang, Hongyuan and Mao, Ning and Chen, Wei and Yu, Jun},
  title     = {Growing a Twig to Accelerate Large Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20064--20074},
}
Controllable Latent Space Augmentation for Digital Pathology: Sofiène Boutaj,

Marin Scalbert,

Pierre Marza,

Florent Couzinie-Devy,

Maria Vakalopoulou,

Stergios Christodoulidis; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Boutaj_2025_ICCV,
  author    = {Boutaj, Sofi{\`e}ne and Scalbert, Marin and Marza, Pierre and Couzinie-Devy, Florent and Vakalopoulou, Maria and Christodoulidis, Stergios},
  title     = {Controllable Latent Space Augmentation for Digital Pathology},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22165--22174},
}
WINS: Winograd Structured Pruning for Fast Winograd Convolution: Cheonjun Park,

Hyun Jae Oh,

Mincheol Park,

Hyunchan Moon,

Minsik Kim,

Suhyun Kim,

Myung Kuk Yoon,

Won Woo Ro; [pdf] [supp]
[bibtex]
@InProceedings{Park_2025_ICCV,
  author    = {Park, Cheonjun and Oh, Hyun Jae and Park, Mincheol and Moon, Hyunchan and Kim, Minsik and Kim, Suhyun and Yoon, Myung Kuk and Ro, Won Woo},
  title     = {{WINS}: {Winograd} Structured Pruning for Fast {Winograd} Convolution},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22477--22487},
}
RayGaussX: Accelerating Gaussian-Based Ray Marching for Real-Time and High-Quality Novel View Synthesis: Hugo Blanc,

Jean-Emmanuel Deschaud,

Alexis Paljic; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Blanc_2025_ICCV,
  author    = {Blanc, Hugo and Deschaud, Jean-Emmanuel and Paljic, Alexis},
  title     = {{RayGaussX}: Accelerating {Gaussian}-Based Ray Marching for Real-Time and High-Quality Novel View Synthesis},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27575--27584},
}
CoopTrack: Exploring End-to-End Learning for Efficient Cooperative Sequential Perception: Jiaru Zhong,

Jiahao Wang,

Jiahui Xu,

Xiaofan Li,

Zaiqing Nie,

Haibao Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2025_ICCV,
  author    = {Zhong, Jiaru and Wang, Jiahao and Xu, Jiahui and Li, Xiaofan and Nie, Zaiqing and Yu, Haibao},
  title     = {{CoopTrack}: Exploring End-to-End Learning for Efficient Cooperative Sequential Perception},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26954--26965},
}
ContraGS: Codebook-Condensed and Trainable Gaussian Splatting for Fast, Memory-Efficient Reconstruction: Sankeerth Durvasula,

Sharanshangar Muhunthan,

Zain Moustafa,

Richard Chen,

Ruofan Liang,

Yushi Guan,

Nilesh Ahuja,

Nilesh Jain,

Selvakumar Panneer,

Nandita Vijaykumar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Durvasula_2025_ICCV,
  author    = {Durvasula, Sankeerth and Muhunthan, Sharanshangar and Moustafa, Zain and Chen, Richard and Liang, Ruofan and Guan, Yushi and Ahuja, Nilesh and Jain, Nilesh and Panneer, Selvakumar and Vijaykumar, Nandita},
  title     = {{ContraGS}: Codebook-Condensed and Trainable {Gaussian} Splatting for Fast, Memory-Efficient Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28935--28945},
}
NATRA: Noise-Agnostic Framework for Trajectory Prediction with Noisy Observations: Rongqing Li,

Changsheng Li,

Ruilin Lv,

Yuhang Li,

Yang Gao,

Xiaolu Zhang,

Jun Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Rongqing and Li, Changsheng and Lv, Ruilin and Li, Yuhang and Gao, Yang and Zhang, Xiaolu and Zhou, Jun},
  title     = {{NATRA}: Noise-Agnostic Framework for Trajectory Prediction with Noisy Observations},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27872--27884},
}
ModalTune: Fine-Tuning Slide-Level Foundation Models with Multi-Modal Information for Multi-task Learning in Digital Pathology: Vishwesh Ramanathan,

Tony Xu,

Pushpak Pati,

Faruk Ahmed,

Maged Goubran,

Anne L. Martel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ramanathan_2025_ICCV,
    author    = {Ramanathan, Vishwesh and Xu, Tony and Pati, Pushpak and Ahmed, Faruk and Goubran, Maged and Martel, Anne L.},
    title     = {{ModalTune}: Fine-Tuning Slide-Level Foundation Models with Multi-Modal Information for Multi-task Learning in Digital Pathology},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {23912--23923},
}
Harnessing Vision Foundation Models for High-Performance, Training-Free Open Vocabulary Segmentation: Yuheng Shi,

Minjing Dong,

Chang Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2025_ICCV,
    author    = {Shi, Yuheng and Dong, Minjing and Xu, Chang},
    title     = {Harnessing Vision Foundation Models for High-Performance, Training-Free Open Vocabulary Segmentation},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {23487--23497},
}
MinCD-PnP: Learning 2D-3D Correspondences with Approximate Blind PnP: Pei An,

Jiaqi Yang,

Muyao Peng,

You Yang,

Qiong Liu,

Xiaolin Wu,

Liangliang Nan; [pdf] [supp]
[bibtex]
@InProceedings{An_2025_ICCV,
    author    = {An, Pei and Yang, Jiaqi and Peng, Muyao and Yang, You and Liu, Qiong and Wu, Xiaolin and Nan, Liangliang},
    title     = {{MinCD-PnP}: Learning {2D-3D} Correspondences with Approximate Blind {PnP}},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {26519--26528},
}
Hallucinatory Image Tokens: A Training-free EAZY Approach to Detecting and Mitigating Object Hallucinations in LVLMs: Liwei Che,

Tony Qingze Liu,

Jing Jia,

Weiyi Qin,

Ruixiang Tang,

Vladimir Pavlovic; [pdf] [supp]
[bibtex]
@InProceedings{Che_2025_ICCV,
    author    = {Che, Liwei and Liu, Tony Qingze and Jia, Jing and Qin, Weiyi and Tang, Ruixiang and Pavlovic, Vladimir},
    title     = {Hallucinatory Image Tokens: A Training-free {EAZY} Approach to Detecting and Mitigating Object Hallucinations in {LVLMs}},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {21635--21644},
}
MagicDrive-V2: High-Resolution Long Video Generation for Autonomous Driving with Adaptive Control: Ruiyuan Gao,

Kai Chen,

Bo Xiao,

Lanqing Hong,

Zhenguo Li,

Qiang Xu; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2025_ICCV,
    author    = {Gao, Ruiyuan and Chen, Kai and Xiao, Bo and Hong, Lanqing and Li, Zhenguo and Xu, Qiang},
    title     = {{MagicDrive-V2}: High-Resolution Long Video Generation for Autonomous Driving with Adaptive Control},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {28135--28144},
}
Communication-Efficient Multi-Vehicle Collaborative Semantic Segmentation via Sparse 3D Gaussian Sharing: Tianyu Hong,

Xiaobo Zhou,

Wenkai Hu,

Qi Xie,

Zhihui Ke,

Tie Qiu; [pdf] [supp]
[bibtex]
@InProceedings{Hong_2025_ICCV,
    author    = {Hong, Tianyu and Zhou, Xiaobo and Hu, Wenkai and Xie, Qi and Ke, Zhihui and Qiu, Tie},
    title     = {Communication-Efficient Multi-Vehicle Collaborative Semantic Segmentation via Sparse {3D} {Gaussian} Sharing},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {28622--28631},
}
Sparse-Dense Side-Tuner for efficient Video Temporal Grounding: David Pujol-Perich,

Sergio Escalera,

Albert Clapés; [pdf] [supp]
[bibtex]
@InProceedings{Pujol-Perich_2025_ICCV,
    author    = {Pujol-Perich, David and Escalera, Sergio and Clap{\'e}s, Albert},
    title     = {Sparse-Dense Side-Tuner for efficient Video Temporal Grounding},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {21515--21524},
}
CompCap: Improving Multimodal Large Language Models with Composite Captions: Xiaohui Chen,

Satya Narayan Shukla,

Mahmoud Azab,

Aashu Singh,

Qifan Wang,

David Yang,

ShengYun Peng,

Hanchao Yu,

Shen Yan,

Xuewen Zhang,

Baosheng He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
    author    = {Chen, Xiaohui and Shukla, Satya Narayan and Azab, Mahmoud and Singh, Aashu and Wang, Qifan and Yang, David and Peng, ShengYun and Yu, Hanchao and Yan, Shen and Zhang, Xuewen and He, Baosheng},
    title     = {{CompCap}: Improving Multimodal Large Language Models with Composite Captions},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {23582--23592},
}
Describe, Adapt and Combine: Empowering CLIP Encoders for Open-set 3D Object Retrieval: Zhichuan Wang,

Yang Zhou,

Zhe Liu,

Rui Yu,

Song Bai,

Yulong Wang,

Xinwei He,

Xiang Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
    author    = {Wang, Zhichuan and Zhou, Yang and Liu, Zhe and Yu, Rui and Bai, Song and Wang, Yulong and He, Xinwei and Bai, Xiang},
    title     = {Describe, Adapt and Combine: Empowering {CLIP} Encoders for Open-set {3D} Object Retrieval},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {21026--21036},
}
Embodied Image Captioning: Self-supervised Learning Agents for Spatially Coherent Image Descriptions: Tommaso Galliena,

Tommaso Apicella,

Stefano Rosa,

Pietro Morerio,

Alessio Del Bue,

Lorenzo Natale; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Galliena_2025_ICCV,
    author    = {Galliena, Tommaso and Apicella, Tommaso and Rosa, Stefano and Morerio, Pietro and Del Bue, Alessio and Natale, Lorenzo},
    title     = {Embodied Image Captioning: Self-supervised Learning Agents for Spatially Coherent Image Descriptions},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {24370--24379},
}
Adapt Foundational Segmentation Models with Heterogeneous Searching Space: Li Yi,

Jie Hu,

Songan Zhang,

Guannan Jiang; [pdf] [supp]
[bibtex]
@InProceedings{Yi_2025_ICCV,
    author    = {Yi, Li and Hu, Jie and Zhang, Songan and Jiang, Guannan},
    title     = {Adapt Foundational Segmentation Models with Heterogeneous Searching Space},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {23364--23373},
}
Adversarial Exploitation of Data Diversity Improves Visual Localization: Sihang Li,

Siqi Tan,

Bowen Chang,

Jing Zhang,

Chen Feng,

Yiming Li; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
    author    = {Li, Sihang and Tan, Siqi and Chang, Bowen and Zhang, Jing and Feng, Chen and Li, Yiming},
    title     = {Adversarial Exploitation of Data Diversity Improves Visual Localization},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {26848--26858},
}
LawDIS: Language-Window-based Controllable Dichotomous Image Segmentation: Xinyu Yan,

Meijun Sun,

Ge-Peng Ji,

Fahad Shahbaz Khan,

Salman Khan,

Deng-Ping Fan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2025_ICCV,
    author    = {Yan, Xinyu and Sun, Meijun and Ji, Ge-Peng and Khan, Fahad Shahbaz and Khan, Salman and Fan, Deng-Ping},
    title     = {{LawDIS}: Language-Window-based Controllable Dichotomous Image Segmentation},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {23902--23911},
}
HERMES: temporal-coHERent long-forM understanding with Episodes and Semantics: Gueter Josmy Faure,

Jia-Fong Yeh,

Min-Hung Chen,

Hung-Ting Su,

Shang-Hong Lai,

Winston H. Hsu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Faure_2025_ICCV,
    author    = {Faure, Gueter Josmy and Yeh, Jia-Fong and Chen, Min-Hung and Su, Hung-Ting and Lai, Shang-Hong and Hsu, Winston H.},
    title     = {{HERMES}: temporal-{coHERent} long-{forM} understanding with {Episodes} and {Semantics}},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {22911--22921},
}
CoralSRT: Revisiting Coral Reef Semantic Segmentation by Feature Rectification via Self-supervised Guidance: Ziqiang Zheng,

Yuk-Kwan Wong,

Binh-Son Hua,

Jianbo Shi,

Sai-Kit Yeung; [pdf] [supp]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
    author    = {Zheng, Ziqiang and Wong, Yuk-Kwan and Hua, Binh-Son and Shi, Jianbo and Yeung, Sai-Kit},
    title     = {{CoralSRT}: Revisiting Coral Reef Semantic Segmentation by Feature Rectification via Self-supervised Guidance},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {19967--19977},
}
Omni-scene Perception-oriented Point Cloud Geometry Enhancement for Coordinate Quantization: Wang Liu,

Wei Gao; [pdf]
[bibtex]
@InProceedings{Liu_2025_ICCV,
    author    = {Liu, Wang and Gao, Wei},
    title     = {Omni-scene Perception-oriented Point Cloud Geometry Enhancement for Coordinate Quantization},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {26055--26064},
}
B-VLLM: A Vision Large Language Model with Balanced Spatio-Temporal Tokens: Zhuqiang Lu,

Zhenfei Yin,

Mengwei He,

Zhihui Wang,

Zicheng Liu,

Zhiyong Wang,

Kun Hu; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2025_ICCV,
    author    = {Lu, Zhuqiang and Yin, Zhenfei and He, Mengwei and Wang, Zhihui and Liu, Zicheng and Wang, Zhiyong and Hu, Kun},
    title     = {{B-VLLM}: A Vision Large Language Model with Balanced Spatio-Temporal Tokens},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {24549--24558},
}
SL2A-INR: Single-Layer Learnable Activation for Implicit Neural Representation: Reza Rezaeian,

Moein Heidari,

Reza Azad,

Dorit Merhof,

Hamid Soltanian-Zadeh,

Ilker Hacihaliloglu; [pdf] [supp]
[bibtex]
@InProceedings{Rezaeian_2025_ICCV,
    author    = {Rezaeian, Reza and Heidari, Moein and Azad, Reza and Merhof, Dorit and Soltanian-Zadeh, Hamid and Hacihaliloglu, Ilker},
    title     = {{SL2A-INR}: Single-Layer Learnable Activation for Implicit Neural Representation},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {26065--26074},
}
Authentic 4D Driving Simulation with a Video Generation Model: Lening Wang,

Wenzhao Zheng,

Dalong Du,

Yunpeng Zhang,

Yilong Ren,

Han Jiang,

Zhiyong Cui,

Haiyang Yu,

Jie Zhou,

Shanghang Zhang; [pdf]
[bibtex]
@InProceedings{Wang_2025_ICCV,
    author    = {Wang, Lening and Zheng, Wenzhao and Du, Dalong and Zhang, Yunpeng and Ren, Yilong and Jiang, Han and Cui, Zhiyong and Yu, Haiyang and Zhou, Jie and Zhang, Shanghang},
    title     = {Authentic {4D} Driving Simulation with a Video Generation Model},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {28892--28902},
}
Curve-Aware Gaussian Splatting for 3D Parametric Curve Reconstruction: Zhirui Gao,

Renjiao Yi,

Yaqiao Dai,

Xuening Zhu,

Wei Chen,

Chenyang Zhu,

Kai Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_ICCV,
    author    = {Gao, Zhirui and Yi, Renjiao and Dai, Yaqiao and Zhu, Xuening and Chen, Wei and Zhu, Chenyang and Xu, Kai},
    title     = {Curve-Aware {Gaussian} Splatting for {3D} Parametric Curve Reconstruction},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {27531--27541},
}
Sim-DETR: Unlock DETR for Temporal Sentence Grounding: Jiajin Tang,

Zhengxuan Wei,

Yuchen Zhu,

Cheng Shi,

Guanbin Li,

Liang Lin,

Sibei Yang; [pdf]
[bibtex]
@InProceedings{Tang_2025_ICCV,
    author    = {Tang, Jiajin and Wei, Zhengxuan and Zhu, Yuchen and Shi, Cheng and Li, Guanbin and Lin, Liang and Yang, Sibei},
    title     = {{Sim-DETR}: Unlock {DETR} for Temporal Sentence Grounding},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {22760--22771},
}
METEOR: Multi-Encoder Collaborative Token Pruning for Efficient Vision Language Models: Yuchen Liu,

Yaoming Wang,

Bowen Shi,

Xiaopeng Zhang,

Wenrui Dai,

Chenglin Li,

Hongkai Xiong,

Qi Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
    author    = {Liu, Yuchen and Wang, Yaoming and Shi, Bowen and Zhang, Xiaopeng and Dai, Wenrui and Li, Chenglin and Xiong, Hongkai and Tian, Qi},
    title     = {{METEOR}: Multi-Encoder Collaborative Token Pruning for Efficient Vision Language Models},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {21492--21504},
}
SeqGrowGraph: Learning Lane Topology as a Chain of Graph Expansions: Mengwei Xie,

Shuang Zeng,

Xinyuan Chang,

Xinran Liu,

Zheng Pan,

Mu Xu,

Xing Wei; [pdf] [arXiv]
[bibtex]
@InProceedings{Xie_2025_ICCV,
    author    = {Xie, Mengwei and Zeng, Shuang and Chang, Xinyuan and Liu, Xinran and Pan, Zheng and Xu, Mu and Wei, Xing},
    title     = {{SeqGrowGraph}: Learning Lane Topology as a Chain of Graph Expansions},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {27166--27175},
}
Neural Compression for 3D Geometry Sets: Siyu Ren,

Junhui Hou,

Weiyao Lin,

Wenping Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2025_ICCV,
    author    = {Ren, Siyu and Hou, Junhui and Lin, Weiyao and Wang, Wenping},
    title     = {Neural Compression for {3D} Geometry Sets},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {25294--25304},
}
Lifting the Structural Morphing for Wide-Angle Images Rectification: Unified Content and Boundary Modeling: Wenting Luan,

Siqi Lu,

Yongbin Zheng,

Wanying Xu,

Lang Nie,

Zongtan Zhou,

Kang Liao; [pdf] [supp]
[bibtex]
@InProceedings{Luan_2025_ICCV,
    author    = {Luan, Wenting and Lu, Siqi and Zheng, Yongbin and Xu, Wanying and Nie, Lang and Zhou, Zongtan and Liao, Kang},
    title     = {Lifting the Structural Morphing for Wide-Angle Images Rectification: Unified Content and Boundary Modeling},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {25529--25538},
}
ASGS: Single-Domain Generalizable Open-Set Object Detection via Adaptive Subgraph Searching: Yuxuan Yuan,

Luyao Tang,

Yixin Chen,

Chaoqi Chen,

Yue Huang,

Xinghao Ding; [pdf] [supp]
[bibtex]
@InProceedings{Yuan_2025_ICCV,
    author    = {Yuan, Yuxuan and Tang, Luyao and Chen, Yixin and Chen, Chaoqi and Huang, Yue and Ding, Xinghao},
    title     = {{ASGS}: Single-Domain Generalizable Open-Set Object Detection via Adaptive Subgraph Searching},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {20911--20921},
}
UniConvNet: Expanding Effective Receptive Field while Maintaining Asymptotically Gaussian Distribution for ConvNets of Any Scale: Yuhao Wang,

Wei Xi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
    author    = {Wang, Yuhao and Xi, Wei},
    title     = {{UniConvNet}: Expanding Effective Receptive Field while Maintaining Asymptotically {Gaussian} Distribution for {ConvNets} of Any Scale},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {20922--20933},
}
Cross-View Isolated Sign Language Recognition via View Synthesis and Feature Disentanglement: Xin Shen,

Xinyu Wang,

Lei Shen,

Kaihao Zhang,

Xin Yu; [pdf] [supp]
[bibtex]
@InProceedings{Shen_2025_ICCV,
    author    = {Shen, Xin and Wang, Xinyu and Shen, Lei and Zhang, Kaihao and Yu, Xin},
    title     = {Cross-View Isolated Sign Language Recognition via View Synthesis and Feature Disentanglement},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {20647--20657},
}
3DGS-LM: Faster Gaussian-Splatting Optimization with Levenberg-Marquardt: Lukas Höllein,

Aljaž Božič,

Michael Zollhöfer,

Matthias Nießner; [pdf] [supp]
[bibtex]
@InProceedings{Hollein_2025_ICCV,
    author    = {H{\"o}llein, Lukas and Bo{\v{z}}i{\v{c}}, Alja{\v{z}} and Zollh{\"o}fer, Michael and Nie{\ss}ner, Matthias},
    title     = {{3DGS-LM}: Faster {Gaussian}-Splatting Optimization with {Levenberg-Marquardt}},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {26740--26750},
}
GauUpdate: New Object Insertion in 3D Gaussian Fields with Consistent Global Illumination: Chengwei Ren,

Fan Zhang,

Liangchao Xu,

Liang Pan,

Ziwei Liu,

Wenping Wang,

Xiao-Ping Zhang,

Yuan Liu; [pdf] [supp]
[bibtex]
@InProceedings{Ren_2025_ICCV,
    author    = {Ren, Chengwei and Zhang, Fan and Xu, Liangchao and Pan, Liang and Liu, Ziwei and Wang, Wenping and Zhang, Xiao-Ping and Liu, Yuan},
    title     = {{GauUpdate}: New Object Insertion in {3D} {Gaussian} Fields with Consistent Global Illumination},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {28653--28663},
}
OphCLIP: Hierarchical Retrieval-Augmented Learning for Ophthalmic Surgical Video-Language Pretraining: Ming Hu,

Kun Yuan,

Yaling Shen,

Feilong Tang,

Xiaohao Xu,

Lin Zhou,

Wei Li,

Ying Chen,

Zhongxing Xu,

Zelin Peng,

Siyuan Yan,

Vinkle Srivastav,

Diping Song,

Tianbin Li,

Danli Shi,

Jin Ye,

Nicolas Padoy,

Nassir Navab,

Junjun He,

Zongyuan Ge; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_ICCV,
    author    = {Hu, Ming and Yuan, Kun and Shen, Yaling and Tang, Feilong and Xu, Xiaohao and Zhou, Lin and Li, Wei and Chen, Ying and Xu, Zhongxing and Peng, Zelin and Yan, Siyuan and Srivastav, Vinkle and Song, Diping and Li, Tianbin and Shi, Danli and Ye, Jin and Padoy, Nicolas and Navab, Nassir and He, Junjun and Ge, Zongyuan},
    title     = {{OphCLIP}: Hierarchical Retrieval-Augmented Learning for Ophthalmic Surgical Video-Language Pretraining},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {19838--19849},
}
IM360: Large-scale Indoor Mapping with 360 Cameras: Dongki Jung,

Jaehoon Choi,

Yonghan Lee,

Dinesh Manocha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jung_2025_ICCV,
    author    = {Jung, Dongki and Choi, Jaehoon and Lee, Yonghan and Manocha, Dinesh},
    title     = {{IM360}: Large-scale Indoor Mapping with 360 Cameras},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {29040--29050},
}
SU-RGS: Relightable 3D Gaussian Splatting from Sparse Views under Unconstrained Illuminations: Qi Zhang,

Chi Huang,

Qian Zhang,

Nan Li,

Wei Feng; [pdf]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
    author    = {Zhang, Qi and Huang, Chi and Zhang, Qian and Li, Nan and Feng, Wei},
    title     = {{SU-RGS}: Relightable {3D} {Gaussian} Splatting from Sparse Views under Unconstrained Illuminations},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {26859--26868},
}
Representation Shift: Unifying Token Compression with FlashAttention: Joonmyung Choi,

Sanghyeok Lee,

Byungoh Ko,

Eunseo Kim,

Jihyung Kil,

Hyunwoo J. Kim; [pdf] [arXiv]
[bibtex]
@InProceedings{Choi_2025_ICCV,
    author    = {Choi, Joonmyung and Lee, Sanghyeok and Ko, Byungoh and Kim, Eunseo and Kil, Jihyung and Kim, Hyunwoo J.},
    title     = {Representation Shift: Unifying Token Compression with {FlashAttention}},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {20456--20466},
}
GenHancer: Imperfect Generative Models are Secretly Strong Vision-Centric Enhancers: Shijie Ma,

Yuying Ge,

Teng Wang,

Yuxin Guo,

Yixiao Ge,

Ying Shan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_ICCV,
    author    = {Ma, Shijie and Ge, Yuying and Wang, Teng and Guo, Yuxin and Ge, Yixiao and Shan, Ying},
    title     = {{GenHancer}: Imperfect Generative Models are Secretly Strong Vision-Centric Enhancers},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {24402--24412},
}
MINERVA: Evaluating Complex Video Reasoning: Arsha Nagrani,

Sachit Menon,

Ahmet Iscen,

Shyamal Buch,

Ramin Mehran,

Nilpa Jha,

Anja Hauth,

Yukun Zhu,

Carl Vondrick,

Mikhail Sirotenko,

Cordelia Schmid,

Tobias Weyand; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nagrani_2025_ICCV,
    author    = {Nagrani, Arsha and Menon, Sachit and Iscen, Ahmet and Buch, Shyamal and Mehran, Ramin and Jha, Nilpa and Hauth, Anja and Zhu, Yukun and Vondrick, Carl and Sirotenko, Mikhail and Schmid, Cordelia and Weyand, Tobias},
    title     = {{MINERVA}: Evaluating Complex Video Reasoning},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {23968--23978},
}
Rectifying Magnitude Neglect in Linear Attention: Qihang Fan,

Huaibo Huang,

Yuang Ai,

Ran He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2025_ICCV,
    author    = {Fan, Qihang and Huang, Huaibo and Ai, Yuang and He, Ran},
    title     = {Rectifying Magnitude Neglect in Linear Attention},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {21505--21514},
}
GEMeX: A Large-Scale, Groundable, and Explainable Medical VQA Benchmark for Chest X-ray Diagnosis: Bo Liu,

Ke Zou,

Li-Ming Zhan,

Zexin Lu,

Xiaoyu Dong,

Yidi Chen,

Chengqiang Xie,

Jiannong Cao,

Xiao-Ming Wu,

Huazhu Fu; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV,
    author    = {Liu, Bo and Zou, Ke and Zhan, Li-Ming and Lu, Zexin and Dong, Xiaoyu and Chen, Yidi and Xie, Chengqiang and Cao, Jiannong and Wu, Xiao-Ming and Fu, Huazhu},
    title     = {{GEMeX}: A Large-Scale, Groundable, and Explainable Medical {VQA} Benchmark for Chest {X-ray} Diagnosis},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {21310--21320},
}
M-Net: MRI Brain Tumor Sequential Segmentation Network via Mesh-Cast: Jiacheng Lu,

Hui Ding,

Shiyu Zhang,

Guoping Huo; [pdf]
[bibtex]
@InProceedings{Lu_2025_ICCV,
    author    = {Lu, Jiacheng and Ding, Hui and Zhang, Shiyu and Huo, Guoping},
    title     = {{M-Net}: {MRI} Brain Tumor Sequential Segmentation Network via Mesh-Cast},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {20116--20125},
}
Diffusion Image Prior: Hamadi Chihaoui,

Paolo Favaro; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chihaoui_2025_ICCV,
    author    = {Chihaoui, Hamadi and Favaro, Paolo},
    title     = {Diffusion Image Prior},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {24636--24644},
}
Constructing Ophthalmic MLLM for Positioning-diagnosis Collaboration Through Clinical Cognitive Chain Reasoning: Xinyao Liu,

Diping Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
    author    = {Liu, Xinyao and Song, Diping},
    title     = {Constructing Ophthalmic {MLLM} for Positioning-diagnosis Collaboration Through Clinical Cognitive Chain Reasoning},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {21547--21556},
}
Polarimetric Neural Field via Unified Complex-Valued Wave Representation: Chu Zhou,

Yixin Yang,

Junda Liao,

Heng Guo,

Boxin Shi,

Imari Sato; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
    author    = {Zhou, Chu and Yang, Yixin and Liao, Junda and Guo, Heng and Shi, Boxin and Sato, Imari},
    title     = {Polarimetric Neural Field via Unified Complex-Valued Wave Representation},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {25660--25669},
}
CLIP-Adapted Region-to-Text Learning for Generative Open-Vocabulary Semantic Segmentation: Jiannan Ge,

Lingxi Xie,

Hongtao Xie,

Pandeng Li,

Sun-Ao Liu,

Xiaopeng Zhang,

Qi Tian,

Yongdong Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Ge_2025_ICCV,
    author    = {Ge, Jiannan and Xie, Lingxi and Xie, Hongtao and Li, Pandeng and Liu, Sun-Ao and Zhang, Xiaopeng and Tian, Qi and Zhang, Yongdong},
    title     = {{CLIP}-Adapted Region-to-Text Learning for Generative Open-Vocabulary Semantic Segmentation},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {24034--24044},
}
TeethGenerator: A two-stage framework for paired pre- and post-orthodontic 3D dental data generation: Changsong Lei,

Yaqian Liang,

Shaofeng Wang,

Jiajia Dai,

Yong-Jin Liu; [pdf] [supp]
[bibtex]
@InProceedings{Lei_2025_ICCV,
    author    = {Lei, Changsong and Liang, Yaqian and Wang, Shaofeng and Dai, Jiajia and Liu, Yong-Jin},
    title     = {{TeethGenerator}: A two-stage framework for paired pre- and post-orthodontic {3D} dental data generation},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {25872--25881},
}
Enhancing Zero-shot Object Counting via Text-guided Local Ranking and Number-evoked Global Attention: Shiwei Zhang,

Qi Zhou,

Wei Ke; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
    author    = {Zhang, Shiwei and Zhou, Qi and Ke, Wei},
    title     = {Enhancing Zero-shot Object Counting via Text-guided Local Ranking and Number-evoked Global Attention},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {21097--21106},
}
Bidirectional Likelihood Estimation with Multi-Modal Large Language Models for Text-Video Retrieval: Dohwan Ko,

Ji Soo Lee,

Minhyuk Choi,

Zihang Meng,

Hyunwoo J. Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ko_2025_ICCV,
    author    = {Ko, Dohwan and Lee, Ji Soo and Choi, Minhyuk and Meng, Zihang and Kim, Hyunwoo J.},
    title     = {Bidirectional Likelihood Estimation with Multi-Modal Large Language Models for Text-Video Retrieval},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {22263--22273},
}
TinyViM: Frequency Decoupling for Tiny Hybrid Vision Mamba: Xiaowen Ma,

Zhenliang Ni,

Xinghao Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_ICCV,
    author    = {Ma, Xiaowen and Ni, Zhenliang and Chen, Xinghao},
    title     = {{TinyViM}: Frequency Decoupling for Tiny Hybrid Vision {Mamba}},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {23519--23529},
}
Bridging Domain Generalization to Multimodal Domain Generalization via Unified Representations: Hai Huang,

Yan Xia,

Sashuai Zhou,

Hanting Wang,

Shulei Wang,

Zhou Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV,
    author    = {Huang, Hai and Xia, Yan and Zhou, Sashuai and Wang, Hanting and Wang, Shulei and Zhao, Zhou},
    title     = {Bridging Domain Generalization to Multimodal Domain Generalization via Unified Representations},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {22488--22498},
}
Gaussian-based World Model: Gaussian Priors for Voxel-Based Occupancy Prediction and Future Motion Prediction: Tuo Feng,

Wenguan Wang,

Yi Yang; [pdf] [supp]
[bibtex]
@InProceedings{Feng_2025_ICCV,
    author    = {Feng, Tuo and Wang, Wenguan and Yang, Yi},
    title     = {{Gaussian}-based World Model: {Gaussian} Priors for Voxel-Based Occupancy Prediction and Future Motion Prediction},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {25239--25249},
}
SketchSplat: 3D Edge Reconstruction via Differentiable Multi-view Sketch Splatting: Haiyang Ying,

Matthias Zwicker; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ying_2025_ICCV,
    author    = {Ying, Haiyang and Zwicker, Matthias},
    title     = {{SketchSplat}: {3D} Edge Reconstruction via Differentiable Multi-view Sketch Splatting},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {25649--25659},
}
Sat2City: 3D City Generation from A Single Satellite Image with Cascaded Latent Diffusion: Tongyan Hua,

Lutao Jiang,

Ying-Cong Chen,

Wufan Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hua_2025_ICCV,
    author    = {Hua, Tongyan and Jiang, Lutao and Chen, Ying-Cong and Zhao, Wufan},
    title     = {{Sat2City}: {3D} City Generation from A Single Satellite Image with Cascaded Latent Diffusion},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {27978--27988},
}
Hi3DGen: High-fidelity 3D Geometry Generation from Images via Normal Bridging: Chongjie Ye,

Yushuang Wu,

Ziteng Lu,

Jiahao Chang,

Xiaoyang Guo,

Jiaqing Zhou,

Hao Zhao,

Xiaoguang Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2025_ICCV,
    author    = {Ye, Chongjie and Wu, Yushuang and Lu, Ziteng and Chang, Jiahao and Guo, Xiaoyang and Zhou, Jiaqing and Zhao, Hao and Han, Xiaoguang},
    title     = {{Hi3DGen}: High-fidelity {3D} Geometry Generation from Images via Normal Bridging},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {25050--25061},
}
SC-Captioner: Improving Image Captioning with Self-Correction by Reinforcement Learning: Lin Zhang,

Xianfang Zeng,

Kangcong Li,

Gang Yu,

Tao Chen; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
    author    = {Zhang, Lin and Zeng, Xianfang and Li, Kangcong and Yu, Gang and Chen, Tao},
    title     = {{SC-Captioner}: Improving Image Captioning with Self-Correction by Reinforcement Learning},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {23145--23155},
}
Aligning Moments in Time using Video Queries: Yogesh Kumar,

Uday Agarwal,

Manish Gupta,

Anand Mishra; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kumar_2025_ICCV,
    author    = {Kumar, Yogesh and Agarwal, Uday and Gupta, Manish and Mishra, Anand},
    title     = {Aligning Moments in Time using Video Queries},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {20215--20225},
}
StreamGS: Online Generalizable Gaussian Splatting Reconstruction for Unposed Image Streams: Yang Li,

Jinglu Wang,

Lei Chu,

Xiao Li,

Shiu-Hong Kao,

Ying-Cong Chen,

Yan Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
    author    = {Li, Yang and Wang, Jinglu and Chu, Lei and Li, Xiao and Kao, Shiu-Hong and Chen, Ying-Cong and Lu, Yan},
    title     = {{StreamGS}: Online Generalizable {Gaussian} Splatting Reconstruction for Unposed Image Streams},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {25841--25850},
}
Ensemble Foreground Management for Unsupervised Object Discovery: Ziling Wu,

Armaghan Moemeni,

Praminda Caleb-Solly; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
    author    = {Wu, Ziling and Moemeni, Armaghan and Caleb-Solly, Praminda},
    title     = {Ensemble Foreground Management for Unsupervised Object Discovery},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2025},
    pages     = {20268--20279},
}
RESCUE: Crowd Evacuation Simulation via Controlling SDM-United Characters: Xiaolin Liu,

Tianyi Zhou,

Hongbo Kang,

Jian Ma,

Ziwen Wang,

Jing Huang,

Wenguo Weng,

Yu-Kun Lai,

Kun Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Xiaolin and Zhou, Tianyi and Kang, Hongbo and Ma, Jian and Wang, Ziwen and Huang, Jing and Weng, Wenguo and Lai, Yu-Kun and Li, Kun},
  title     = {{RESCUE}: Crowd Evacuation Simulation via Controlling {SDM-United} Characters},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24955--24964},
}
Signs as Tokens: A Retrieval-Enhanced Multilingual Sign Language Generator: Ronglai Zuo,

Rolandos Alexandros Potamias,

Evangelos Ververas,

Jiankang Deng,

Stefanos Zafeiriou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zuo_2025_ICCV,
  author    = {Zuo, Ronglai and Potamias, Rolandos Alexandros and Ververas, Evangelos and Deng, Jiankang and Zafeiriou, Stefanos},
  title     = {Signs as Tokens: A Retrieval-Enhanced Multilingual Sign Language Generator},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23806--23816},
}
GUIOdyssey: A Comprehensive Dataset for Cross-App GUI Navigation on Mobile Devices: Quanfeng Lu,

Wenqi Shao,

Zitao Liu,

Lingxiao Du,

Fanqing Meng,

Boxuan Li,

Botong Chen,

Siyuan Huang,

Kaipeng Zhang,

Ping Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Quanfeng and Shao, Wenqi and Liu, Zitao and Du, Lingxiao and Meng, Fanqing and Li, Boxuan and Chen, Botong and Huang, Siyuan and Zhang, Kaipeng and Luo, Ping},
  title     = {{GUIOdyssey}: A Comprehensive Dataset for Cross-App {GUI} Navigation on Mobile Devices},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22404--22414},
}
TOTP: Transferable Online Pedestrian Trajectory Prediction with Temporal-Adaptive Mamba Latent Diffusion: Ziyang Ren,

Ping Wei,

Shangqi Deng,

Haowen Tang,

Jiapeng Li,

Huan Li; [pdf]
[bibtex]
@InProceedings{Ren_2025_ICCV,
  author    = {Ren, Ziyang and Wei, Ping and Deng, Shangqi and Tang, Haowen and Li, Jiapeng and Li, Huan},
  title     = {{TOTP}: Transferable Online Pedestrian Trajectory Prediction with Temporal-Adaptive {Mamba} Latent Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26263--26272},
}
Supercharging Floorplan Localization with Semantic Rays: Yuval Grader,

Hadar Averbuch-Elor; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Grader_2025_ICCV,
  author    = {Grader, Yuval and Averbuch-Elor, Hadar},
  title     = {Supercharging Floorplan Localization with Semantic Rays},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27116--27125},
}
MDP3: A Training-free Approach for List-wise Frame Selection in Video-LLMs: Hui Sun,

Shiyin Lu,

Huanyu Wang,

Qing-Guo Chen,

Zhao Xu,

Weihua Luo,

Kaifu Zhang,

Ming Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Hui and Lu, Shiyin and Wang, Huanyu and Chen, Qing-Guo and Xu, Zhao and Luo, Weihua and Zhang, Kaifu and Li, Ming},
  title     = {{MDP3}: A Training-free Approach for List-wise Frame Selection in {Video-LLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24090--24101},
}
Toward Long-Tailed Online Anomaly Detection through Class-Agnostic Concepts: Chiao-An Yang,

Kuan-Chuan Peng,

Raymond A. Yeh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Chiao-An and Peng, Kuan-Chuan and Yeh, Raymond A.},
  title     = {Toward Long-Tailed Online Anomaly Detection through Class-Agnostic Concepts},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23419--23430},
}
DiffTell: A High-Quality Dataset for Describing Image Manipulation Changes: Zonglin Di,

Jing Shi,

Yifei Fan,

Hao Tan,

Alexander Black,

John Collomosse,

Yang Liu; [pdf] [supp]
[bibtex]
@InProceedings{Di_2025_ICCV,
  author    = {Di, Zonglin and Shi, Jing and Fan, Yifei and Tan, Hao and Black, Alexander and Collomosse, John and Liu, Yang},
  title     = {{DiffTell}: A High-Quality Dataset for Describing Image Manipulation Changes},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24580--24590},
}
TrafficLoc: Localizing Traffic Surveillance Cameras in 3D Scenes: Yan Xia,

Yunxiang Lu,

Rui Song,

Oussema Dhaouadi,

João F. Henriques,

Daniel Cremers; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2025_ICCV,
  author    = {Xia, Yan and Lu, Yunxiang and Song, Rui and Dhaouadi, Oussema and Henriques, Jo{\~a}o F. and Cremers, Daniel},
  title     = {{TrafficLoc}: Localizing Traffic Surveillance Cameras in {3D} Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28685--28695},
}
CA-I2P: Channel-Adaptive Registration Network with Global Optimal Selection: Zhixin Cheng,

Jiacheng Deng,

Xinjun Li,

Xiaotian Yin,

Bohao Liao,

Baoqun Yin,

Wenfei Yang,

Tianzhu Zhang; [pdf]
[bibtex]
@InProceedings{Cheng_2025_ICCV,
  author    = {Cheng, Zhixin and Deng, Jiacheng and Li, Xinjun and Yin, Xiaotian and Liao, Bohao and Yin, Baoqun and Yang, Wenfei and Zhang, Tianzhu},
  title     = {{CA-I2P}: Channel-Adaptive Registration Network with Global Optimal Selection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27739--27749},
}
Exploiting Vision Language Model for Training-Free 3D Point Cloud OOD Detection via Graph Score Propagation: Tiankai Chen,

Yushu Li,

Adam Goodge,

Fei Teng,

Xulei Yang,

Tianrui Li,

Xun Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Tiankai and Li, Yushu and Goodge, Adam and Teng, Fei and Yang, Xulei and Li, Tianrui and Xu, Xun},
  title     = {Exploiting Vision Language Model for Training-Free {3D} Point Cloud {OOD} Detection via Graph Score Propagation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28797--28807},
}
AHCPTQ: Accurate and Hardware-Compatible Post-Training Quantization for Segment Anything Model: Wenlun Zhang,

Yunshan Zhong,

Shimpei Ando,

Kentaro Yoshioka; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Wenlun and Zhong, Yunshan and Ando, Shimpei and Yoshioka, Kentaro},
  title     = {{AHCPTQ}: Accurate and Hardware-Compatible Post-Training Quantization for {Segment Anything Model}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22383--22392},
}
WonderTurbo: Generating Interactive 3D World in 0.72 Seconds: Chaojun Ni,

Xiaofeng Wang,

Zheng Zhu,

Weijie Wang,

Haoyun Li,

Guosheng Zhao,

Jie Li,

Wenkang Qin,

Guan Huang,

Wenjun Mei; [pdf] [supp]
[bibtex]
@InProceedings{Ni_2025_ICCV,
  author    = {Ni, Chaojun and Wang, Xiaofeng and Zhu, Zheng and Wang, Weijie and Li, Haoyun and Zhao, Guosheng and Li, Jie and Qin, Wenkang and Huang, Guan and Mei, Wenjun},
  title     = {{WonderTurbo}: Generating Interactive {3D} World in 0.72 Seconds},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27423--27434},
}
S3E: Self-Supervised State Estimation for Radar-Inertial System: Shengpeng Wang,

Yulong Xie,

Qing Liao,

Wei Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Shengpeng and Xie, Yulong and Liao, Qing and Wang, Wei},
  title     = {{S3E}: Self-Supervised State Estimation for Radar-Inertial System},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26686--26695},
}
GenFlow3D: Generative Scene Flow Estimation and Prediction on Point Cloud Sequences: Hanlin Li,

Wenming Weng,

Yueyi Zhang,

Zhiwei Xiong; [pdf]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Hanlin and Weng, Wenming and Zhang, Yueyi and Xiong, Zhiwei},
  title     = {{GenFlow3D}: Generative Scene Flow Estimation and Prediction on Point Cloud Sequences},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27488--27497},
}
PBCAT: Patch-Based Composite Adversarial Training against Physically Realizable Attacks on Object Detection: Xiao Li,

Yiming Zhu,

Yifan Huang,

Wei Zhang,

Yingzhe He,

Jie Shi,

Xiaolin Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Xiao and Zhu, Yiming and Huang, Yifan and Zhang, Wei and He, Yingzhe and Shi, Jie and Hu, Xiaolin},
  title     = {{PBCAT}: Patch-Based Composite Adversarial Training against Physically Realizable Attacks on Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24456--24466},
}
PossLoss: A Reliable and Sensitive Facial Landmark Detection Loss Function: Qikui Zhu; [pdf]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Qikui},
  title     = {{PossLoss}: A Reliable and Sensitive Facial Landmark Detection Loss Function},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24858--24867},
}
StealthAttack: Robust 3D Gaussian Splatting Poisoning via Density-Guided Illusions: Bo-Hsu Ke,

You-Zhe Xie,

Yu-Lun Liu,

Wei-Chen Chiu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ke_2025_ICCV,
  author    = {Ke, Bo-Hsu and Xie, You-Zhe and Liu, Yu-Lun and Chiu, Wei-Chen},
  title     = {{StealthAttack}: Robust {3D} {Gaussian} Splatting Poisoning via Density-Guided Illusions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27400--27411},
}
LightCity: An Urban Dataset for Outdoor Inverse Rendering and Reconstruction under Multi-illumination Conditions: Jingjing Wang,

Qirui Hu,

Chong Bao,

Yuke Zhu,

Hujun Bao,

Zhaopeng Cui,

Guofeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Jingjing and Hu, Qirui and Bao, Chong and Zhu, Yuke and Bao, Hujun and Cui, Zhaopeng and Zhang, Guofeng},
  title     = {{LightCity}: An Urban Dataset for Outdoor Inverse Rendering and Reconstruction under Multi-illumination Conditions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26477--26487},
}
Region-aware Anchoring Mechanism for Efficient Referring Visual Grounding: Shuyi Ouyang,

Ziwei Niu,

Hongyi Wang,

Yen-Wei Chen,

Lanfen Lin; [pdf] [supp]
[bibtex]
@InProceedings{Ouyang_2025_ICCV,
  author    = {Ouyang, Shuyi and Niu, Ziwei and Wang, Hongyi and Chen, Yen-Wei and Lin, Lanfen},
  title     = {Region-aware Anchoring Mechanism for Efficient Referring Visual Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24192--24202},
}
Hybrid-Tower: Fine-grained Pseudo-query Interaction and Generation for Text-to-Video Retrieval: Bangxiang Lan,

Ruobing Xie,

Ruixiang Zhao,

Xingwu Sun,

Zhanhui Kang,

Gang Yang,

Xirong Li; [pdf] [supp]
[bibtex]
@InProceedings{Lan_2025_ICCV,
  author    = {Lan, Bangxiang and Xie, Ruobing and Zhao, Ruixiang and Sun, Xingwu and Kang, Zhanhui and Yang, Gang and Li, Xirong},
  title     = {{Hybrid-Tower}: Fine-grained Pseudo-query Interaction and Generation for Text-to-Video Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24497--24506},
}
Monocular Semantic Scene Completion via Masked Recurrent Networks: Xuzhi Wang,

Xinran Wu,

Song Wang,

Lingdong Kong,

Ziping Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Xuzhi and Wu, Xinran and Wang, Song and Kong, Lingdong and Zhao, Ziping},
  title     = {Monocular Semantic Scene Completion via Masked Recurrent Networks},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24811--24822},
}
TurboReg: TurboClique for Robust and Efficient Point Cloud Registration: Shaocheng Yan,

Pengcheng Shi,

Zhenjun Zhao,

Kaixin Wang,

Kuang Cao,

Ji Wu,

Jiayuan Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2025_ICCV,
  author    = {Yan, Shaocheng and Shi, Pengcheng and Zhao, Zhenjun and Wang, Kaixin and Cao, Kuang and Wu, Ji and Li, Jiayuan},
  title     = {{TurboReg}: {TurboClique} for Robust and Efficient Point Cloud Registration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26371--26381},
}
Neural Inverse Rendering for High-Accuracy 3D Measurement of Moving Objects with Fewer Phase-Shifting Patterns: Yuki Urakawa,

Yoshihiro Watanabe; [pdf] [supp]
[bibtex]
@InProceedings{Urakawa_2025_ICCV,
  author    = {Urakawa, Yuki and Watanabe, Yoshihiro},
  title     = {Neural Inverse Rendering for High-Accuracy {3D} Measurement of Moving Objects with Fewer Phase-Shifting Patterns},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27692--27701},
}
Dynamic-VLM: Simple Dynamic Visual Token Compression for VideoLLM: Han Wang,

Yuxiang Nie,

Yongjie Ye,

Yanjie Wang,

Shuai Li,

Haiyang Yu,

Jinghui Lu,

Can Huang; [pdf]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Han and Nie, Yuxiang and Ye, Yongjie and Wang, Yanjie and Li, Shuai and Yu, Haiyang and Lu, Jinghui and Huang, Can},
  title     = {{Dynamic-VLM}: Simple Dynamic Visual Token Compression for {VideoLLM}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20812--20823},
}
Two Losses, One Goal: Balancing Conflict Gradients for Semi-supervised Semantic Segmentation: Rui Sun,

Huayu Mai,

Wangkai Li,

Yujia Chen,

Yuan Wang; [pdf]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Rui and Mai, Huayu and Li, Wangkai and Chen, Yujia and Wang, Yuan},
  title     = {Two Losses, One Goal: Balancing Conflict Gradients for Semi-supervised Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20357--20367},
}
SAM4D: Segment Anything in Camera and LiDAR Streams: Jianyun Xu,

Song Wang,

Ziqian Ni,

Chunyong Hu,

Sheng Yang,

Jianke Zhu,

Qiang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Jianyun and Wang, Song and Ni, Ziqian and Hu, Chunyong and Yang, Sheng and Zhu, Jianke and Li, Qiang},
  title     = {{SAM4D}: Segment Anything in Camera and {LiDAR} Streams},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28535--28545},
}
TAB: Transformer Attention Bottlenecks enable User Intervention and Debugging in Vision-Language Models: Pooyan Rahmanzadehgervi,

Hung Huy Nguyen,

Rosanne Liu,

Long Mai,

Anh Totti Nguyen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rahmanzadehgervi_2025_ICCV,
  author    = {Rahmanzadehgervi, Pooyan and Nguyen, Hung Huy and Liu, Rosanne and Mai, Long and Nguyen, Anh Totti},
  title     = {{TAB}: Transformer Attention Bottlenecks enable User Intervention and Debugging in Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22551--22562},
}
Feature Extraction and Representation of Pre-training Point Cloud Based on Diffusion Models: Chang Qiu,

Feipeng Da,

Zilei Zhang; [pdf]
[bibtex]
@InProceedings{Qiu_2025_ICCV,
  author    = {Qiu, Chang and Da, Feipeng and Zhang, Zilei},
  title     = {Feature Extraction and Representation of Pre-training Point Cloud Based on Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26559--26568},
}
EmbodiedSplat: Personalized Real-to-Sim-to-Real Navigation with Gaussian Splats from a Mobile Device: Gunjan Chhablani,

Xiaomeng Ye,

Muhammad Zubair Irshad,

Zsolt Kira; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chhablani_2025_ICCV,
  author    = {Chhablani, Gunjan and Ye, Xiaomeng and Irshad, Muhammad Zubair and Kira, Zsolt},
  title     = {{EmbodiedSplat}: Personalized Real-to-Sim-to-Real Navigation with {Gaussian} Splats from a Mobile Device},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25431--25441},
}
SynCity: Training-Free Generation of 3D Worlds: Paul Engstler,

Aleksandar Shtedritski,

Iro Laina,

Christian Rupprecht,

Andrea Vedaldi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Engstler_2025_ICCV,
  author    = {Engstler, Paul and Shtedritski, Aleksandar and Laina, Iro and Rupprecht, Christian and Vedaldi, Andrea},
  title     = {{SynCity}: Training-Free Generation of {3D} Worlds},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27585--27595},
}
Resonance: Learning to Predict Social-Aware Pedestrian Trajectories as Co-Vibrations: Conghao Wong,

Ziqian Zou,

Beihao Xia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wong_2025_ICCV,
  author    = {Wong, Conghao and Zou, Ziqian and Xia, Beihao},
  title     = {Resonance: Learning to Predict Social-Aware Pedestrian Trajectories as Co-Vibrations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25788--25799},
}
Leaps and Bounds: An Improved Point Cloud Winding Number Formulation for Fast Normal Estimation and Surface Reconstruction: Chamin Hewa Koneputugodage,

Dylan Campbell,

Stephen Gould; [pdf] [supp]
[bibtex]
@InProceedings{Koneputugodage_2025_ICCV,
  author    = {Koneputugodage, Chamin Hewa and Campbell, Dylan and Gould, Stephen},
  title     = {Leaps and Bounds: An Improved Point Cloud Winding Number Formulation for Fast Normal Estimation and Surface Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26116--26125},
}
Cross-Category Subjectivity Generalization for Style-Adaptive Sketch Re-ID: Zechao Hu,

Zhengwei Yang,

Hao Li,

Zheng Wang,

Yixiong Zou; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Zechao and Yang, Zhengwei and Li, Hao and Wang, Zheng and Zou, Yixiong},
  title     = {Cross-Category Subjectivity Generalization for Style-Adaptive Sketch {Re-ID}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22644--22653},
}
CARL: Causality-guided Architecture Representation Learning for an Interpretable Performance Predictor: Han Ji,

Yuqi Feng,

Jiahao Fan,

Yanan Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ji_2025_ICCV,
  author    = {Ji, Han and Feng, Yuqi and Fan, Jiahao and Sun, Yanan},
  title     = {{CARL}: Causality-guided Architecture Representation Learning for an Interpretable Performance Predictor},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23019--23029},
}
An OpenMind for 3D Medical Vision Self-supervised Learning: Tassilo Wald,

Constantin Ulrich,

Jonathan Suprijadi,

Sebastian Ziegler,

Michal Nohel,

Robin Peretzke,

Gregor Kohler,

Klaus Maier-Hein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wald_2025_ICCV,
  author    = {Wald, Tassilo and Ulrich, Constantin and Suprijadi, Jonathan and Ziegler, Sebastian and Nohel, Michal and Peretzke, Robin and Kohler, Gregor and Maier-Hein, Klaus},
  title     = {An {OpenMind} for {3D} Medical Vision Self-supervised Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23839--23879},
}
Dynamic Dictionary Learning for Remote Sensing Image Segmentation: Xuechao Zou,

Yue Li,

Shun Zhang,

Kai Li,

Shiying Wang,

Pin Tao,

Junliang Xing,

Congyan Lang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zou_2025_ICCV,
  author    = {Zou, Xuechao and Li, Yue and Zhang, Shun and Li, Kai and Wang, Shiying and Tao, Pin and Xing, Junliang and Lang, Congyan},
  title     = {Dynamic Dictionary Learning for Remote Sensing Image Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22457--22466},
}
MaterialMVP: Illumination-Invariant Material Generation via Multi-view PBR Diffusion: Zebin He,

Mingxin Yang,

Shuhui Yang,

Yixuan Tang,

Tao Wang,

Kaihao Zhang,

Guanying Chen,

Yuhong Liu,

Jie Jiang,

Chunchao Guo,

Wenhan Luo; [pdf] [arXiv]
[bibtex]
@InProceedings{He_2025_ICCV,
  author    = {He, Zebin and Yang, Mingxin and Yang, Shuhui and Tang, Yixuan and Wang, Tao and Zhang, Kaihao and Chen, Guanying and Liu, Yuhong and Jiang, Jie and Guo, Chunchao and Luo, Wenhan},
  title     = {{MaterialMVP}: Illumination-Invariant Material Generation via Multi-view {PBR} Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26294--26305},
}
LookOut: Real-World Humanoid Egocentric Navigation: Boxiao Pan,

Adam W. Harley,

Francis Engelmann,

C. Karen Liu,

Leonidas J. Guibas; [pdf]
[bibtex]
@InProceedings{Pan_2025_ICCV,
  author    = {Pan, Boxiao and Harley, Adam W. and Engelmann, Francis and Liu, C. Karen and Guibas, Leonidas J.},
  title     = {{LookOut}: Real-World Humanoid Egocentric Navigation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24977--24988},
}
Lightweight Gradient-Aware Upscaling of 3D Gaussian Splatting Images: Simon Niedermayr,

Christoph Neuhauser,

Rüdiger Westermann; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Niedermayr_2025_ICCV,
  author    = {Niedermayr, Simon and Neuhauser, Christoph and Westermann, R{\"u}diger},
  title     = {Lightweight Gradient-Aware Upscaling of {3D} {Gaussian} Splatting Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25862--25871},
}
Balancing Conservatism and Aggressiveness: Prototype-Affinity Hybrid Network for Few-Shot Segmentation: Tianyu Zou,

Shengwu Xiong,

Ruilin Yao,

Yi Rong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zou_2025_ICCV,
  author    = {Zou, Tianyu and Xiong, Shengwu and Yao, Ruilin and Rong, Yi},
  title     = {Balancing Conservatism and Aggressiveness: Prototype-Affinity Hybrid Network for Few-Shot Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20561--20571},
}
Large Scene Generation with Cube-Absorb Discrete Diffusion: Qianjiang Hu,

Wei Hu; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Qianjiang and Hu, Wei},
  title     = {Large Scene Generation with Cube-Absorb Discrete Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25186--25196},
}
MS3D: High-Quality 3D Generation via Multi-Scale Representation Modeling: Guan Luo,

Jianfeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Luo_2025_ICCV,
  author    = {Luo, Guan and Zhang, Jianfeng},
  title     = {{MS3D}: High-Quality {3D} Generation via Multi-Scale Representation Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26336--26348},
}
Memory-Efficient 4-bit Preconditioned Stochastic Optimization: Jingyang Li,

Kuangyu Ding,

Kim-Chuan Toh,

Pan Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Jingyang and Ding, Kuangyu and Toh, Kim-Chuan and Zhou, Pan},
  title     = {Memory-Efficient 4-bit Preconditioned Stochastic Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22633--22643},
}
On the Recovery of Cameras from Fundamental Matrices: Rakshith Madhavan,

Federica Arrigoni; [pdf] [supp]
[bibtex]
@InProceedings{Madhavan_2025_ICCV,
  author    = {Madhavan, Rakshith and Arrigoni, Federica},
  title     = {On the Recovery of Cameras from Fundamental Matrices},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20934--20943},
}
Feather the Throttle: Revisiting Visual Token Pruning for Vision-Language Model Acceleration: Mark Endo,

Xiaohan Wang,

Serena Yeung-Levy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Endo_2025_ICCV,
  author    = {Endo, Mark and Wang, Xiaohan and Yeung-Levy, Serena},
  title     = {Feather the Throttle: Revisiting Visual Token Pruning for Vision-Language Model Acceleration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22826--22835},
}
MEGA: Memory-Efficient 4D Gaussian Splatting for Dynamic Scenes: Xinjie Zhang,

Zhening Liu,

Yifan Zhang,

Xingtong Ge,

Dailan He,

Tongda Xu,

Yan Wang,

Zehong Lin,

Shuicheng Yan,

Jun Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Xinjie and Liu, Zhening and Zhang, Yifan and Ge, Xingtong and He, Dailan and Xu, Tongda and Wang, Yan and Lin, Zehong and Yan, Shuicheng and Zhang, Jun},
  title     = {{MEGA}: Memory-Efficient {4D} {Gaussian} Splatting for Dynamic Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27828--27838},
}
Bridging 3D Anomaly Localization and Repair via High-Quality Continuous Geometric Representation: Bozhong Zheng,

Jinye Gan,

Xiaohao Xu,

Xintao Chen,

Wenqiao Li,

Xiaonan Huang,

Na Ni,

Yingna Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Bozhong and Gan, Jinye and Xu, Xiaohao and Chen, Xintao and Li, Wenqiao and Huang, Xiaonan and Ni, Na and Wu, Yingna},
  title     = {Bridging {3D} Anomaly Localization and Repair via High-Quality Continuous Geometric Representation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27063--27072},
}
Foresight in Motion: Reinforcing Trajectory Prediction with Reward Heuristics: Muleilan Pei,

Shaoshuai Shi,

Xuesong Chen,

Xu Liu,

Shaojie Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pei_2025_ICCV,
  author    = {Pei, Muleilan and Shi, Shaoshuai and Chen, Xuesong and Liu, Xu and Shen, Shaojie},
  title     = {Foresight in Motion: Reinforcing Trajectory Prediction with Reward Heuristics},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28303--28312},
}
Spectral Sensitivity Estimation with an Uncalibrated Diffraction Grating: Lilika Makabe,

Hiroaki Santo,

Fumio Okura,

Michael S. Brown,

Yasuyuki Matsushita; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Makabe_2025_ICCV,
  author    = {Makabe, Lilika and Santo, Hiroaki and Okura, Fumio and Brown, Michael S. and Matsushita, Yasuyuki},
  title     = {Spectral Sensitivity Estimation with an Uncalibrated Diffraction Grating},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27252--27261},
}
Moment Quantization for Video Temporal Grounding: Xiaolong Sun,

Le Wang,

Sanping Zhou,

Liushuai Shi,

Kun Xia,

Mengnan Liu,

Yabing Wang,

Gang Hua; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Xiaolong and Wang, Le and Zhou, Sanping and Shi, Liushuai and Xia, Kun and Liu, Mengnan and Wang, Yabing and Hua, Gang},
  title     = {Moment Quantization for Video Temporal Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20137--20146},
}
MSQ: Memory-Efficient Bit Sparsification Quantization: Seokho Han,

Seoyeon Yoon,

Jinhee Kim,

Dongwei Wang,

Kang Eun Jeon,

Huanrui Yang,

Jong Hwan Ko; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_ICCV,
  author    = {Han, Seokho and Yoon, Seoyeon and Kim, Jinhee and Wang, Dongwei and Jeon, Kang Eun and Yang, Huanrui and Ko, Jong Hwan},
  title     = {{MSQ}: Memory-Efficient Bit Sparsification Quantization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21885--21894},
}
Cycle Consistency as Reward: Learning Image-Text Alignment without Human Preferences: Hyojin Bahng,

Caroline Chan,

Fredo Durand,

Phillip Isola; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bahng_2025_ICCV,
  author    = {Bahng, Hyojin and Chan, Caroline and Durand, Fredo and Isola, Phillip},
  title     = {Cycle Consistency as Reward: Learning Image-Text Alignment without Human Preferences},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22934--22946},
}
Vid-Group: Temporal Video Grounding Pretraining from Unlabeled Videos in the Wild: Peijun Bao,

Chenqi Kong,

Siyuan Yang,

Zihao Shao,

Xinghao Jiang,

Boon Poh Ng,

Meng Hwa Er,

Alex Kot; [pdf] [supp]
[bibtex]
@InProceedings{Bao_2025_ICCV,
  author    = {Bao, Peijun and Kong, Chenqi and Yang, Siyuan and Shao, Zihao and Jiang, Xinghao and Ng, Boon Poh and Er, Meng Hwa and Kot, Alex},
  title     = {{Vid-Group}: Temporal Video Grounding Pretraining from Unlabeled Videos in the Wild},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20541--20550},
}
BANet: Bilateral Aggregation Network for Mobile Stereo Matching: Gangwei Xu,

Jiaxin Liu,

Xianqi Wang,

Junda Cheng,

Yong Deng,

Jinliang Zang,

Yurui Chen,

Xin Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Gangwei and Liu, Jiaxin and Wang, Xianqi and Cheng, Junda and Deng, Yong and Zang, Jinliang and Chen, Yurui and Yang, Xin},
  title     = {{BANet}: Bilateral Aggregation Network for Mobile Stereo Matching},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28870--28880},
}
AIM: Adaptive Inference of Multi-Modal LLMs via Token Merging and Pruning: Yiwu Zhong,

Zhuoming Liu,

Yin Li,

Liwei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2025_ICCV,
  author    = {Zhong, Yiwu and Liu, Zhuoming and Li, Yin and Wang, Liwei},
  title     = {{AIM}: Adaptive Inference of Multi-Modal {LLMs} via Token Merging and Pruning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20180--20192},
}
G2SF: Geometry-Guided Score Fusion for Multimodal Industrial Anomaly Detection: Chengyu Tao,

Xuanming Cao,

Juan Du; [pdf] [supp]
[bibtex]
@InProceedings{Tao_2025_ICCV,
  author    = {Tao, Chengyu and Cao, Xuanming and Du, Juan},
  title     = {{G2SF}: Geometry-Guided Score Fusion for Multimodal Industrial Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20551--20560},
}
SA-Occ: Satellite-Assisted 3D Occupancy Prediction in Real World: Chen Chen,

Zhirui Wang,

Taowei Sheng,

Yi Jiang,

Yundu Li,

Peirui Cheng,

Luning Zhang,

Kaiqiang Chen,

Yanfeng Hu,

Xue Yang,

Xian Sun; [pdf]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Chen and Wang, Zhirui and Sheng, Taowei and Jiang, Yi and Li, Yundu and Cheng, Peirui and Zhang, Luning and Chen, Kaiqiang and Hu, Yanfeng and Yang, Xue and Sun, Xian},
  title     = {{SA-Occ}: Satellite-Assisted {3D} Occupancy Prediction in Real World},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27021--27030},
}
Implicit Counterfactual Learning for Audio-Visual Segmentation: Mingfeng Zha,

Tianyu Li,

Guoqing Wang,

Peng Wang,

Yangyang Wu,

Yang Yang,

Heng Tao Shen; [pdf] [arXiv]
[bibtex]
@InProceedings{Zha_2025_ICCV,
  author    = {Zha, Mingfeng and Li, Tianyu and Wang, Guoqing and Wang, Peng and Wu, Yangyang and Yang, Yang and Shen, Heng Tao},
  title     = {Implicit Counterfactual Learning for Audio-Visual Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22349--22360},
}
FlowEdit: Inversion-Free Text-Based Editing Using Pre-Trained Flow Models: Vladimir Kulikov,

Matan Kleiner,

Inbar Huberman-Spiegelglas,

Tomer Michaeli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kulikov_2025_ICCV,
  author    = {Kulikov, Vladimir and Kleiner, Matan and Huberman-Spiegelglas, Inbar and Michaeli, Tomer},
  title     = {{FlowEdit}: Inversion-Free Text-Based Editing Using Pre-Trained Flow Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19721--19730},
}
CLIPSym: Delving into Symmetry Detection with CLIP: Tinghan Yang,

Md Ashiqur Rahman,

Raymond A. Yeh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Tinghan and Rahman, Md Ashiqur and Yeh, Raymond A.},
  title     = {{CLIPSym}: Delving into Symmetry Detection with {CLIP}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21003--21013},
}
MRGen: Segmentation Data Engine For Underrepresented MRI Modalities: Haoning Wu,

Ziheng Zhao,

Ya Zhang,

Yanfeng Wang,

Weidi Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Haoning and Zhao, Ziheng and Zhang, Ya and Wang, Yanfeng and Xie, Weidi},
  title     = {{MRGen}: Segmentation Data Engine For Underrepresented {MRI} Modalities},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19903--19913},
}
Accelerate 3D Object Detection Models via Zero-Shot Attention Key Pruning: Lizhen Xu,

Xiuxiu Bai,

Xiaojun Jia,

Jianwu Fang,

Shanmin Pang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Lizhen and Bai, Xiuxiu and Jia, Xiaojun and Fang, Jianwu and Pang, Shanmin},
  title     = {Accelerate {3D} Object Detection Models via Zero-Shot Attention Key Pruning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23085--23094},
}
Adaptive Learning of High-Value Regions for Semi-Supervised Medical Image Segmentation: Tao Lei,

Ziyao Yang,

Xingwu Wang,

Yi Wang,

Xuan Wang,

Feiman Sun,

Asoke K. Nandi; [pdf] [supp]
[bibtex]
@InProceedings{Lei_2025_ICCV,
  author    = {Lei, Tao and Yang, Ziyao and Wang, Xingwu and Wang, Yi and Wang, Xuan and Sun, Feiman and Nandi, Asoke K.},
  title     = {Adaptive Learning of High-Value Regions for Semi-Supervised Medical Image Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21450--21459},
}
Learning Neural Scene Representation from iToF Imaging: Wenjie Chang,

Hanzhi Chang,

Yueyi Zhang,

Wenfei Yang,

Tianzhu Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Chang_2025_ICCV,
  author    = {Chang, Wenjie and Chang, Hanzhi and Zhang, Yueyi and Yang, Wenfei and Zhang, Tianzhu},
  title     = {Learning Neural Scene Representation from {iToF} Imaging},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27937--27946},
}
Robust Machine Unlearning for Quantized Neural Networks via Adaptive Gradient Reweighting with Similar Labels: Yujia Tong,

Yuze Wang,

Jingling Yuan,

Chuang Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tong_2025_ICCV,
  author    = {Tong, Yujia and Wang, Yuze and Yuan, Jingling and Hu, Chuang},
  title     = {Robust Machine Unlearning for Quantized Neural Networks via Adaptive Gradient Reweighting with Similar Labels},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20603--20612},
}
SegAnyPET: Universal Promptable Segmentation from Positron Emission Tomography Images: Yichi Zhang,

Le Xue,

Wenbo Zhang,

Lanlan Li,

Yuchen Liu,

Chen Jiang,

Yuan Cheng,

Yuan Qi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yichi and Xue, Le and Zhang, Wenbo and Li, Lanlan and Liu, Yuchen and Jiang, Chen and Cheng, Yuan and Qi, Yuan},
  title     = {{SegAnyPET}: Universal Promptable Segmentation from Positron Emission Tomography Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21107--21116},
}
ILLUME: Illuminating Your LLMs to See, Draw, and Self-Enhance: Chunwei Wang,

Guansong Lu,

Junwei Yang,

Runhui Huang,

Jianhua Han,

Lu Hou,

Wei Zhang,

Hang Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Chunwei and Lu, Guansong and Yang, Junwei and Huang, Runhui and Han, Jianhua and Hou, Lu and Zhang, Wei and Xu, Hang},
  title     = {{ILLUME}: Illuminating Your {LLMs} to See, Draw, and Self-Enhance},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21612--21622},
}
No Pose at All: Self-Supervised Pose-Free 3D Gaussian Splatting from Sparse Views: Ranran Huang,

Krystian Mikolajczyk; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Ranran and Mikolajczyk, Krystian},
  title     = {No Pose at All: Self-Supervised Pose-Free {3D} {Gaussian} Splatting from Sparse Views},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27947--27957},
}
EmbodiedOcc: Embodied 3D Occupancy Prediction for Vision-based Online Scene Understanding: Yuqi Wu,

Wenzhao Zheng,

Sicheng Zuo,

Yuanhui Huang,

Jie Zhou,

Jiwen Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Yuqi and Zheng, Wenzhao and Zuo, Sicheng and Huang, Yuanhui and Zhou, Jie and Lu, Jiwen},
  title     = {{EmbodiedOcc}: Embodied {3D} Occupancy Prediction for Vision-based Online Scene Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26360--26370},
}
DiST-4D: Disentangled Spatiotemporal Diffusion with Metric Depth for 4D Driving Scene Generation: Jiazhe Guo,

Yikang Ding,

Xiwu Chen,

Shuo Chen,

Bohan Li,

Yingshuang Zou,

Xiaoyang Lyu,

Feiyang Tan,

Xiaojuan Qi,

Zhiheng Li,

Hao Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2025_ICCV,
  author    = {Guo, Jiazhe and Ding, Yikang and Chen, Xiwu and Chen, Shuo and Li, Bohan and Zou, Yingshuang and Lyu, Xiaoyang and Tan, Feiyang and Qi, Xiaojuan and Li, Zhiheng and Zhao, Hao},
  title     = {{DiST-4D}: Disentangled Spatiotemporal Diffusion with Metric Depth for {4D} Driving Scene Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27231--27241},
}
Enhancing Prompt Generation with Adaptive Refinement for Camouflaged Object Detection: Xuehan Chen,

Guangyu Ren,

Tianhong Dai,

Tania Stathaki,

Hengyan Liu; [pdf]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Xuehan and Ren, Guangyu and Dai, Tianhong and Stathaki, Tania and Liu, Hengyan},
  title     = {Enhancing Prompt Generation with Adaptive Refinement for Camouflaged Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20672--20682},
}
Generalizable Non-Line-of-Sight Imaging with Learnable Physical Priors: Shida Sun,

Yue Li,

Yueyi Zhang,

Zhiwei Xiong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Shida and Li, Yue and Zhang, Yueyi and Xiong, Zhiwei},
  title     = {Generalizable Non-Line-of-Sight Imaging with Learnable Physical Priors},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25040--25049},
}
ReMP-AD: Retrieval-enhanced Multi-modal Prompt Fusion for Few-Shot Industrial Visual Anomaly Detection: Hongchi Ma,

Guanglei Yang,

Debin Zhao,

Yanli Ji,

Wangmeng Zuo; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2025_ICCV,
  author    = {Ma, Hongchi and Yang, Guanglei and Zhao, Debin and Ji, Yanli and Zuo, Wangmeng},
  title     = {{ReMP-AD}: Retrieval-enhanced Multi-modal Prompt Fusion for Few-Shot Industrial Visual Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20425--20434},
}
TOGA: Temporally Grounded Open-Ended Video QA with Weak Supervision: Ayush Gupta,

Anirban Roy,

Rama Chellappa,

Nathaniel D. Bastian,

Alvaro Velasquez,

Susmit Jha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gupta_2025_ICCV,
  author    = {Gupta, Ayush and Roy, Anirban and Chellappa, Rama and Bastian, Nathaniel D. and Velasquez, Alvaro and Jha, Susmit},
  title     = {{TOGA}: Temporally Grounded Open-Ended Video {QA} with Weak Supervision},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23593--23603},
}
MUSE-VL: Modeling Unified VLM through Semantic Discrete Encoding: Rongchang Xie,

Chen Du,

Ping Song,

Chang Liu; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2025_ICCV,
  author    = {Xie, Rongchang and Du, Chen and Song, Ping and Liu, Chang},
  title     = {{MUSE-VL}: Modeling Unified {VLM} through Semantic Discrete Encoding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24135--24146},
}
Task Vector Quantization for Memory-Efficient Model Merging: Youngeun Kim,

Seunghwan Lee,

Aecheon Jung,

Bogon Ryu,

Sungeun Hong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Youngeun and Lee, Seunghwan and Jung, Aecheon and Ryu, Bogon and Hong, Sungeun},
  title     = {Task Vector Quantization for Memory-Efficient Model Merging},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20105--20115},
}
GeoFormer: Geometry Point Encoder for 3D Object Detection with Graph-based Transformer: Xin Jin,

Haisheng Su,

Cong Ma,

Kai Liu,

Wei Wu,

Fei Hui,

Junchi Yan; [pdf]
[bibtex]
@InProceedings{Jin_2025_ICCV,
  author    = {Jin, Xin and Su, Haisheng and Ma, Cong and Liu, Kai and Wu, Wei and Hui, Fei and Yan, Junchi},
  title     = {{GeoFormer}: Geometry Point Encoder for {3D} Object Detection with Graph-based Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26879--26889},
}
Unraveling the Effects of Synthetic Data on End-to-End Autonomous Driving: Junhao Ge,

Zuhong Liu,

Longteng Fan,

Yifan Jiang,

Jiaqi Su,

Yiming Li,

Zhejun Zhang,

Siheng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ge_2025_ICCV,
  author    = {Ge, Junhao and Liu, Zuhong and Fan, Longteng and Jiang, Yifan and Su, Jiaqi and Li, Yiming and Zhang, Zhejun and Chen, Siheng},
  title     = {Unraveling the Effects of Synthetic Data on End-to-End Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28859--28869},
}
NeuFrameQ: Neural Frame Fields for Scalable and Generalizable Anisotropic Quadrangulation: Ying-Tian Liu,

Jiajun Li,

Yu-Tao Liu,

Xin Yu,

Yuan-Chen Guo,

Yan-Pei Cao,

Ding Liang,

Ariel Shamir,

Song-Hai Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Ying-Tian and Li, Jiajun and Liu, Yu-Tao and Yu, Xin and Guo, Yuan-Chen and Cao, Yan-Pei and Liang, Ding and Shamir, Ariel and Zhang, Song-Hai},
  title     = {{NeuFrameQ}: Neural Frame Fields for Scalable and Generalizable Anisotropic Quadrangulation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28000--28009},
}
Keyframe-oriented Vision Token Pruning: Enhancing Efficiency of Large Vision Language Models on Long-Form Video Processing: Yudong Liu,

Jingwei Sun,

Yueqian Lin,

Jianyi Zhang,

Jingyang Zhang,

Ming Yin,

Qinsi Wang,

Hai Li,

Yiran Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yudong and Sun, Jingwei and Lin, Yueqian and Zhang, Jianyi and Zhang, Jingyang and Yin, Ming and Wang, Qinsi and Li, Hai and Chen, Yiran},
  title     = {Keyframe-oriented Vision Token Pruning: Enhancing Efficiency of Large Vision Language Models on Long-Form Video Processing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20802--20811},
}
LongSplat: Robust Unposed 3D Gaussian Splatting for Casual Long Videos: Chin-Yang Lin,

Cheng Sun,

Fu-En Yang,

Min-Hung Chen,

Yen-Yu Lin,

Yu-Lun Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_ICCV,
  author    = {Lin, Chin-Yang and Sun, Cheng and Yang, Fu-En and Chen, Min-Hung and Lin, Yen-Yu and Liu, Yu-Lun},
  title     = {{LongSplat}: Robust Unposed {3D} {Gaussian} Splatting for Casual Long Videos},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27412--27422},
}
S3R-GS: Streamlining the Pipeline for Large-Scale Street Scene Reconstruction: Guangting Zheng,

Jiajun Deng,

Xiaomeng Chu,

Yu Yuan,

Houqiang Li,

Yanyong Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Guangting and Deng, Jiajun and Chu, Xiaomeng and Yuan, Yu and Li, Houqiang and Zhang, Yanyong},
  title     = {{S3R-GS}: Streamlining the Pipeline for Large-Scale Street Scene Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25594--25604},
}
Beyond One Shot, Beyond One Perspective: Cross-View and Long-Horizon Distillation for Better LiDAR Representations: Xiang Xu,

Lingdong Kong,

Song Wang,

Chuanwei Zhou,

Qingshan Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Xiang and Kong, Lingdong and Wang, Song and Zhou, Chuanwei and Liu, Qingshan},
  title     = {Beyond One Shot, Beyond One Perspective: Cross-View and Long-Horizon Distillation for Better {LiDAR} Representations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25506--25518},
}
GaussianUpdate: Continual 3D Gaussian Splatting Update for Changing Environments: Lin Zeng,

Boming Zhao,

Jiarui Hu,

Xujie Shen,

Ziqiang Dang,

Hujun Bao,

Zhaopeng Cui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2025_ICCV,
  author    = {Zeng, Lin and Zhao, Boming and Hu, Jiarui and Shen, Xujie and Dang, Ziqiang and Bao, Hujun and Cui, Zhaopeng},
  title     = {{GaussianUpdate}: Continual {3D} {Gaussian} Splatting Update for Changing Environments},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25800--25809},
}
SiM3D: Single-instance Multiview Multimodal and Multisetup 3D Anomaly Detection Benchmark: Alex Costanzino,

Pierluigi Zama Ramirez,

Luigi Lella,

Matteo Ragaglia,

Alessandro Oliva,

Giuseppe Lisanti,

Luigi Di Stefano; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Costanzino_2025_ICCV,
  author    = {Costanzino, Alex and Ramirez, Pierluigi Zama and Lella, Luigi and Ragaglia, Matteo and Oliva, Alessandro and Lisanti, Giuseppe and Di Stefano, Luigi},
  title     = {{SiM3D}: Single-instance Multiview Multimodal and Multisetup {3D} Anomaly Detection Benchmark},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20944--20953},
}
Teaching AI the Anatomy Behind the Scan: Addressing Anatomical Flaws in Medical Image Segmentation with Learnable Prior: Young Seok Jeon,

Hongfei Yang,

Huazhu Fu,

Mengling Feng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeon_2025_ICCV,
  author    = {Jeon, Young Seok and Yang, Hongfei and Fu, Huazhu and Feng, Mengling},
  title     = {Teaching {AI} the Anatomy Behind the Scan: Addressing Anatomical Flaws in Medical Image Segmentation with Learnable Prior},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24024--24033},
}
WorldScore: A Unified Evaluation Benchmark for World Generation: Haoyi Duan,

Hong-Xing Yu,

Sirui Chen,

Li Fei-Fei,

Jiajun Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Duan_2025_ICCV,
  author    = {Duan, Haoyi and Yu, Hong-Xing and Chen, Sirui and Li, Fei-Fei and Wu, Jiajun},
  title     = {{WorldScore}: A Unified Evaluation Benchmark for World Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27713--27724},
}
EvaGaussians: Event Stream Assisted Gaussian Splatting from Blurry Images: Wangbo Yu,

Chaoran Feng,

Jianing Li,

Jiye Tang,

Jiashu Yang,

Zhenyu Tang,

Meng Cao,

Xu Jia,

Yuchao Yang,

Li Yuan,

Yonghong Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Wangbo and Feng, Chaoran and Li, Jianing and Tang, Jiye and Yang, Jiashu and Tang, Zhenyu and Cao, Meng and Jia, Xu and Yang, Yuchao and Yuan, Li and Tian, Yonghong},
  title     = {{EvaGaussians}: Event Stream Assisted {Gaussian} Splatting from Blurry Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24780--24790},
}
CuMPerLay: Learning Cubical Multiparameter Persistence Vectorizations: Caner Korkmaz,

Brighton Nuwagira,

Baris Coskunuzer,

Tolga Birdal; [pdf] [supp]
[bibtex]
@InProceedings{Korkmaz_2025_ICCV,
  author    = {Korkmaz, Caner and Nuwagira, Brighton and Coskunuzer, Baris and Birdal, Tolga},
  title     = {{CuMPerLay}: Learning Cubical Multiparameter Persistence Vectorizations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27084--27094},
}
Recovering Parametric Scenes from Very Few Time-of-Flight Pixels: Carter Sifferman,

Yiquan Li,

Yiming Li,

Fangzhou Mu,

Michael Gleicher,

Mohit Gupta,

Yin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sifferman_2025_ICCV,
  author    = {Sifferman, Carter and Li, Yiquan and Li, Yiming and Mu, Fangzhou and Gleicher, Michael and Gupta, Mohit and Li, Yin},
  title     = {Recovering Parametric Scenes from Very Few Time-of-Flight Pixels},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27989--27999},
}
Always Skip Attention: Yiping Ji,

Hemanth Saratchandran,

Peyman Moghadam,

Simon Lucey; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ji_2025_ICCV,
  author    = {Ji, Yiping and Saratchandran, Hemanth and Moghadam, Peyman and Lucey, Simon},
  title     = {Always Skip Attention},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23115--23123},
}
ExCap3D: Expressive 3D Scene Understanding via Object Captioning with Varying Detail: Chandan Yeshwanth,

Dávid Rozenberszki,

Angela Dai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yeshwanth_2025_ICCV,
  author    = {Yeshwanth, Chandan and Rozenberszki, D{\'a}vid and Dai, Angela},
  title     = {{ExCap3D}: Expressive {3D} Scene Understanding via Object Captioning with Varying Detail},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21699--21709},
}
LeGrad: An Explainability Method for Vision Transformers via Feature Formation Sensitivity: Walid Bousselham,

Angie Boggust,

Sofian Chaybouti,

Hendrik Strobelt,

Hilde Kuehne; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bousselham_2025_ICCV,
  author    = {Bousselham, Walid and Boggust, Angie and Chaybouti, Sofian and Strobelt, Hendrik and Kuehne, Hilde},
  title     = {{LeGrad}: An Explainability Method for Vision Transformers via Feature Formation Sensitivity},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20336--20345},
}
GaussianFlowOcc: Sparse and Weakly Supervised Occupancy Estimation using Gaussian Splatting and Temporal Flow: Simon Boeder,

Fabian Gigengack,

Benjamin Risse; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Boeder_2025_ICCV,
  author    = {Boeder, Simon and Gigengack, Fabian and Risse, Benjamin},
  title     = {{GaussianFlowOcc}: Sparse and Weakly Supervised Occupancy Estimation using {Gaussian} Splatting and Temporal Flow},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24943--24954},
}
OD-RASE: Ontology-Driven Risk Assessment and Safety Enhancement for Autonomous Driving: Kota Shimomura,

Masaki Nambata,

Atsuya Ishikawa,

Ryota Mimura,

Koki Inoue,

Takayoshi Yamashita,

Takayuki Kawabuchi; [pdf] [supp]
[bibtex]
@InProceedings{Shimomura_2025_ICCV,
  author    = {Shimomura, Kota and Nambata, Masaki and Ishikawa, Atsuya and Mimura, Ryota and Inoue, Koki and Yamashita, Takayoshi and Kawabuchi, Takayuki},
  title     = {{OD-RASE}: Ontology-Driven Risk Assessment and Safety Enhancement for Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26167--26177},
}
Serialization based Point Cloud Oversegmentation: Chenghui Lu,

Jianlong Kwan,

Dilong Li,

Ziyi Chen,

Haiyan Guan; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Chenghui and Kwan, Jianlong and Li, Dilong and Chen, Ziyi and Guan, Haiyan},
  title     = {Serialization based Point Cloud Oversegmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25831--25840},
}
Latent Expression Generation for Referring Image Segmentation and Grounding: Seonghoon Yu,

Joonbeom Hong,

Joonseok Lee,

Jeany Son; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Seonghoon and Hong, Joonbeom and Lee, Joonseok and Son, Jeany},
  title     = {Latent Expression Generation for Referring Image Segmentation and Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21374--21383},
}
Beyond Single Images: Retrieval Self-Augmented Unsupervised Camouflaged Object Detection: Ji Du,

Xin Wang,

Fangwei Hao,

Mingyang Yu,

Chunyuan Chen,

Jiesheng Wu,

Bin Wang,

Jing Xu,

Ping Li; [pdf] [supp]
[bibtex]
@InProceedings{Du_2025_ICCV,
  author    = {Du, Ji and Wang, Xin and Hao, Fangwei and Yu, Mingyang and Chen, Chunyuan and Wu, Jiesheng and Wang, Bin and Xu, Jing and Li, Ping},
  title     = {Beyond Single Images: Retrieval Self-Augmented Unsupervised Camouflaged Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22131--22142},
}
GeoSplatting: Towards Geometry Guided Gaussian Splatting for Physically-based Inverse Rendering: Kai Ye,

Chong Gao,

Guanbin Li,

Wenzheng Chen,

Baoquan Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2025_ICCV,
  author    = {Ye, Kai and Gao, Chong and Li, Guanbin and Chen, Wenzheng and Chen, Baoquan},
  title     = {{GeoSplatting}: Towards Geometry Guided {Gaussian} Splatting for Physically-based Inverse Rendering},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28991--29000},
}
Temporal-aware Query Routing for Real-time Video Instance Segmentation: Zesen Cheng,

Kehan Li,

Yian Zhao,

Hang Zhang,

Chang Liu,

Jie Chen; [pdf]
[bibtex]
@InProceedings{Cheng_2025_ICCV,
  author    = {Cheng, Zesen and Li, Kehan and Zhao, Yian and Zhang, Hang and Liu, Chang and Chen, Jie},
  title     = {Temporal-aware Query Routing for Real-time Video Instance Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22467--22476},
}
CapeLLM: Support-Free Category-Agnostic Pose Estimation with Multimodal Large Language Models: Junho Kim,

Hyungjin Chung,

Byung-Hoon Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Junho and Chung, Hyungjin and Kim, Byung-Hoon},
  title     = {{CapeLLM}: Support-Free Category-Agnostic Pose Estimation with Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22889--22898},
}
VisHall3D: Monocular Semantic Scene Completion from Reconstructing the Visible Regions to Hallucinating the Invisible Regions: Haoang Lu,

Yuanqi Su,

Xiaoning Zhang,

Longjun Gao,

Yu Xue,

Le Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Haoang and Su, Yuanqi and Zhang, Xiaoning and Gao, Longjun and Xue, Yu and Wang, Le},
  title     = {{VisHall3D}: Monocular Semantic Scene Completion from Reconstructing the Visible Regions to Hallucinating the Invisible Regions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28674--28684},
}
Controllable 3D Outdoor Scene Generation via Scene Graphs: Yuheng Liu,

Xinke Li,

Yuning Zhang,

Lu Qi,

Xin Li,

Wenping Wang,

Chongshou Li,

Xueting Li,

Ming-Hsuan Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yuheng and Li, Xinke and Zhang, Yuning and Qi, Lu and Li, Xin and Wang, Wenping and Li, Chongshou and Li, Xueting and Yang, Ming-Hsuan},
  title     = {Controllable {3D} Outdoor Scene Generation via Scene Graphs},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28052--28062},
}
UniOcc: A Unified Benchmark for Occupancy Forecasting and Prediction in Autonomous Driving: Yuping Wang,

Xiangyu Huang,

Xiaokang Sun,

Mingxuan Yan,

Shuo Xing,

Zhengzhong Tu,

Jiachen Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yuping and Huang, Xiangyu and Sun, Xiaokang and Yan, Mingxuan and Xing, Shuo and Tu, Zhengzhong and Li, Jiachen},
  title     = {{UniOcc}: A Unified Benchmark for Occupancy Forecasting and Prediction in Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25560--25570},
}
NGD: Neural Gradient Based Deformation for Monocular Garment Reconstruction: Soham Dasgupta,

Shanthika Naik,

Preet Savalia,

Sujay Kumar Ingle,

Avinash Sharma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dasgupta_2025_ICCV,
  author    = {Dasgupta, Soham and Naik, Shanthika and Savalia, Preet and Ingle, Sujay Kumar and Sharma, Avinash},
  title     = {{NGD}: Neural Gradient Based Deformation for Monocular Garment Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25485--25495},
}
U-ViLAR: Uncertainty-Aware Visual Localization for Autonomous Driving via Differentiable Association and Registration: Xiaofan Li,

Zhihao Xu,

Chenming Wu,

Zhao Yang,

Yumeng Zhang,

Jiang-Jiang Liu,

Haibao Yu,

Xiaoqing Ye,

Yuan Wang,

Shirui Li,

Xun Sun,

Ji Wan,

Jun Wang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Xiaofan and Xu, Zhihao and Wu, Chenming and Yang, Zhao and Zhang, Yumeng and Liu, Jiang-Jiang and Yu, Haibao and Ye, Xiaoqing and Wang, Yuan and Li, Shirui and Sun, Xun and Wan, Ji and Wang, Jun},
  title     = {{U-ViLAR}: Uncertainty-Aware Visual Localization for Autonomous Driving via Differentiable Association and Registration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24889--24898},
}
CF3: Compact and Fast 3D Feature Fields: Hyunjoon Lee,

Joonkyu Min,

Jaesik Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Hyunjoon and Min, Joonkyu and Park, Jaesik},
  title     = {{CF3}: Compact and Fast {3D} Feature Fields},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27906--27916},
}
Few-Shot Pattern Detection via Template Matching and Regression: Eunchan Jo,

Dahyun Kang,

Sanghyun Kim,

Yunseon Choi,

Minsu Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jo_2025_ICCV,
  author    = {Jo, Eunchan and Kang, Dahyun and Kim, Sanghyun and Choi, Yunseon and Cho, Minsu},
  title     = {Few-Shot Pattern Detection via Template Matching and Regression},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21578--21588},
}
Leveraging BEV Paradigm for Ground-to-Aerial Image Synthesis: Junyan Ye,

Jun He,

Weijia Li,

Zhutao Lv,

Yi Lin,

Jinhua Yu,

Haote Yang,

Conghui He; [pdf] [arXiv]
[bibtex]
@InProceedings{Ye_2025_ICCV,
  author    = {Ye, Junyan and He, Jun and Li, Weijia and Lv, Zhutao and Lin, Yi and Yu, Jinhua and Yang, Haote and He, Conghui},
  title     = {Leveraging {BEV} Paradigm for Ground-to-Aerial Image Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28451--28461},
}
MultiverSeg: Scalable Interactive Segmentation of Biomedical Imaging Datasets with In-Context Guidance: Hallee E. Wong,

Jose Javier Gonzalez Ortiz,

John Guttag,

Adrian V. Dalca; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wong_2025_ICCV,
  author    = {Wong, Hallee E. and Ortiz, Jose Javier Gonzalez and Guttag, John and Dalca, Adrian V.},
  title     = {{MultiverSeg}: Scalable Interactive Segmentation of Biomedical Imaging Datasets with In-Context Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20966--20980},
}
MuGS: Multi-Baseline Generalizable Gaussian Splatting Reconstruction: Yaopeng Lou,

Liao Shen,

Tianqi Liu,

Jiaqi Li,

Zihao Huang,

Huiqiang Sun,

Zhiguo Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lou_2025_ICCV,
  author    = {Lou, Yaopeng and Shen, Liao and Liu, Tianqi and Li, Jiaqi and Huang, Zihao and Sun, Huiqiang and Cao, Zhiguo},
  title     = {{MuGS}: Multi-Baseline Generalizable {Gaussian} Splatting Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25583--25593},
}
Aligning Effective Tokens with Video Anomaly in Large Language Models: Yingxian Chen,

Jiahui Liu,

Ruidi Fan,

Yanwei Li,

Chirui Chang,

Shizhen Zhao,

Wilton W. T. Fok,

Xiaojuan Qi,

Yik-Chung Wu; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Yingxian and Liu, Jiahui and Fan, Ruidi and Li, Yanwei and Chang, Chirui and Zhao, Shizhen and Fok, Wilton W. T. and Qi, Xiaojuan and Wu, Yik-Chung},
  title     = {Aligning Effective Tokens with Video Anomaly in Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22695--22706}
}
Benchmarking Burst Super-Resolution for Polarization Images: Noise Dataset and Analysis: Inseung Hwang,

Kiseok Choi,

Hyunho Ha,

Min H. Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hwang_2025_ICCV,
  author    = {Hwang, Inseung and Choi, Kiseok and Ha, Hyunho and Kim, Min H.},
  title     = {Benchmarking Burst Super-Resolution for Polarization Images: Noise Dataset and Analysis},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24899--24909}
}
JointDiT: Enhancing RGB-Depth Joint Modeling with Diffusion Transformers: Kwon Byung-Ki,

Qi Dai,

Lee Hyoseok,

Chong Luo,

Tae-Hyun Oh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Byung-Ki_2025_ICCV,
  author    = {Kwon, Byung-Ki and Dai, Qi and Lee, Hyoseok and Luo, Chong and Oh, Tae-Hyun},
  title     = {{JointDiT}: Enhancing {RGB}-Depth Joint Modeling with Diffusion Transformers},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25261--25271}
}
Discretized Gaussian Representation for Tomographic Reconstruction: Shaokai Wu,

Yuxiang Lu,

Yapan Guo,

Wei Ji,

Suizhi Huang,

Fengyu Yang,

Shalayiding Sirejiding,

Qichen He,

Jing Tong,

Yanbiao Ji,

Yue Ding,

Hongtao Lu; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Shaokai and Lu, Yuxiang and Guo, Yapan and Ji, Wei and Huang, Suizhi and Yang, Fengyu and Sirejiding, Shalayiding and He, Qichen and Tong, Jing and Ji, Yanbiao and Ding, Yue and Lu, Hongtao},
  title     = {Discretized {Gaussian} Representation for Tomographic Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25073--25082}
}
Language Decoupling with Fine-grained Knowledge Guidance for Referring Multi-object Tracking: Guangyao Li,

Siping Zhuang,

Yajun Jian,

Yan Yan,

Hanzi Wang; [pdf]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Guangyao and Zhuang, Siping and Jian, Yajun and Yan, Yan and Wang, Hanzi},
  title     = {Language Decoupling with Fine-grained Knowledge Guidance for Referring Multi-object Tracking},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23626--23635}
}
Referring Expression Comprehension for Small Objects: Kanoko Goto,

Takumi Hirose,

Mahiro Ukai,

Shuhei Kurita,

Nakamasa Inoue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Goto_2025_ICCV,
  author    = {Goto, Kanoko and Hirose, Takumi and Ukai, Mahiro and Kurita, Shuhei and Inoue, Nakamasa},
  title     = {Referring Expression Comprehension for Small Objects},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21231--21242}
}
InstaDrive: Instance-Aware Driving World Models for Realistic and Consistent Video Generation: Zhuoran Yang,

Xi Guo,

Chenjing Ding,

Chiyu Wang,

Wei Wu,

Yanyong Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Zhuoran and Guo, Xi and Ding, Chenjing and Wang, Chiyu and Wu, Wei and Zhang, Yanyong},
  title     = {{InstaDrive}: Instance-Aware Driving World Models for Realistic and Consistent Video Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25410--25420}
}
Global-Aware Monocular Semantic Scene Completion with State Space Models: Shijie Li,

Zhongyao Cheng,

Rong Li,

Shuai Li,

Juergen Gall,

Xun Xu,

Xulei Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Shijie and Cheng, Zhongyao and Li, Rong and Li, Shuai and Gall, Juergen and Xu, Xun and Yang, Xulei},
  title     = {Global-Aware Monocular Semantic Scene Completion with State Space Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25550--25559}
}
3D Gaussian Splatting Driven Multi-View Robust Physical Adversarial Camouflage Generation: Tianrui Lou,

Xiaojun Jia,

Siyuan Liang,

Jiawei Liang,

Ming Zhang,

Yanjun Xiao,

Xiaochun Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lou_2025_ICCV,
  author    = {Lou, Tianrui and Jia, Xiaojun and Liang, Siyuan and Liang, Jiawei and Zhang, Ming and Xiao, Yanjun and Cao, Xiaochun},
  title     = {{3D} {Gaussian} Splatting Driven Multi-View Robust Physical Adversarial Camouflage Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28752--28762}
}
SuperMat: Physically Consistent PBR Material Estimation at Interactive Rates: Yijia Hong,

Yuan-Chen Guo,

Ran Yi,

Yulong Chen,

Yan-Pei Cao,

Lizhuang Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hong_2025_ICCV,
  author    = {Hong, Yijia and Guo, Yuan-Chen and Yi, Ran and Chen, Yulong and Cao, Yan-Pei and Ma, Lizhuang},
  title     = {{SuperMat}: Physically Consistent {PBR} Material Estimation at Interactive Rates},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25083--25093}
}
HiMTok: Learning Hierarchical Mask Tokens for Image Segmentation with Large Multimodal Model: Tao Wang,

Changxu Cheng,

Lingfeng Wang,

Senda Chen,

Wuyue Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Tao and Cheng, Changxu and Wang, Lingfeng and Chen, Senda and Zhao, Wuyue},
  title     = {{HiMTok}: Learning Hierarchical Mask Tokens for Image Segmentation with Large Multimodal Model},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23267--23278}
}
MixA-Q: Revisiting Activation Sparsity for Vision Transformers from a Mixed-Precision Quantization Perspective: Weitian Wang,

Rai Shubham,

Cecilia De La Parra,

Akash Kumar; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Weitian and Rai, Shubham and De La Parra, Cecilia and Kumar, Akash},
  title     = {{MixA-Q}: Revisiting Activation Sparsity for Vision Transformers from a Mixed-Precision Quantization Perspective},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22143--22152}
}
EMD: Explicit Motion Modeling for High-Quality Street Gaussian Splatting: Xiaobao Wei,

Qingpo Wuwu,

Zhongyu Zhao,

Zhuangzhe Wu,

Nan Huang,

Ming Lu,

Ningning Ma,

Shanghang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Xiaobao and Wuwu, Qingpo and Zhao, Zhongyu and Wu, Zhuangzhe and Huang, Nan and Lu, Ming and Ma, Ningning and Zhang, Shanghang},
  title     = {{EMD}: Explicit Motion Modeling for High-Quality Street {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28462--28472}
}
Neural Multi-View Self-Calibrated Photometric Stereo without Photometric Stereo Cues: Xu Cao,

Takafumi Taketomi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2025_ICCV,
  author    = {Cao, Xu and Taketomi, Takafumi},
  title     = {Neural Multi-View Self-Calibrated Photometric Stereo without Photometric Stereo Cues},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27552--27562}
}
Multi-Modal Multi-Task Unified Embedding Model (M3T-UEM): A Task-Adaptive Representation Learning Framework: Rohan Sharma,

Changyou Chen,

Feng-Ju Chang,

Seongjun Yun,

Xiaohu Xie,

Rui Meng,

Dehong Xu,

Alejandro Mottini,

Qingjun Cui; [pdf] [supp]
[bibtex]
@InProceedings{Sharma_2025_ICCV,
  author    = {Sharma, Rohan and Chen, Changyou and Chang, Feng-Ju and Yun, Seongjun and Xie, Xiaohu and Meng, Rui and Xu, Dehong and Mottini, Alejandro and Cui, Qingjun},
  title     = {Multi-Modal Multi-Task Unified Embedding Model ({M3T-UEM}): A Task-Adaptive Representation Learning Framework},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22783--22793}
}
Towards More Diverse and Challenging Pre-training for Point Cloud Learning: Self-Supervised Cross Reconstruction with Decoupled Views: Xiangdong Zhang,

Shaofeng Zhang,

Junchi Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Xiangdong and Zhang, Shaofeng and Yan, Junchi},
  title     = {Towards More Diverse and Challenging Pre-training for Point Cloud Learning: Self-Supervised Cross Reconstruction with Decoupled Views},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28696--28706}
}
Baking Gaussian Splatting into Diffusion Denoiser for Fast and Scalable Single-stage Image-to-3D Generation and Reconstruction: Yuanhao Cai,

He Zhang,

Kai Zhang,

Yixun Liang,

Mengwei Ren,

Fujun Luan,

Qing Liu,

Soo Ye Kim,

Jianming Zhang,

Zhifei Zhang,

Yuqian Zhou,

Yulun Zhang,

Xiaokang Yang,

Zhe Lin,

Alan Yuille; [pdf] [arXiv]
[bibtex]
@InProceedings{Cai_2025_ICCV,
  author    = {Cai, Yuanhao and Zhang, He and Zhang, Kai and Liang, Yixun and Ren, Mengwei and Luan, Fujun and Liu, Qing and Kim, Soo Ye and Zhang, Jianming and Zhang, Zhifei and Zhou, Yuqian and Zhang, Yulun and Yang, Xiaokang and Lin, Zhe and Yuille, Alan},
  title     = {Baking {Gaussian} Splatting into Diffusion Denoiser for Fast and Scalable Single-stage Image-to-{3D} Generation and Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25062--25072}
}
Unsupervised Imaging Inverse Problems with Diffusion Distribution Matching: Giacomo Meanti,

Thomas Ryckeboer,

Michael Arbel,

Julien Mairal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Meanti_2025_ICCV,
  author    = {Meanti, Giacomo and Ryckeboer, Thomas and Arbel, Michael and Mairal, Julien},
  title     = {Unsupervised Imaging Inverse Problems with Diffusion Distribution Matching},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28364--28374}
}
Integrating Biological Knowledge for Robust Microscopy Image Profiling on De Novo Cell Lines: Jiayuan Chen,

Thai-Hoang Pham,

Yuanlong Wang,

Ping Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Jiayuan and Pham, Thai-Hoang and Wang, Yuanlong and Zhang, Ping},
  title     = {Integrating Biological Knowledge for Robust Microscopy Image Profiling on De Novo Cell Lines},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22846--22856}
}
OuroMamba: A Data-Free Quantization Framework for Vision Mamba: Akshat Ramachandran,

Mingyu Lee,

Huan Xu,

Souvik Kundu,

Tushar Krishna; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ramachandran_2025_ICCV,
  author    = {Ramachandran, Akshat and Lee, Mingyu and Xu, Huan and Kundu, Souvik and Krishna, Tushar},
  title     = {{OuroMamba}: A Data-Free Quantization Framework for Vision {Mamba}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21177--21186}
}
MA-CIR: A Multimodal Arithmetic Benchmark for Composed Image Retrieval: Jaeseok Byun,

Young Kyun Jang,

Seokhyeon Jeong,

Donghyun Kim,

Taesup Moon; [pdf] [supp]
[bibtex]
@InProceedings{Byun_2025_ICCV,
  author    = {Byun, Jaeseok and Jang, Young Kyun and Jeong, Seokhyeon and Kim, Donghyun and Moon, Taesup},
  title     = {{MA-CIR}: A Multimodal Arithmetic Benchmark for Composed Image Retrieval},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21342--21352}
}
Large-scale Pre-training for Grounded Video Caption Generation: Evangelos Kazakos,

Cordelia Schmid,

Josef Sivic; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kazakos_2025_ICCV,
  author    = {Kazakos, Evangelos and Schmid, Cordelia and Sivic, Josef},
  title     = {Large-scale Pre-training for Grounded Video Caption Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24434--24444}
}
SimMLM: A Simple Framework for Multi-modal Learning with Missing Modality: Sijie Li,

Chen Chen,

Jungong Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Sijie and Chen, Chen and Han, Jungong},
  title     = {{SimMLM}: A Simple Framework for Multi-modal Learning with Missing Modality},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24068--24077}
}
LINR-PCGC: Lossless Implicit Neural Representations for Point Cloud Geometry Compression: Wenjie Huang,

Qi Yang,

Shuting Xia,

He Huang,

Yiling Xu,

Zhu Li; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Wenjie and Yang, Qi and Xia, Shuting and Huang, He and Xu, Yiling and Li, Zhu},
  title     = {{LINR-PCGC}: Lossless Implicit Neural Representations for Point Cloud Geometry Compression},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28577--28586}
}
RoCo-Sim: Enhancing Roadside Collaborative Perception through Foreground Simulation: Yuwen Du,

Anning Hu,

Zichen Chao,

Yifan Lu,

Junhao Ge,

Genjia Liu,

Weitao Wu,

Lanjun Wang,

Siheng Chen; [pdf]
[bibtex]
@InProceedings{Du_2025_ICCV,
  author    = {Du, Yuwen and Hu, Anning and Chao, Zichen and Lu, Yifan and Ge, Junhao and Liu, Genjia and Wu, Weitao and Wang, Lanjun and Chen, Siheng},
  title     = {{RoCo-Sim}: Enhancing Roadside Collaborative Perception through Foreground Simulation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26977--26986}
}
UKBOB: One Billion MRI Labeled Masks for Generalizable 3D Medical Image Segmentation: Emmanuelle Bourigault,

Amir Jamaludin,

Abdullah Hamdi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bourigault_2025_ICCV,
  author    = {Bourigault, Emmanuelle and Jamaludin, Amir and Hamdi, Abdullah},
  title     = {{UKBOB}: One Billion {MRI} Labeled Masks for Generalizable {3D} Medical Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21600--21611}
}
GaussianOcc: Fully Self-supervised and Efficient 3D Occupancy Estimation with Gaussian Splatting: Wanshui Gan,

Fang Liu,

Hongbin Xu,

Ningkai Mo,

Naoto Yokoya; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gan_2025_ICCV,
  author    = {Gan, Wanshui and Liu, Fang and Xu, Hongbin and Mo, Ningkai and Yokoya, Naoto},
  title     = {{GaussianOcc}: Fully Self-supervised and Efficient {3D} Occupancy Estimation with {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28980--28990}
}
Free-MoRef: Instantly Multiplexing Context Perception Capabilities of Video-MLLMs within Single Inference: Kuo Wang,

Quanlong Zheng,

Junlin Xie,

Yanhao Zhang,

Jinguo Luo,

Haonan Lu,

Liang Lin,

Fan Zhou,

Guanbin Li; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Kuo and Zheng, Quanlong and Xie, Junlin and Zhang, Yanhao and Luo, Jinguo and Lu, Haonan and Lin, Liang and Zhou, Fan and Li, Guanbin},
  title     = {{Free-MoRef}: Instantly Multiplexing Context Perception Capabilities of {Video-MLLMs} within Single Inference},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22499--22508}
}
Liberated-GS: 3D Gaussian Splatting Independent from SfM Point Clouds: Weihong Pan,

Xiaoyu Zhang,

Hongjia Zhai,

Xiaojun Xiang,

Hanqing Jiang,

Guofeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Pan_2025_ICCV,
  author    = {Pan, Weihong and Zhang, Xiaoyu and Zhai, Hongjia and Xiang, Xiaojun and Jiang, Hanqing and Zhang, Guofeng},
  title     = {{Liberated-GS}: {3D} {Gaussian} Splatting Independent from {SfM} Point Clouds},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26675--26685}
}
DecAD: Decoupling Anomalies in Latent Space for Multi-Class Unsupervised Anomaly Detection: Xiaolei Wang,

Xiaoyang Wang,

Huihui Bai,

Eng Gee Lim,

Jimin Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Xiaolei and Wang, Xiaoyang and Bai, Huihui and Lim, Eng Gee and Xiao, Jimin},
  title     = {{DecAD}: Decoupling Anomalies in Latent Space for Multi-Class Unsupervised Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21568--21577}
}
Kaputt: A Large-Scale Dataset for Visual Defect Detection: Sebastian Höfer,

Dorian F. Henning,

Artemij Amiranashvili,

Douglas Morrison,

Mariliza Tzes,

Ingmar Posner,

Marc Matvienko,

Alessandro Rennola,

Anton Milan; [pdf] [supp]
[bibtex]
@InProceedings{Hofer_2025_ICCV,
  author    = {H{\"o}fer, Sebastian and Henning, Dorian F. and Amiranashvili, Artemij and Morrison, Douglas and Tzes, Mariliza and Posner, Ingmar and Matvienko, Marc and Rennola, Alessandro and Milan, Anton},
  title     = {Kaputt: A Large-Scale Dataset for Visual Defect Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24224--24233}
}
Occupancy Learning with Spatiotemporal Memory: Ziyang Leng,

Jiawei Yang,

Wenlong Yi,

Bolei Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Leng_2025_ICCV,
  author    = {Leng, Ziyang and Yang, Jiawei and Yi, Wenlong and Zhou, Bolei},
  title     = {Occupancy Learning with Spatiotemporal Memory},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26569--26578}
}
ReferEverything: Towards Segmenting Everything We Can Speak of in Videos: Anurag Bagchi,

Zhipeng Bao,

Yu-Xiong Wang,

Pavel Tokmakov,

Martial Hebert; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bagchi_2025_ICCV,
  author    = {Bagchi, Anurag and Bao, Zhipeng and Wang, Yu-Xiong and Tokmakov, Pavel and Hebert, Martial},
  title     = {{ReferEverything}: Towards Segmenting Everything We Can Speak of in Videos},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23221--23231}
}
Make Your Training Flexible: Towards Deployment-Efficient Video Models: Chenting Wang,

Kunchang Li,

Tianxiang Jiang,

Xiangyu Zeng,

Yi Wang,

Limin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Chenting and Li, Kunchang and Jiang, Tianxiang and Zeng, Xiangyu and Wang, Yi and Wang, Limin},
  title     = {Make Your Training Flexible: Towards Deployment-Efficient Video Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23880--23891}
}
RadarSplat: Radar Gaussian Splatting for High-Fidelity Data Synthesis and 3D Reconstruction of Autonomous Driving Scenes: Pou-Chun Kung,

Skanda Harisha,

Ram Vasudevan,

Aline Eid,

Katherine A. Skinner; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kung_2025_ICCV,
  author    = {Kung, Pou-Chun and Harisha, Skanda and Vasudevan, Ram and Eid, Aline and Skinner, Katherine A.},
  title     = {{RadarSplat}: Radar {Gaussian} Splatting for High-Fidelity Data Synthesis and {3D} Reconstruction of Autonomous Driving Scenes},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27596--27606}
}
Hierarchical 3D Scene Graphs Construction Outdoors: Jon Nyffeler,

Federico Tombari,

Daniel Barath; [pdf] [supp]
[bibtex]
@InProceedings{Nyffeler_2025_ICCV,
  author    = {Nyffeler, Jon and Tombari, Federico and Barath, Daniel},
  title     = {Hierarchical {3D} Scene Graphs Construction Outdoors},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26817--26826}
}
SIC: Similarity-Based Interpretable Image Classification with Neural Networks: Tom Nuno Wolf,

Emre Kavak,

Fabian Bongratz,

Christian Wachinger; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wolf_2025_ICCV,
  author    = {Wolf, Tom Nuno and Kavak, Emre and Bongratz, Fabian and Wachinger, Christian},
  title     = {{SIC}: Similarity-Based Interpretable Image Classification with Neural Networks},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24276--24285}
}
CoLMDriver: LLM-based Negotiation Benefits Cooperative Autonomous Driving: Changxing Liu,

Genjia Liu,

Zijun Wang,

Jinchang Yang,

Siheng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Changxing and Liu, Genjia and Wang, Zijun and Yang, Jinchang and Chen, Siheng},
  title     = {{CoLMDriver}: {LLM}-based Negotiation Benefits Cooperative Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25951--25960}
}
InsideOut: Integrated RGB-Radiative Gaussian Splatting for Comprehensive 3D Object Representation: Jungmin Lee,

Seonghyuk Hong,

Juyong Lee,

Jaeyoon Lee,

Jongwon Choi; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Jungmin and Hong, Seonghyuk and Lee, Juyong and Lee, Jaeyoon and Choi, Jongwon},
  title     = {{InsideOut}: Integrated {RGB}-Radiative {Gaussian} Splatting for Comprehensive {3D} Object Representation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25820--25830}
}
Splat-LOAM: Gaussian Splatting LiDAR Odometry and Mapping: Emanuele Giacomini,

Luca Di Giammarino,

Lorenzo De Rebotti,

Giorgio Grisetti,

Martin R. Oswald; [pdf] [supp]
[bibtex]
@InProceedings{Giacomini_2025_ICCV,
  author    = {Giacomini, Emanuele and Di Giammarino, Luca and De Rebotti, Lorenzo and Grisetti, Giorgio and Oswald, Martin R.},
  title     = {{Splat-LOAM}: {Gaussian} Splatting {LiDAR} Odometry and Mapping},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27630--27639}
}
RTMap: Real-Time Recursive Mapping with Change Detection and Localization: Yuheng Du,

Sheng Yang,

Lingxuan Wang,

Zhenghua Hou,

Chengying Cai,

Zhitao Tan,

Mingxia Chen,

Shi-Sheng Huang,

Qiang Li; [pdf] [arXiv]
[bibtex]
@InProceedings{Du_2025_ICCV,
  author    = {Du, Yuheng and Yang, Sheng and Wang, Lingxuan and Hou, Zhenghua and Cai, Chengying and Tan, Zhitao and Chen, Mingxia and Huang, Shi-Sheng and Li, Qiang},
  title     = {{RTMap}: Real-Time Recursive Mapping with Change Detection and Localization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28021--28030}
}
Exploiting Domain Properties in Language-Driven Domain Generalization for Semantic Segmentation: Seogkyu Jeon,

Kibeom Hong,

Hyeran Byun; [pdf] [supp]
[bibtex]
@InProceedings{Jeon_2025_ICCV,
  author    = {Jeon, Seogkyu and Hong, Kibeom and Byun, Hyeran},
  title     = {Exploiting Domain Properties in Language-Driven Domain Generalization for Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20791--20801}
}
OVG-HQ: Online Video Grounding with Hybrid-modal Queries: Runhao Zeng,

Jiaqi Mao,

Minghao Lai,

Minh Hieu Phan,

Yanjie Dong,

Wei Wang,

Qi Chen,

Xiping Hu; [pdf] [supp]
[bibtex]
@InProceedings{Zeng_2025_ICCV,
  author    = {Zeng, Runhao and Mao, Jiaqi and Lai, Minghao and Phan, Minh Hieu and Dong, Yanjie and Wang, Wei and Chen, Qi and Hu, Xiping},
  title     = {{OVG-HQ}: Online Video Grounding with Hybrid-modal Queries},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21085--21096}
}
ConformalSAM: Unlocking the Potential of Foundational Segmentation Models in Semi-Supervised Semantic Segmentation with Conformal Prediction: Danhui Chen,

Ziquan Liu,

Chuxi Yang,

Dan Wang,

Yan Yan,

Yi Xu,

Xiangyang Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Danhui and Liu, Ziquan and Yang, Chuxi and Wang, Dan and Yan, Yan and Xu, Yi and Ji, Xiangyang},
  title     = {{ConformalSAM}: Unlocking the Potential of Foundational Segmentation Models in Semi-Supervised Semantic Segmentation with Conformal Prediction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24045--24055}
}
V2PE: Improving Multimodal Long-Context Capability of Vision-Language Models with Variable Visual Position Encoding: Junqi Ge,

Ziyi Chen,

Jintao Lin,

Jinguo Zhu,

Xihui Liu,

Jifeng Dai,

Xizhou Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ge_2025_ICCV,
  author    = {Ge, Junqi and Chen, Ziyi and Lin, Jintao and Zhu, Jinguo and Liu, Xihui and Dai, Jifeng and Zhu, Xizhou},
  title     = {{V2PE}: Improving Multimodal Long-Context Capability of Vision-Language Models with Variable Visual Position Encoding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21070--21084}
}
Multi-modal Segment Anything Model for Camouflaged Scene Segmentation: Guangyu Ren,

Hengyan Liu,

Michalis Lazarou,

Tania Stathaki; [pdf]
[bibtex]
@InProceedings{Ren_2025_ICCV,
  author    = {Ren, Guangyu and Liu, Hengyan and Lazarou, Michalis and Stathaki, Tania},
  title     = {Multi-modal {Segment Anything Model} for Camouflaged Scene Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19882--19892}
}
LLaVA-PruMerge: Adaptive Token Reduction for Efficient Large Multimodal Models: Yuzhang Shang,

Mu Cai,

Bingxin Xu,

Yong Jae Lee,

Yan Yan; [pdf] [supp]
[bibtex]
@InProceedings{Shang_2025_ICCV,
  author    = {Shang, Yuzhang and Cai, Mu and Xu, Bingxin and Lee, Yong Jae and Yan, Yan},
  title     = {{LLaVA-PruMerge}: Adaptive Token Reduction for Efficient Large Multimodal Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22857--22867}
}
Event-boosted Deformable 3D Gaussians for Dynamic Scene Reconstruction: Wenhao Xu,

Wenming Weng,

Yueyi Zhang,

Ruikang Xu,

Zhiwei Xiong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Wenhao and Weng, Wenming and Zhang, Yueyi and Xu, Ruikang and Xiong, Zhiwei},
  title     = {Event-boosted Deformable {3D} {Gaussians} for Dynamic Scene Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28334--28343}
}
Stereo Any Video: Temporally Consistent Stereo Matching: Junpeng Jing,

Weixun Luo,

Ye Mao,

Krystian Mikolajczyk; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jing_2025_ICCV,
  author    = {Jing, Junpeng and Luo, Weixun and Mao, Ye and Mikolajczyk, Krystian},
  title     = {Stereo Any Video: Temporally Consistent Stereo Matching},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20836--20846}
}
Constraint-Aware Feature Learning for Parametric Point Cloud: Xi Cheng,

Ruiqi Lei,

Di Huang,

Zhichao Liao,

Fengyuan Piao,

Yan Chen,

Pingfa Feng,

Long Zeng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_ICCV,
  author    = {Cheng, Xi and Lei, Ruiqi and Huang, Di and Liao, Zhichao and Piao, Fengyuan and Chen, Yan and Feng, Pingfa and Zeng, Long},
  title     = {Constraint-Aware Feature Learning for Parametric Point Cloud},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28114--28124}
}
Revisiting Point Cloud Completion: Are We Ready For The Real-World?: Stuti Pathak,

Prashant Kumar,

Dheeraj Baiju,

Nicholus Mboga,

Gunther Steenackers,

Rudi Penne; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pathak_2025_ICCV,
  author    = {Pathak, Stuti and Kumar, Prashant and Baiju, Dheeraj and Mboga, Nicholus and Steenackers, Gunther and Penne, Rudi},
  title     = {Revisiting Point Cloud Completion: Are We Ready For The Real-World?},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25388--25398}
}
Structure-aware Semantic Discrepancy and Consistency for 3D Medical Image Self-supervised Learning: Tan Pan,

Zhaorui Tan,

Kaiyu Guo,

Dongli Xu,

Weidi Xu,

Chen Jiang,

Xin Guo,

Yuan Qi,

Yuan Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2025_ICCV,
  author    = {Pan, Tan and Tan, Zhaorui and Guo, Kaiyu and Xu, Dongli and Xu, Weidi and Jiang, Chen and Guo, Xin and Qi, Yuan and Cheng, Yuan},
  title     = {Structure-aware Semantic Discrepancy and Consistency for {3D} Medical Image Self-supervised Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20257--20267}
}
MagShield: Towards Better Robustness in Sparse Inertial Motion Capture Under Magnetic Disturbances: Yunzhe Shao,

Xinyu Yi,

Lu Yin,

Shihui Guo,

Junhai Yong,

Feng Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shao_2025_ICCV,
  author    = {Shao, Yunzhe and Yi, Xinyu and Yin, Lu and Guo, Shihui and Yong, Junhai and Xu, Feng},
  title     = {{MagShield}: Towards Better Robustness in Sparse Inertial Motion Capture Under Magnetic Disturbances},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {29021--29030}
}
Bias-Resilient Weakly Supervised Semantic Segmentation Using Normalizing Flows: Xianglin Qiu,

Xiaoyang Wang,

Zhen Zhang,

Jimin Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Qiu_2025_ICCV,
  author    = {Qiu, Xianglin and Wang, Xiaoyang and Zhang, Zhen and Xiao, Jimin},
  title     = {Bias-Resilient Weakly Supervised Semantic Segmentation Using Normalizing Flows},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21321--21330}
}
UAVScenes: A Multi-Modal Dataset for UAVs: Sijie Wang,

Siqi Li,

Yawei Zhang,

Shangshu Yu,

Shenghai Yuan,

Rui She,

Quanjiang Guo,

JinXuan Zheng,

Ong Kang Howe,

Leonrich Chandra,

Shrivarshann Srijeyan,

Aditya Sivadas,

Toshan Aggarwal,

Heyuan Liu,

Hongming Zhang,

Chujie Chen,

Junyu Jiang,

Lihua Xie,

Wee Peng Tay; [pdf] [arXiv]
[bibtex]
% NOTE(review): the key Wang_2025_ICCV is shared by several entries in this file; disambiguate (e.g. with a suffix) before citing.
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Sijie and Li, Siqi and Zhang, Yawei and Yu, Shangshu and Yuan, Shenghai and She, Rui and Guo, Quanjiang and Zheng, JinXuan and Howe, Ong Kang and Chandra, Leonrich and Srijeyan, Shrivarshann and Sivadas, Aditya and Aggarwal, Toshan and Liu, Heyuan and Zhang, Hongming and Chen, Chujie and Jiang, Junyu and Xie, Lihua and Tay, Wee Peng},
  title     = {{UAVScenes}: A Multi-Modal Dataset for {UAVs}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28946--28958}
}
TPG-INR: Target Prior-Guided Implicit 3D CT Reconstruction for Enhanced Sparse-view Imaging: Qinglei Cao,

Ziyao Tang,

Xiaoqin Tang; [pdf]
[bibtex]
% NOTE(review): the key Cao_2025_ICCV is shared by more than one entry in this file; disambiguate (e.g. with a suffix) before citing.
@InProceedings{Cao_2025_ICCV,
  author    = {Cao, Qinglei and Tang, Ziyao and Tang, Xiaoqin},
  title     = {{TPG-INR}: Target Prior-Guided Implicit {3D} {CT} Reconstruction for Enhanced Sparse-view Imaging},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28239--28248}
}
S4M: Boosting Semi-Supervised Instance Segmentation with SAM: Heeji Yoon,

Heeseong Shin,

Eunbeen Hong,

Hyunwook Choi,

Hansang Cho,

Daun Jeong,

Seungryong Kim; [pdf] [supp]
[bibtex]
@InProceedings{Yoon_2025_ICCV,
  author    = {Yoon, Heeji and Shin, Heeseong and Hong, Eunbeen and Choi, Hyunwook and Cho, Hansang and Jeong, Daun and Kim, Seungryong},
  title     = {{S4M}: Boosting Semi-Supervised Instance Segmentation with {SAM}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20226--20236}
}
Debiased Curriculum Adaptation for Safe Transfer Learning in Chest X-ray Classification: Mingyang Liu,

Xinyang Chen,

Yang Shu,

Xiucheng Li,

Weili Guan,

Liqiang Nie; [pdf] [supp]
[bibtex]
% NOTE(review): the key Liu_2025_ICCV is shared by several entries in this file; disambiguate (e.g. with a suffix) before citing.
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Mingyang and Chen, Xinyang and Shu, Yang and Li, Xiucheng and Guan, Weili and Nie, Liqiang},
  title     = {Debiased Curriculum Adaptation for Safe Transfer Learning in Chest {X-ray} Classification},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22610--22619}
}
Quadratic Gaussian Splatting: High Quality Surface Reconstruction with Second-order Geometric Primitives: Ziyu Zhang,

Binbin Huang,

Hanqing Jiang,

Liyang Zhou,

Xiaojun Xiang,

Shuhan Shen; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Zhang_2025_ICCV is shared by several entries in this file; disambiguate (e.g. with a suffix) before citing.
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Ziyu and Huang, Binbin and Jiang, Hanqing and Zhou, Liyang and Xiang, Xiaojun and Shen, Shuhan},
  title     = {Quadratic {Gaussian} Splatting: High Quality Surface Reconstruction with Second-order Geometric Primitives},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28260--28270}
}
Prompt Guidance and Human Proximal Perception for HOT Prediction with Regional Joint Loss: Yuxiao Wang,

Yu Lei,

Zhenao Wei,

Weiying Xue,

Xinyu Jiang,

Nan Zhuang,

Qi Liu; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Wang_2025_ICCV is shared by several entries in this file; disambiguate (e.g. with a suffix) before citing.
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yuxiao and Lei, Yu and Wei, Zhenao and Xue, Weiying and Jiang, Xinyu and Zhuang, Nan and Liu, Qi},
  title     = {Prompt Guidance and Human Proximal Perception for {HOT} Prediction with Regional Joint Loss},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23636--23645}
}
Towards Omnimodal Expressions and Reasoning in Referring Audio-Visual Segmentation: Kaining Ying,

Henghui Ding,

Guangquan Jie,

Yu-Gang Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ying_2025_ICCV,
  author    = {Ying, Kaining and Ding, Henghui and Jie, Guangquan and Jiang, Yu-Gang},
  title     = {Towards Omnimodal Expressions and Reasoning in Referring Audio-Visual Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22575--22585}
}
Towards a Universal 3D Medical Multi-modality Generalization via Learning Personalized Invariant Representation: Zhaorui Tan,

Xi Yang,

Tan Pan,

Tianyi Liu,

Chen Jiang,

Xin Guo,

Qiufeng Wang,

Anh Nguyen,

Yuan Qi,

Kaizhu Huang,

Yuan Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2025_ICCV,
  author    = {Tan, Zhaorui and Yang, Xi and Pan, Tan and Liu, Tianyi and Jiang, Chen and Guo, Xin and Wang, Qiufeng and Nguyen, Anh and Qi, Yuan and Huang, Kaizhu and Cheng, Yuan},
  title     = {Towards a Universal {3D} Medical Multi-modality Generalization via Learning Personalized Invariant Representation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21895--21905}
}
SurfaceSplat: Connecting Surface Reconstruction and Gaussian Splatting: Zihui Gao,

Jia-Wang Bian,

Guosheng Lin,

Hao Chen,

Chunhua Shen; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Gao_2025_ICCV is shared by more than one entry in this file; disambiguate (e.g. with a suffix) before citing.
@InProceedings{Gao_2025_ICCV,
  author    = {Gao, Zihui and Bian, Jia-Wang and Lin, Guosheng and Chen, Hao and Shen, Chunhua},
  title     = {{SurfaceSplat}: Connecting Surface Reconstruction and {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28525--28534}
}
TransiT: Transient Transformer for Non-line-of-sight Videography: Ruiqian Li,

Siyuan Shen,

Suan Xia,

Ziheng Wang,

Xingyue Peng,

Chengxuan Song,

Yingsheng Zhu,

Tao Wu,

Shiying Li,

Jingyi Yu; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Li_2025_ICCV is shared by more than one entry in this file; disambiguate (e.g. with a suffix) before citing.
@InProceedings{Li_2025_ICCV,
  author    = {Li, Ruiqian and Shen, Siyuan and Xia, Suan and Wang, Ziheng and Peng, Xingyue and Song, Chengxuan and Zhu, Yingsheng and Wu, Tao and Li, Shiying and Yu, Jingyi},
  title     = {{TransiT}: Transient Transformer for Non-line-of-sight Videography},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27542--27551}
}
SAFT: Shape and Appearance of Fabrics from Template via Differentiable Physical Simulations from Monocular Video: David Stotko,

Reinhard Klein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Stotko_2025_ICCV,
  author    = {Stotko, David and Klein, Reinhard},
  title     = {{SAFT}: Shape and Appearance of Fabrics from Template via Differentiable Physical Simulations from Monocular Video},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27660--27670}
}
AD-GS: Object-Aware B-Spline Gaussian Splatting for Self-Supervised Autonomous Driving: Jiawei Xu,

Kai Deng,

Zexin Fan,

Shenlong Wang,

Jin Xie,

Jian Yang; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Jiawei and Deng, Kai and Fan, Zexin and Wang, Shenlong and Xie, Jin and Yang, Jian},
  title     = {{AD-GS}: Object-Aware {B-Spline} {Gaussian} Splatting for Self-Supervised Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24770--24779}
}
HRScene: How Far Are VLMs from Effective High-Resolution Image Understanding?: Yusen Zhang,

Wenliang Zheng,

Aashrith Madasu,

Peng Shi,

Ryo Kamoi,

Hao Zhou,

Zhuoyang Zou,

Shu Zhao,

Sarkar Snigdha Sarathi Das,

Vipul Gupta,

Xiaoxin Lu,

Nan Zhang,

Ranran Haoran Zhang,

Avitej Iyer,

Renze Lou,

Wenpeng Yin,

Rui Zhang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Zhang_2025_ICCV is shared by several entries in this file; disambiguate (e.g. with a suffix) before citing.
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yusen and Zheng, Wenliang and Madasu, Aashrith and Shi, Peng and Kamoi, Ryo and Zhou, Hao and Zou, Zhuoyang and Zhao, Shu and Das, Sarkar Snigdha Sarathi and Gupta, Vipul and Lu, Xiaoxin and Zhang, Nan and Zhang, Ranran Haoran and Iyer, Avitej and Lou, Renze and Yin, Wenpeng and Zhang, Rui},
  title     = {{HRScene}: How Far Are {VLMs} from Effective High-Resolution Image Understanding?},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22922--22933}
}
Graph Domain Adaptation with Dual-branch Encoder and Two-level Alignment for Whole Slide Image-based Survival Prediction: Yuntao Shou,

Xiangyong Cao,

Peiqiang Yan,

Qiao Hui,

Qian Zhao,

Deyu Meng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shou_2025_ICCV,
  author    = {Shou, Yuntao and Cao, Xiangyong and Yan, Peiqiang and Hui, Qiao and Zhao, Qian and Meng, Deyu},
  title     = {Graph Domain Adaptation with Dual-branch Encoder and Two-level Alignment for Whole Slide Image-based Survival Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19925--19935}
}
Free-running vs Synchronous: Single-Photon Lidar for High-flux 3D Imaging: Ruangrawee Kitichotkul,

Shashwath Bharadwaj,

Joshua Rapp,

Yanting Ma,

Alexander Mehta,

Vivek K Goyal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kitichotkul_2025_ICCV,
  author    = {Kitichotkul, Ruangrawee and Bharadwaj, Shashwath and Rapp, Joshua and Ma, Yanting and Mehta, Alexander and Goyal, Vivek K},
  title     = {Free-running vs Synchronous: Single-Photon Lidar for High-flux {3D} Imaging},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25972--25982}
}
MobileIE: An Extremely Lightweight and Effective ConvNet for Real-Time Image Enhancement on Mobile Devices: Hailong Yan,

Ao Li,

Xiangtao Zhang,

Zhe Liu,

Zenglin Shi,

Ce Zhu,

Le Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2025_ICCV,
  author    = {Yan, Hailong and Li, Ao and Zhang, Xiangtao and Liu, Zhe and Shi, Zenglin and Zhu, Ce and Zhang, Le},
  title     = {{MobileIE}: An Extremely Lightweight and Effective {ConvNet} for Real-Time Image Enhancement on Mobile Devices},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21949--21960}
}
ACE-G: Improving Generalization of Scene Coordinate Regression Through Query Pre-Training: Leonard Bruns,

Axel Barroso-Laguna,

Tommaso Cavallari,

Aron Monszpart,

Sowmya Munukutla,

Victor Adrian Prisacariu,

Eric Brachmann; [pdf] [supp]
[bibtex]
@InProceedings{Bruns_2025_ICCV,
  author    = {Bruns, Leonard and Barroso-Laguna, Axel and Cavallari, Tommaso and Monszpart, Aron and Munukutla, Sowmya and Prisacariu, Victor Adrian and Brachmann, Eric},
  title     = {{ACE-G}: Improving Generalization of Scene Coordinate Regression Through Query Pre-Training},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26751--26761}
}
Everything is a Video: Unifying Modalities through Next-Frame Prediction: G. Thomas Hudson,

Dean Slack,

Thomas Winterbottom,

Jamie Sterling,

Chenghao Xiao,

Junjie Shentu,

Noura Al Moubayed; [pdf] [arXiv]
[bibtex]
@InProceedings{Hudson_2025_ICCV,
  author    = {Hudson, G. Thomas and Slack, Dean and Winterbottom, Thomas and Sterling, Jamie and Xiao, Chenghao and Shentu, Junjie and Al Moubayed, Noura},
  title     = {Everything is a Video: Unifying Modalities through Next-Frame Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22004--22013}
}
LayerTracer: Cognitive-Aligned Layered SVG Synthesis via Diffusion Transformer: Yiren Song,

Danze Chen,

Mike Zheng Shou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2025_ICCV,
  author    = {Song, Yiren and Chen, Danze and Shou, Mike Zheng},
  title     = {{LayerTracer}: Cognitive-Aligned Layered {SVG} Synthesis via Diffusion Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19731--19741}
}
TopoTTA: Topology-Enhanced Test-Time Adaptation for Tubular Structure Segmentation: Jiale Zhou,

Wenhan Wang,

Shikun Li,

Xiaolei Qu,

Xin Guo,

Yizhong Liu,

Wenzhong Tang,

Xun Lin,

Yefeng Zheng; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Zhou_2025_ICCV is shared by several entries in this file; disambiguate (e.g. with a suffix) before citing.
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Jiale and Wang, Wenhan and Li, Shikun and Qu, Xiaolei and Guo, Xin and Liu, Yizhong and Tang, Wenzhong and Lin, Xun and Zheng, Yefeng},
  title     = {{TopoTTA}: Topology-Enhanced Test-Time Adaptation for Tubular Structure Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24123--24134}
}
Counting Stacked Objects: Corentin Dumery,

Noa Etté,

Aoxiang Fan,

Ren Li,

Jingyi Xu,

Hieu Le,

Pascal Fua; [pdf] [supp]
[bibtex]
@InProceedings{Dumery_2025_ICCV,
  author    = {Dumery, Corentin and Ett{\'e}, Noa and Fan, Aoxiang and Li, Ren and Xu, Jingyi and Le, Hieu and Fua, Pascal},
  title     = {Counting Stacked Objects},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19774--19783}
}
Sparfels: Fast Reconstruction from Sparse Unposed Imagery: Shubhendu Jena,

Amine Ouasfi,

Mae Younes,

Adnane Boukhayma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jena_2025_ICCV,
  author    = {Jena, Shubhendu and Ouasfi, Amine and Younes, Mae and Boukhayma, Adnane},
  title     = {Sparfels: Fast Reconstruction from Sparse Unposed Imagery},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27476--27487}
}
MikuDance: Animating Character Art with Mixed Motion Dynamics: Jiaxu Zhang,

Xianfang Zeng,

Xin Chen,

Wei Zuo,

Gang Yu,

Zhigang Tu; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Zhang_2025_ICCV is shared by several entries in this file; disambiguate (e.g. with a suffix) before citing.
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Jiaxu and Zeng, Xianfang and Chen, Xin and Zuo, Wei and Yu, Gang and Tu, Zhigang},
  title     = {{MikuDance}: Animating Character Art with Mixed Motion Dynamics},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19689--19699}
}
GaussRender: Learning 3D Occupancy with Gaussian Rendering: Loick Chambon,

Eloi Zablocki,

Alexandre Boulch,

Mickael Chen,

Matthieu Cord; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chambon_2025_ICCV,
  author    = {Chambon, Loick and Zablocki, Eloi and Boulch, Alexandre and Chen, Mickael and Cord, Matthieu},
  title     = {{GaussRender}: Learning {3D} Occupancy with {Gaussian} Rendering},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27010--27020}
}
UniDxMD: Towards Unified Representation for Cross-Modal Unsupervised Domain Adaptation in 3D Semantic Segmentation: Zhengyin Liang,

Hui Yin,

Min Liang,

Qianqian Du,

Ying Yang,

Hua Huang; [pdf] [supp]
[bibtex]
@InProceedings{Liang_2025_ICCV,
  author    = {Liang, Zhengyin and Yin, Hui and Liang, Min and Du, Qianqian and Yang, Ying and Huang, Hua},
  title     = {{UniDxMD}: Towards Unified Representation for Cross-Modal Unsupervised Domain Adaptation in {3D} Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20346--20356}
}
HumanOLAT: A Large-Scale Dataset for Full-Body Human Relighting and Novel-View Synthesis: Timo Teufel,

Pulkit Gera,

Xilong Zhou,

Umar Iqbal,

Pramod Rao,

Jan Kautz,

Vladislav Golyanik,

Christian Theobalt; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Teufel_2025_ICCV,
  author    = {Teufel, Timo and Gera, Pulkit and Zhou, Xilong and Iqbal, Umar and Rao, Pramod and Kautz, Jan and Golyanik, Vladislav and Theobalt, Christian},
  title     = {{HumanOLAT}: A Large-Scale Dataset for Full-Body Human Relighting and Novel-View Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {29131--29141}
}
Momentum-GS: Momentum Gaussian Self-Distillation for High-Quality Large Scene Reconstruction: Jixuan Fan,

Wanhua Li,

Yifei Han,

Tianru Dai,

Yansong Tang; [pdf] [supp]
[bibtex]
@InProceedings{Fan_2025_ICCV,
  author    = {Fan, Jixuan and Li, Wanhua and Han, Yifei and Dai, Tianru and Tang, Yansong},
  title     = {{Momentum-GS}: Momentum {Gaussian} Self-Distillation for High-Quality Large Scene Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25250--25260}
}
SEHDR: Single-Exposure HDR Novel View Synthesis via 3D Gaussian Bracketing: Yiyu Li,

Haoyuan Wang,

Ke Xu,

Gerhard Petrus Hancke,

Rynson W.H. Lau; [pdf] [arXiv]
[bibtex]
% NOTE(review): the key Li_2025_ICCV is shared by more than one entry in this file; disambiguate (e.g. with a suffix) before citing.
@InProceedings{Li_2025_ICCV,
  author    = {Li, Yiyu and Wang, Haoyuan and Xu, Ke and Hancke, Gerhard Petrus and Lau, Rynson W. H.},
  title     = {{SEHDR}: Single-Exposure {HDR} Novel View Synthesis via {3D} {Gaussian} Bracketing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26045--26054}
}
UPP: Unified Point-Level Prompting for Robust Point Cloud Analysis: Zixiang Ai,

Zhenyu Cui,

Yuxin Peng,

Jiahuan Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ai_2025_ICCV,
  author    = {Ai, Zixiang and Cui, Zhenyu and Peng, Yuxin and Zhou, Jiahuan},
  title     = {{UPP}: Unified Point-Level Prompting for Robust Point Cloud Analysis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27359--27368}
}
MOSAIC: Generating Consistent, Privacy-Preserving Scenes from Multiple Depth Views in Multi-Room Environments: Zhixuan Liu,

Haokun Zhu,

Rui Chen,

Jonathan Francis,

Soonmin Hwang,

Ji Zhang,

Jean Oh; [pdf] [arXiv]
[bibtex]
% NOTE(review): the key Liu_2025_ICCV is shared by several entries in this file; disambiguate (e.g. with a suffix) before citing.
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Zhixuan and Zhu, Haokun and Chen, Rui and Francis, Jonathan and Hwang, Soonmin and Zhang, Ji and Oh, Jean},
  title     = {{MOSAIC}: Generating Consistent, Privacy-Preserving Scenes from Multiple Depth Views in Multi-Room Environments},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27456--27465}
}
Towards Video Thinking Test: A Holistic Benchmark for Advanced Video Reasoning and Understanding: Yuanhan Zhang,

Yunice Chew,

Yuhao Dong,

Aria Leo,

Bo Hu,

Ziwei Liu; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Zhang_2025_ICCV is shared by several entries in this file; disambiguate (e.g. with a suffix) before citing.
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yuanhan and Chew, Yunice and Dong, Yuhao and Leo, Aria and Hu, Bo and Liu, Ziwei},
  title     = {Towards Video Thinking Test: A Holistic Benchmark for Advanced Video Reasoning and Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20626--20636}
}
Decoupled Diffusion Sparks Adaptive Scene Generation: Yunsong Zhou,

Naisheng Ye,

William Ljungbergh,

Tianyu Li,

Jiazhi Yang,

Zetong Yang,

Hongzi Zhu,

Christoffer Petersson,

Hongyang Li; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Zhou_2025_ICCV is shared by several entries in this file; disambiguate (e.g. with a suffix) before citing.
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Yunsong and Ye, Naisheng and Ljungbergh, William and Li, Tianyu and Yang, Jiazhi and Yang, Zetong and Zhu, Hongzi and Petersson, Christoffer and Li, Hongyang},
  title     = {Decoupled Diffusion Sparks Adaptive Scene Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27760--27770}
}
PriorMotion: Generative Class-Agnostic Motion Prediction with Raster-Vector Motion Field Priors: Kangan Qian,

Jinyu Miao,

Xinyu Jiao,

Ziang Luo,

Zheng Fu,

Yining Shi,

Yunlong Wang,

Kun Jiang,

Diange Yang; [pdf] [supp]
[bibtex]
@InProceedings{Qian_2025_ICCV,
  author    = {Qian, Kangan and Miao, Jinyu and Jiao, Xinyu and Luo, Ziang and Fu, Zheng and Shi, Yining and Wang, Yunlong and Jiang, Kun and Yang, Diange},
  title     = {{PriorMotion}: Generative Class-Agnostic Motion Prediction with Raster-Vector Motion Field Priors},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27284--27294}
}
Scaling Tumor Segmentation: Best Lessons from Real and Synthetic Data: Qi Chen,

Xinze Zhou,

Chen Liu,

Hao Chen,

Wenxuan Li,

Zekun Jiang,

Ziyan Huang,

Yuxuan Zhao,

Dexin Yu,

Junjun He,

Yefeng Zheng,

Ling Shao,

Alan Yuille,

Zongwei Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Qi and Zhou, Xinze and Liu, Chen and Chen, Hao and Li, Wenxuan and Jiang, Zekun and Huang, Ziyan and Zhao, Yuxuan and Yu, Dexin and He, Junjun and Zheng, Yefeng and Shao, Ling and Yuille, Alan and Zhou, Zongwei},
  title     = {Scaling Tumor Segmentation: Best Lessons from Real and Synthetic Data},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24001--24013}
}
MobileViCLIP: An Efficient Video-Text Model for Mobile Devices: Min Yang,

Zihan Jia,

Zhilin Dai,

Sheng Guo,

Limin Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Min and Jia, Zihan and Dai, Zhilin and Guo, Sheng and Wang, Limin},
  title     = {{MobileViCLIP}: An Efficient Video-Text Model for Mobile Devices},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20824--20835}
}
SignRep: Enhancing Self-Supervised Sign Representations: Ryan Wong,

Necati Cihan Camgoz,

Richard Bowden; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wong_2025_ICCV,
  author    = {Wong, Ryan and Camgoz, Necati Cihan and Bowden, Richard},
  title     = {{SignRep}: Enhancing Self-Supervised Sign Representations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22804--22814}
}
Visual Relation Diffusion for Human-Object Interaction Detection: Ping Cao,

Yepeng Tang,

Chunjie Zhang,

Xiaolong Zheng,

Chao Liang,

Yunchao Wei,

Yao Zhao; [pdf] [supp]
[bibtex]
% NOTE(review): the key Cao_2025_ICCV is shared by more than one entry in this file; disambiguate (e.g. with a suffix) before citing.
@InProceedings{Cao_2025_ICCV,
  author    = {Cao, Ping and Tang, Yepeng and Zhang, Chunjie and Zheng, Xiaolong and Liang, Chao and Wei, Yunchao and Zhao, Yao},
  title     = {Visual Relation Diffusion for Human-Object Interaction Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23551--23560}
}
Egocentric Action-aware Inertial Localization in Point Clouds with Vision-Language Guidance: Mingfang Zhang,

Ryo Yonetani,

Yifei Huang,

Liangyang Ouyang,

Ruicong Liu,

Yoichi Sato; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Zhang_2025_ICCV is shared by several entries in this file; disambiguate (e.g. with a suffix) before citing.
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Mingfang and Yonetani, Ryo and Huang, Yifei and Ouyang, Liangyang and Liu, Ruicong and Sato, Yoichi},
  title     = {Egocentric Action-aware Inertial Localization in Point Clouds with Vision-Language Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27209--27219}
}
EVT: Efficient View Transformation for Multi-Modal 3D Object Detection: Yongjin Lee,

Hyeon-Mun Jeong,

Yurim Jeon,

Sanghyun Kim; [pdf] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Yongjin and Jeong, Hyeon-Mun and Jeon, Yurim and Kim, Sanghyun},
  title     = {{EVT}: Efficient View Transformation for Multi-Modal {3D} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26632--26642}
}
Vision-Language Neural Graph Featurization for Extracting Retinal Lesions: Taimur Hassan,

Anabia Sohail,

Muzammal Naseer,

Naoufel Werghi; [pdf]
[bibtex]
@InProceedings{Hassan_2025_ICCV,
  author    = {Hassan, Taimur and Sohail, Anabia and Naseer, Muzammal and Werghi, Naoufel},
  title     = {Vision-Language Neural Graph Featurization for Extracting Retinal Lesions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23700--23709}
}
V2XPnP: Vehicle-to-Everything Spatio-Temporal Fusion for Multi-Agent Perception and Prediction: Zewei Zhou,

Hao Xiang,

Zhaoliang Zheng,

Seth Z. Zhao,

Mingyue Lei,

Yun Zhang,

Tianhui Cai,

Xinyi Liu,

Johnson Liu,

Maheswari Bajji,

Xin Xia,

Zhiyu Huang,

Bolei Zhou,

Jiaqi Ma; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Zhou_2025_ICCV is shared by several entries in this file; disambiguate (e.g. with a suffix) before citing.
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Zewei and Xiang, Hao and Zheng, Zhaoliang and Zhao, Seth Z. and Lei, Mingyue and Zhang, Yun and Cai, Tianhui and Liu, Xinyi and Liu, Johnson and Bajji, Maheswari and Xia, Xin and Huang, Zhiyu and Zhou, Bolei and Ma, Jiaqi},
  title     = {{V2XPnP}: Vehicle-to-Everything Spatio-Temporal Fusion for Multi-Agent Perception and Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25399--25409}
}
Fix-CLIP: Dual-Branch Hierarchical Contrastive Learning via Synthetic Captions for Better Understanding of Long Text: Bingchao Wang,

Zhiwei Ning,

Jianyu Ding,

Xuanang Gao,

Yin Li,

Dongsheng Jiang,

Jie Yang,

Wei Liu; [pdf] [supp]
[bibtex]
% NOTE(review): the key Wang_2025_ICCV is shared by several entries in this file; disambiguate (e.g. with a suffix) before citing.
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Bingchao and Ning, Zhiwei and Ding, Jianyu and Gao, Xuanang and Li, Yin and Jiang, Dongsheng and Yang, Jie and Liu, Wei},
  title     = {{Fix-CLIP}: Dual-Branch Hierarchical Contrastive Learning via Synthetic Captions for Better Understanding of Long Text},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20694--20704}
}
ETVA: Evaluation of Text-to-Video Alignment via Fine-grained Question Generation and Answering: Kaisi Guan,

Zhengfeng Lai,

Yuchong Sun,

Peng Zhang,

Wei Liu,

Kieran Liu,

Meng Cao,

Ruihua Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guan_2025_ICCV,
  author    = {Guan, Kaisi and Lai, Zhengfeng and Sun, Yuchong and Zhang, Peng and Liu, Wei and Liu, Kieran and Cao, Meng and Song, Ruihua},
  title     = {{ETVA}: Evaluation of Text-to-Video Alignment via Fine-grained Question Generation and Answering},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21299--21309}
}
Outdoor Monocular SLAM with Global Scale-Consistent 3D Gaussian Pointmaps: Chong Cheng,

Sicheng Yu,

Zijian Wang,

Yifan Zhou,

Hao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_ICCV,
  author    = {Cheng, Chong and Yu, Sicheng and Wang, Zijian and Zhou, Yifan and Wang, Hao},
  title     = {Outdoor Monocular {SLAM} with Global Scale-Consistent {3D} {Gaussian} Pointmaps},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26035--26044}
}
7DGS: Unified Spatial-Temporal-Angular Gaussian Splatting: Zhongpai Gao,

Benjamin Planche,

Meng Zheng,

Anwesa Choudhuri,

Terrence Chen,

Ziyan Wu; [pdf] [arXiv]
[bibtex]
% NOTE(review): the key Gao_2025_ICCV is shared by more than one entry in this file; disambiguate (e.g. with a suffix) before citing.
@InProceedings{Gao_2025_ICCV,
  author    = {Gao, Zhongpai and Planche, Benjamin and Zheng, Meng and Choudhuri, Anwesa and Chen, Terrence and Wu, Ziyan},
  title     = {{7DGS}: Unified Spatial-Temporal-Angular {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26316--26325}
}
Robust and Efficient 3D Gaussian Splatting for Urban Scene Reconstruction: Zhensheng Yuan,

Haozhi Huang,

Zhen Xiong,

Di Wang,

Guanghua Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2025_ICCV,
  author    = {Yuan, Zhensheng and Huang, Haozhi and Xiong, Zhen and Wang, Di and Yang, Guanghua},
  title     = {Robust and Efficient {3D} {Gaussian} Splatting for Urban Scene Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26209--26219}
}
Iris: Breaking GUI Complexity with Adaptive Focus and Self-Refining: Zhiqi Ge,

Juncheng Li,

Xinglei Pang,

Minghe Gao,

Kaihang Pan,

Wang Lin,

Hao Fei,

Wenqiao Zhang,

Siliang Tang,

Yueting Zhuang; [pdf] [arXiv]
[bibtex]
@InProceedings{Ge_2025_ICCV,
  author    = {Ge, Zhiqi and Li, Juncheng and Pang, Xinglei and Gao, Minghe and Pan, Kaihang and Lin, Wang and Fei, Hao and Zhang, Wenqiao and Tang, Siliang and Zhuang, Yueting},
  title     = {Iris: Breaking {GUI} Complexity with Adaptive Focus and Self-Refining},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24559--24568}
}
SciVid: Cross-Domain Evaluation of Video Models in Scientific Applications: Yana Hasson,

Pauline Luc,

Liliane Momeni,

Maks Ovsjanikov,

Guillaume Le Moing,

Alina Kuznetsova,

Ira Ktena,

Jennifer J. Sun,

Skanda Koppula,

Dilara Gokay,

Joseph Heyward,

Etienne Pot,

Andrew Zisserman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hasson_2025_ICCV,
  author    = {Hasson, Yana and Luc, Pauline and Momeni, Liliane and Ovsjanikov, Maks and Le Moing, Guillaume and Kuznetsova, Alina and Ktena, Ira and Sun, Jennifer J. and Koppula, Skanda and Gokay, Dilara and Heyward, Joseph and Pot, Etienne and Zisserman, Andrew},
  title     = {{SciVid}: Cross-Domain Evaluation of Video Models in Scientific Applications},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21800--21811}
}
Vision-Language Models Can't See the Obvious: Ngoc Dung Huynh,

Phuc H Le-Khac,

Wamiq Reyaz Para,

Ankit Singh,

Sanath Narayan; [pdf]
[bibtex]
@InProceedings{Huynh_2025_ICCV,
  author    = {Huynh, Ngoc Dung and Le-Khac, Phuc H and Para, Wamiq Reyaz and Singh, Ankit and Narayan, Sanath},
  title     = {Vision-Language Models Can't See the Obvious},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24159--24169}
}
CountSE: Soft Exemplar Open-set Object Counting: Shuai Liu,

Peng Zhang,

Shiwei Zhang,

Wei Ke; [pdf]
[bibtex]
% NOTE(review): the key Liu_2025_ICCV is shared by several entries in this file; disambiguate (e.g. with a suffix) before citing.
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Shuai and Zhang, Peng and Zhang, Shiwei and Ke, Wei},
  title     = {{CountSE}: Soft Exemplar Open-set Object Counting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21536--21546}
}
MIORe & VAR-MIORe: Benchmarks to Push the Boundaries of Restoration: George Ciubotariu,

Zhuyun Zhou,

Zongwei Wu,

Radu Timofte; [pdf] [supp]
[bibtex]
@InProceedings{Ciubotariu_2025_ICCV,
  author    = {Ciubotariu, George and Zhou, Zhuyun and Wu, Zongwei and Timofte, Radu},
  title     = {{MIORe} \& {VAR-MIORe}: Benchmarks to Push the Boundaries of Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19784--19793}
}
Thermal Polarimetric Multi-view Stereo: Takahiro Kushida,

Kenichiro Tanaka; [pdf]
[bibtex]
@InProceedings{Kushida_2025_ICCV,
  author    = {Kushida, Takahiro and Tanaka, Kenichiro},
  title     = {Thermal Polarimetric Multi-view Stereo},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27390--27399}
}
SparseVILA: Decoupling Visual Sparsity for Efficient VLM Inference: Samir Khaki,

Junxian Guo,

Jiaming Tang,

Shang Yang,

Yukang Chen,

Konstantinos N. Plataniotis,

Yao Lu,

Song Han,

Zhijian Liu; [pdf] [supp]
[bibtex]
@InProceedings{Khaki_2025_ICCV,
  author    = {Khaki, Samir and Guo, Junxian and Tang, Jiaming and Yang, Shang and Chen, Yukang and Plataniotis, Konstantinos N. and Lu, Yao and Han, Song and Liu, Zhijian},
  title     = {{SparseVILA}: Decoupling Visual Sparsity for Efficient {VLM} Inference},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23784--23794}
}
Relative Illumination Fields: Learning Medium and Light Independent Underwater Scenes: Mengkun She,

Felix Seegräber,

David Nakath,

Patricia Schöntag,

Kevin Köser; [pdf] [supp]
[bibtex]
@InProceedings{She_2025_ICCV,
  author    = {She, Mengkun and Seegr{\"a}ber, Felix and Nakath, David and Sch{\"o}ntag, Patricia and K{\"o}ser, Kevin},
  title     = {Relative Illumination Fields: Learning Medium and Light Independent Underwater Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {29110--29119},
}
Wave-MambaAD: Wavelet-driven State Space Model for Multi-class Unsupervised Anomaly Detection: Qiao Zhang,

Mingwen Shao,

Xinyuan Chen,

Xiang Lv,

Kai Xu; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Qiao and Shao, Mingwen and Chen, Xinyuan and Lv, Xiang and Xu, Kai},
  title     = {{Wave-MambaAD}: Wavelet-driven State Space Model for Multi-class Unsupervised Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20868--20877},
}
Inter2Former: Dynamic Hybrid Attention for Efficient High-Precision Interactive Segmentation: You Huang,

Lichao Chen,

Jiayi Ji,

Liujuan Cao,

Shengchuan Zhang,

Rongrong Ji; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, You and Chen, Lichao and Ji, Jiayi and Cao, Liujuan and Zhang, Shengchuan and Ji, Rongrong},
  title     = {{Inter2Former}: Dynamic Hybrid Attention for Efficient High-Precision Interactive Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19816--19826},
}
KDA: Knowledge Diffusion Alignment with Enhanced Context for Video Temporal Grounding: Ran Ran,

Jiwei Wei,

Shiyuan He,

Zeyu Ma,

Chaoning Zhang,

Ning Xie,

Yang Yang; [pdf]
[bibtex]
@InProceedings{Ran_2025_ICCV,
  author    = {Ran, Ran and Wei, Jiwei and He, Shiyuan and Ma, Zeyu and Zhang, Chaoning and Xie, Ning and Yang, Yang},
  title     = {{KDA}: Knowledge Diffusion Alignment with Enhanced Context for Video Temporal Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23311--23320},
}
TrackAny3D: Transferring Pretrained 3D Models for Category-unified 3D Point Cloud Tracking: Mengmeng Wang,

Haonan Wang,

Yulong Li,

Xiangjie Kong,

Jiaxin Du,

Guojiang Shen,

Feng Xia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Mengmeng and Wang, Haonan and Li, Yulong and Kong, Xiangjie and Du, Jiaxin and Shen, Guojiang and Xia, Feng},
  title     = {{TrackAny3D}: Transferring Pretrained {3D} Models for Category-unified {3D} Point Cloud Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28249--28259},
}
S2M2: Scalable Stereo Matching Model for Reliable Depth Estimation: Junhong Min,

Youngpil Jeon,

Jimin Kim,

Minyong Choi; [pdf] [supp]
[bibtex]
@InProceedings{Min_2025_ICCV,
  author    = {Min, Junhong and Jeon, Youngpil and Kim, Jimin and Choi, Minyong},
  title     = {{S2M2}: Scalable Stereo Matching Model for Reliable Depth Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26729--26739},
}
NETracer: A Topology-Aware Iterative Tracing Approach for Tubular Structure Extraction: Chao Liu,

Yangbo Jiang,

Nenggan Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Chao and Jiang, Yangbo and Zheng, Nenggan},
  title     = {{NETracer}: A Topology-Aware Iterative Tracing Approach for Tubular Structure Extraction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20593--20602},
}
Towards Open-World Generation of Stereo Images and Unsupervised Matching: Feng Qiao,

Zhexiao Xiong,

Eric Xing,

Nathan Jacobs; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qiao_2025_ICCV,
  author    = {Qiao, Feng and Xiong, Zhexiao and Xing, Eric and Jacobs, Nathan},
  title     = {Towards Open-World Generation of Stereo Images and Unsupervised Matching},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26579--26589},
}
PolarAnything: Diffusion-based Polarimetric Image Synthesis: Kailong Zhang,

Youwei Lyu,

Heng Guo,

Si Li,

Zhanyu Ma,

Boxin Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Kailong and Lyu, Youwei and Guo, Heng and Li, Si and Ma, Zhanyu and Shi, Boxin},
  title     = {{PolarAnything}: Diffusion-based Polarimetric Image Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26466--26476},
}
What's Making That Sound Right Now? Video-centric Audio-Visual Localization: Hahyeon Choi,

Junhoo Lee,

Nojun Kwak; [pdf] [supp]
[bibtex]
@InProceedings{Choi_2025_ICCV,
  author    = {Choi, Hahyeon and Lee, Junhoo and Kwak, Nojun},
  title     = {What's Making That Sound Right Now? {Video}-centric Audio-Visual Localization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20095--20104},
}
Scheduling Weight Transitions for Quantization-Aware Training: Junghyup Lee,

Jeimin Jeon,

Dohyung Kim,

Bumsub Ham; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Junghyup and Jeon, Jeimin and Kim, Dohyung and Ham, Bumsub},
  title     = {Scheduling Weight Transitions for Quantization-Aware Training},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23466--23475},
}
Efficient Spiking Point Mamba for Point Cloud Analysis: Peixi Wu,

Bosong Chai,

Menghua Zheng,

Wei Li,

Zhangchi Hu,

Jie Chen,

Zheyu Zhang,

Hebei Li,

Xiaoyan Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Peixi and Chai, Bosong and Zheng, Menghua and Li, Wei and Hu, Zhangchi and Chen, Jie and Zhang, Zheyu and Li, Hebei and Sun, Xiaoyan},
  title     = {Efficient Spiking Point {Mamba} for Point Cloud Analysis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26393--26403},
}
FE-CLIP: Frequency Enhanced CLIP Model for Zero-Shot Anomaly Detection and Segmentation: Tao Gong,

Qi Chu,

Bin Liu,

Wei Zhou,

Nenghai Yu; [pdf] [supp]
[bibtex]
@InProceedings{Gong_2025_ICCV,
  author    = {Gong, Tao and Chu, Qi and Liu, Bin and Zhou, Wei and Yu, Nenghai},
  title     = {{FE-CLIP}: Frequency Enhanced {CLIP} Model for Zero-Shot Anomaly Detection and Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21220--21230},
}
Alleviating Textual Reliance in Medical Language-guided Segmentation via Prototype-driven Semantic Approximation: Shuchang Ye,

Usman Naseem,

Mingyuan Meng,

Jinman Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2025_ICCV,
  author    = {Ye, Shuchang and Naseem, Usman and Meng, Mingyuan and Kim, Jinman},
  title     = {Alleviating Textual Reliance in Medical Language-guided Segmentation via Prototype-driven Semantic Approximation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22316--22326},
}
CoTMR: Chain-of-Thought Multi-Scale Reasoning for Training-Free Zero-Shot Composed Image Retrieval: Zelong Sun,

Dong Jing,

Zhiwu Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Zelong and Jing, Dong and Lu, Zhiwu},
  title     = {{CoTMR}: Chain-of-Thought Multi-Scale Reasoning for Training-Free Zero-Shot Composed Image Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22675--22684},
}
BASIC: Boosting Visual Alignment with Intrinsic Refined Embeddings in Multimodal Large Language Models: Jianting Tang,

Yubo Wang,

Haoyu Cao,

Linli Xu; [pdf] [arXiv]
[bibtex]
@InProceedings{Tang_2025_ICCV,
  author    = {Tang, Jianting and Wang, Yubo and Cao, Haoyu and Xu, Linli},
  title     = {{BASIC}: Boosting Visual Alignment with Intrinsic Refined Embeddings in Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20582--20592},
}
Long-term Traffic Simulation with Interleaved Autoregressive Motion and Scenario Generation: Xiuyu Yang,

Shuhan Tan,

Philipp Krähenbühl; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Xiuyu and Tan, Shuhan and Kr{\"a}henb{\"u}hl, Philipp},
  title     = {Long-term Traffic Simulation with Interleaved Autoregressive Motion and Scenario Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25305--25314},
}
RhythmGuassian: Repurposing Generalizable Gaussian Model For Remote Physiological Measurement: Hao Lu,

Yuting Zhang,

Jiaqi Tang,

Bowen Fu,

Wenhang Ge,

Wei Wei,

Kaishun Wu,

Yingcong Chen; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Hao and Zhang, Yuting and Tang, Jiaqi and Fu, Bowen and Ge, Wenhang and Wei, Wei and Wu, Kaishun and Chen, Yingcong},
  title     = {{RhythmGuassian}: Repurposing Generalizable {Gaussian} Model For Remote Physiological Measurement},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20780--20790},
}
DASH: Detection and Assessment of Systematic Hallucinations of VLMs: Maximilian Augustin,

Yannic Neuhaus,

Matthias Hein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Augustin_2025_ICCV,
  author    = {Augustin, Maximilian and Neuhaus, Yannic and Hein, Matthias},
  title     = {{DASH}: Detection and Assessment of Systematic Hallucinations of {VLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22748--22759},
}
On the Provable Importance of Gradients for Autonomous Language-Assisted Image Clustering: Bo Peng,

Jie Lu,

Guangquan Zhang,

Zhen Fang; [pdf] [supp]
[bibtex]
@InProceedings{Peng_2025_ICCV,
  author    = {Peng, Bo and Lu, Jie and Zhang, Guangquan and Fang, Zhen},
  title     = {On the Provable Importance of Gradients for Autonomous Language-Assisted Image Clustering},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19805--19815},
}
ZipVL: Accelerating Vision-Language Models through Dynamic Token Sparsity: Yefei He,

Feng Chen,

Jing Liu,

Wenqi Shao,

Hong Zhou,

Kaipeng Zhang,

Bohan Zhuang; [pdf] [supp]
[bibtex]
@InProceedings{He_2025_ICCV,
  author    = {He, Yefei and Chen, Feng and Liu, Jing and Shao, Wenqi and Zhou, Hong and Zhang, Kaipeng and Zhuang, Bohan},
  title     = {{ZipVL}: Accelerating Vision-Language Models through Dynamic Token Sparsity},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20477--20486},
}
RayletDF: Raylet Distance Fields for Generalizable 3D Surface Reconstruction from Point Clouds or Gaussians: Shenxing Wei,

Jinxi Li,

Yafei Yang,

Siyuan Zhou,

Bo Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Shenxing and Li, Jinxi and Yang, Yafei and Zhou, Siyuan and Yang, Bo},
  title     = {{RayletDF}: Raylet Distance Fields for Generalizable {3D} Surface Reconstruction from Point Clouds or {Gaussians}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25616--25626},
}
Planar Affine Rectification from Local Change of Scale and Orientation: Yuval Nissan,

Marc Pollefeys,

Daniel Barath; [pdf] [supp]
[bibtex]
@InProceedings{Nissan_2025_ICCV,
  author    = {Nissan, Yuval and Pollefeys, Marc and Barath, Daniel},
  title     = {Planar Affine Rectification from Local Change of Scale and Orientation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27147--27155},
}
You Share Beliefs, I Adapt: Progressive Heterogeneous Collaborative Perception: Hao Si,

Ehsan Javanmardi,

Manabu Tsukada; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Si_2025_ICCV,
  author    = {Si, Hao and Javanmardi, Ehsan and Tsukada, Manabu},
  title     = {You Share Beliefs, {I} Adapt: Progressive Heterogeneous Collaborative Perception},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27521--27530},
}
Advancing Visual Large Language Model for Multi-granular Versatile Perception: Wentao Xiang,

Haoxian Tan,

Yujie Zhong,

Cong Wei,

Dengjie Li,

Yujiu Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiang_2025_ICCV,
  author    = {Xiang, Wentao and Tan, Haoxian and Zhong, Yujie and Wei, Cong and Li, Dengjie and Yang, Yujiu},
  title     = {Advancing Visual Large Language Model for Multi-granular Versatile Perception},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22153--22164},
}
STD-GS: Exploring Frame-Event Interaction for SpatioTemporal-Disentangled Gaussian Splatting to Reconstruct High-Dynamic Scene: Hanyu Zhou,

Haonan Wang,

Haoyue Liu,

Yuxing Duan,

Luxin Yan,

Gim Hee Lee; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Hanyu and Wang, Haonan and Liu, Haoyue and Duan, Yuxing and Yan, Luxin and Lee, Gim Hee},
  title     = {{STD-GS}: Exploring Frame-Event Interaction for {SpatioTemporal}-Disentangled {Gaussian} Splatting to Reconstruct High-Dynamic Scene},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24801--24810},
}
C2MIL: Synchronizing Semantic and Topological Causalities in Multiple Instance Learning for Robust and Interpretable Survival Analysis: Min Cen,

Zhenfeng Zhuang,

Yuzhe Zhang,

Min Zeng,

Baptiste Magnier,

Lequan Yu,

Hong Zhang,

Liansheng Wang; [pdf] [supp]
[bibtex]
@InProceedings{Cen_2025_ICCV,
  author    = {Cen, Min and Zhuang, Zhenfeng and Zhang, Yuzhe and Zeng, Min and Magnier, Baptiste and Yu, Lequan and Zhang, Hong and Wang, Liansheng},
  title     = {{C2MIL}: Synchronizing Semantic and Topological Causalities in Multiple Instance Learning for Robust and Interpretable Survival Analysis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24392--24401},
}
Object-level Correlation for Few-Shot Segmentation: Chunlin Wen,

Yu Zhang,

Jie Fan,

Hongyuan Zhu,

Xiu-Shen Wei,

Yijun Wang,

Zhiqiang Kou,

Shuzhou Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wen_2025_ICCV,
  author    = {Wen, Chunlin and Zhang, Yu and Fan, Jie and Zhu, Hongyuan and Wei, Xiu-Shen and Wang, Yijun and Kou, Zhiqiang and Sun, Shuzhou},
  title     = {Object-level Correlation for Few-Shot Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23689--23699},
}
CVFusion: Cross-View Fusion of 4D Radar and Camera for 3D Object Detection: Hanzhi Zhong,

Zhiyu Xiang,

Ruoyu Xu,

Jingyun Fu,

Peng Xu,

Shaohong Wang,

Zhihao Yang,

Tianyu Pu,

Eryun Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhong_2025_ICCV,
  author    = {Zhong, Hanzhi and Xiang, Zhiyu and Xu, Ruoyu and Fu, Jingyun and Xu, Peng and Wang, Shaohong and Yang, Zhihao and Pu, Tianyu and Liu, Eryun},
  title     = {{CVFusion}: Cross-View Fusion of {4D} Radar and Camera for {3D} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28188--28197},
}
ViT-Linearizer: Distilling Quadratic Knowledge into Linear-Time Vision Models: Guoyizhe Wei,

Rama Chellappa; [pdf] [supp]
[bibtex]
@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Guoyizhe and Chellappa, Rama},
  title     = {{ViT-Linearizer}: Distilling Quadratic Knowledge into Linear-Time Vision Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20737--20747},
}
RobustSplat: Decoupling Densification and Dynamics for Transient-Free 3DGS: Chuanyu Fu,

Yuqi Zhang,

Kunbin Yao,

Guanying Chen,

Yuan Xiong,

Chuan Huang,

Shuguang Cui,

Xiaochun Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2025_ICCV,
  author    = {Fu, Chuanyu and Zhang, Yuqi and Yao, Kunbin and Chen, Guanying and Xiong, Yuan and Huang, Chuan and Cui, Shuguang and Cao, Xiaochun},
  title     = {{RobustSplat}: Decoupling Densification and Dynamics for Transient-Free {3DGS}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27126--27136},
}
FOLDER: Accelerating Multi-Modal Large Language Models with Enhanced Performance: Haicheng Wang,

Zhemeng Yu,

Gabriele Spadaro,

Chen Ju,

Victor Quétu,

Shuai Xiao,

Enzo Tartaglione; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Haicheng and Yu, Zhemeng and Spadaro, Gabriele and Ju, Chen and Qu{\'e}tu, Victor and Xiao, Shuai and Tartaglione, Enzo},
  title     = {{FOLDER}: Accelerating Multi-Modal Large Language Models with Enhanced Performance},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23614--23625},
}
Learn2Synth: Learning Optimal Data Synthesis Using Hypergradients for Brain Image Segmentation: Xiaoling Hu,

Xiangrui Zeng,

Oula Puonti,

Juan Eugenio Iglesias,

Bruce Fischl,

Yaël Balbastre; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Xiaoling and Zeng, Xiangrui and Puonti, Oula and Iglesias, Juan Eugenio and Fischl, Bruce and Balbastre, Ya{\"e}l},
  title     = {{Learn2Synth}: Learning Optimal Data Synthesis Using Hypergradients for Brain Image Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20368--20378},
}
Bolt3D: Generating 3D Scenes in Seconds: Stanislaw Szymanowicz,

Jason Y. Zhang,

Pratul Srinivasan,

Ruiqi Gao,

Arthur Brussee,

Aleksander Holynski,

Ricardo Martin-Brualla,

Jonathan T. Barron,

Philipp Henzler; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Szymanowicz_2025_ICCV,
  author    = {Szymanowicz, Stanislaw and Zhang, Jason Y. and Srinivasan, Pratul and Gao, Ruiqi and Brussee, Arthur and Holynski, Aleksander and Martin-Brualla, Ricardo and Barron, Jonathan T. and Henzler, Philipp},
  title     = {{Bolt3D}: Generating {3D} Scenes in Seconds},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24846--24857},
}
Know Your Attention Maps: Class-specific Token Masking for Weakly Supervised Semantic Segmentation: Joëlle Hanna,

Damian Borth; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hanna_2025_ICCV,
  author    = {Hanna, Jo{\"e}lle and Borth, Damian},
  title     = {Know Your Attention Maps: Class-specific Token Masking for Weakly Supervised Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23763--23772},
}
Reducing Unimodal Bias in Multi-Modal Semantic Segmentation with Multi-Scale Functional Entropy Regularization: Xu Zheng,

Yuanhuiyi Lyu,

Lutao Jiang,

Danda Pani Paudel,

Luc Van Gool,

Xuming Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Xu and Lyu, Yuanhuiyi and Jiang, Lutao and Paudel, Danda Pani and Van Gool, Luc and Hu, Xuming},
  title     = {Reducing Unimodal Bias in Multi-Modal Semantic Segmentation with Multi-Scale Functional Entropy Regularization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21166--21176},
}
ETA: Efficiency through Thinking Ahead, A Dual Approach to Self-Driving with Large Models: Shadi Hamdan,

Chonghao Sima,

Zetong Yang,

Hongyang Li,

Fatma Guney; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hamdan_2025_ICCV,
  author    = {Hamdan, Shadi and Sima, Chonghao and Yang, Zetong and Li, Hongyang and Guney, Fatma},
  title     = {{ETA}: Efficiency through Thinking Ahead, A Dual Approach to Self-Driving with Large Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26529--26538},
}
Understanding Personal Concept in Open-Vocabulary Semantic Segmentation: Sunghyun Park,

Jungsoo Lee,

Shubhankar Borse,

Munawar Hayat,

Sungha Choi,

Kyuwoong Hwang,

Fatih Porikli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_ICCV,
  author    = {Park, Sunghyun and Lee, Jungsoo and Borse, Shubhankar and Hayat, Munawar and Choi, Sungha and Hwang, Kyuwoong and Porikli, Fatih},
  title     = {Understanding Personal Concept in Open-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19957--19966},
}
Fuzzy Contrastive Decoding to Alleviate Object Hallucination in Large Vision-Language Models: Jieun Kim,

Jinmyeong Kim,

Yoonji Kim,

Sung-Bae Cho; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Jieun and Kim, Jinmyeong and Kim, Yoonji and Cho, Sung-Bae},
  title     = {Fuzzy Contrastive Decoding to Alleviate Object Hallucination in Large Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20572--20581},
}
Boosting Multimodal Learning via Disentangled Gradient Learning: Shicai Wei,

Chunbo Luo,

Yang Luo; [pdf] [arXiv]
[bibtex]
@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Shicai and Luo, Chunbo and Luo, Yang},
  title     = {Boosting Multimodal Learning via Disentangled Gradient Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22879--22888},
}
DisTime: Distribution-based Time Representation for Video Large Language Models: Yingsen Zeng,

Zepeng Huang,

Yujie Zhong,

Chengjian Feng,

Jie Hu,

Lin Ma,

Yang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2025_ICCV,
  author    = {Zeng, Yingsen and Huang, Zepeng and Zhong, Yujie and Feng, Chengjian and Hu, Jie and Ma, Lin and Liu, Yang},
  title     = {{DisTime}: Distribution-based Time Representation for Video Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21961--21971},
}
COIN: Confidence Score-Guided Distillation for Annotation-Free Cell Segmentation: Sanghyun Jo,

Seo Jin Lee,

Seungwoo Lee,

Seohyung Hong,

Hyungseok Seo,

Kyungsu Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jo_2025_ICCV,
  author    = {Jo, Sanghyun and Lee, Seo Jin and Lee, Seungwoo and Hong, Seohyung and Seo, Hyungseok and Kim, Kyungsu},
  title     = {{COIN}: Confidence Score-Guided Distillation for Annotation-Free Cell Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20324--20335},
}
3D Test-time Adaptation via Graph Spectral Driven Point Shift: Xin Wei,

Qin Yang,

Yijie Fang,

Mingrui Zhu,

Nannan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Xin and Yang, Qin and Fang, Yijie and Zhu, Mingrui and Wang, Nannan},
  title     = {{3D} Test-time Adaptation via Graph Spectral Driven Point Shift},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26762--26771},
}
StruMamba3D: Exploring Structural Mamba for Self-supervised Point Cloud Representation Learning: Chuxin Wang,

Yixin Zha,

Wenfei Yang,

Tianzhu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Chuxin and Zha, Yixin and Yang, Wenfei and Zhang, Tianzhu},
  title     = {{StruMamba3D}: Exploring Structural {Mamba} for Self-supervised Point Cloud Representation Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28546--28555},
}
Easy3D: A Simple Yet Effective Method for 3D Interactive Segmentation: Andrea Simonelli,

Norman Müller,

Peter Kontschieder; [pdf] [supp]
[bibtex]
@InProceedings{Simonelli_2025_ICCV,
  author    = {Simonelli, Andrea and M{\"u}ller, Norman and Kontschieder, Peter},
  title     = {{Easy3D}: A Simple Yet Effective Method for {3D} Interactive Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24707--24716},
}
DynImg: Key Frames with Visual Prompts are Good Representation for Multi-Modal Video Understanding: Xiaoyi Bao,

Chenwei Xie,

Hao Tang,

Tingyu Weng,

Xiaofeng Wang,

Yun Zheng,

Xingang Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bao_2025_ICCV,
  author    = {Bao, Xiaoyi and Xie, Chenwei and Tang, Hao and Weng, Tingyu and Wang, Xiaofeng and Zheng, Yun and Wang, Xingang},
  title     = {{DynImg}: Key Frames with Visual Prompts are Good Representation for Multi-Modal Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23678--23688},
}
Self-Supervised Sparse Sensor Fusion for Long Range Perception: Edoardo Palladin,

Samuel Brucker,

Filippo Ghilotti,

Praveen Narayanan,

Mario Bijelic,

Felix Heide; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Palladin_2025_ICCV,
  author    = {Palladin, Edoardo and Brucker, Samuel and Ghilotti, Filippo and Narayanan, Praveen and Bijelic, Mario and Heide, Felix},
  title     = {Self-Supervised Sparse Sensor Fusion for Long Range Perception},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27498--27509},
}
GeoDistill: Geometry-Guided Self-Distillation for Weakly Supervised Cross-View Localization: Shaowen Tong,

Zimin Xia,

Alexandre Alahi,

Xuming He,

Yujiao Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tong_2025_ICCV,
  author    = {Tong, Shaowen and Xia, Zimin and Alahi, Alexandre and He, Xuming and Shi, Yujiao},
  title     = {{GeoDistill}: Geometry-Guided Self-Distillation for Weakly Supervised Cross-View Localization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25357--25366},
}
Leveraging 2D Priors and SDF Guidance for Urban Scene Rendering: Siddharth Tourani,

Jayaram Reddy,

Akash Kumbar,

Satyajit Tourani,

Nishant Goyal,

Madhava Krishna,

N Dinesh Reddy,

Muhammad Haris Khan; [pdf] [supp]
[bibtex]
@InProceedings{Tourani_2025_ICCV,
  author    = {Tourani, Siddharth and Reddy, Jayaram and Kumbar, Akash and Tourani, Satyajit and Goyal, Nishant and Krishna, Madhava and Reddy, N Dinesh and Khan, Muhammad Haris},
  title     = {Leveraging {2D} Priors and {SDF} Guidance for Urban Scene Rendering},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {29051--29063},
}
SVTRv2: CTC Beats Encoder-Decoder Models in Scene Text Recognition: Yongkun Du,

Zhineng Chen,

Hongtao Xie,

Caiyan Jia,

Yu-Gang Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Du_2025_ICCV,
  author    = {Du, Yongkun and Chen, Zhineng and Xie, Hongtao and Jia, Caiyan and Jiang, Yu-Gang},
  title     = {{SVTRv2}: {CTC} Beats Encoder-Decoder Models in Scene Text Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20147--20156},
}
CNS-Bench: Benchmarking Image Classifier Robustness Under Continuous Nuisance Shifts: Olaf Dünkel,

Artur Jesslen,

Jiahao Xie,

Christian Theobalt,

Christian Rupprecht,

Adam Kortylewski; [pdf] [supp]
[bibtex]
@InProceedings{Dunkel_2025_ICCV,
  author    = {D{\"u}nkel, Olaf and Jesslen, Artur and Xie, Jiahao and Theobalt, Christian and Rupprecht, Christian and Kortylewski, Adam},
  title     = {{CNS-Bench}: Benchmarking Image Classifier Robustness Under Continuous Nuisance Shifts},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19978--19988},
}
Noise2Score3D: Tweedie's Approach for Unsupervised Point Cloud Denoising: Xiangbin Wei,

Yuanfeng Wang,

Ao Xu,

Lingyu Zhu,

Dongyong Sun,

Keren Li,

Yang Li,

Qi Qin; [pdf] [supp]
[bibtex]
@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Xiangbin and Wang, Yuanfeng and Xu, Ao and Zhu, Lingyu and Sun, Dongyong and Li, Keren and Li, Yang and Qin, Qi},
  title     = {{Noise2Score3D}: {Tweedie's} Approach for Unsupervised Point Cloud Denoising},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25993--26003},
}
I2-World: Intra-Inter Tokenization for Efficient Dynamic 4D Scene Forecasting: Zhimin Liao,

Ping Wei,

Ruijie Zhang,

Shuaijia Chen,

Haoxuan Wang,

Ziyang Ren; [pdf]
[bibtex]
@InProceedings{Liao_2025_ICCV,
  author    = {Liao, Zhimin and Wei, Ping and Zhang, Ruijie and Chen, Shuaijia and Wang, Haoxuan and Ren, Ziyang},
  title     = {{I2-World}: Intra-Inter Tokenization for Efficient Dynamic {4D} Scene Forecasting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25810--25819},
}
Auto-Vocabulary Semantic Segmentation: Osman Ülger,

Maksymilian Kulicki,

Yuki Asano,

Martin R. Oswald; [pdf] [supp]
[bibtex]
@InProceedings{Ulger_2025_ICCV,
  author    = {{\"U}lger, Osman and Kulicki, Maksymilian and Asano, Yuki and Oswald, Martin R.},
  title     = {Auto-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24266--24275},
}
MSA2: Multi-task Framework with Structure-aware and Style-adaptive Character Representation for Open-set Chinese Text Recognition: Yangfu Li,

Hongjian Zhan,

Qi Liu,

Li Sun,

Yu-Jie Xiong,

Yue Lu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Yangfu and Zhan, Hongjian and Liu, Qi and Sun, Li and Xiong, Yu-Jie and Lu, Yue},
  title     = {{MSA2}: Multi-task Framework with Structure-aware and Style-adaptive Character Representation for Open-set {Chinese} Text Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23095--23104},
}
PCR-GS: COLMAP-Free 3D Gaussian Splatting via Pose Co-Regularizations: Yu Wei,

Jiahui Zhang,

Xiaoqin Zhang,

Ling Shao,

Shijian Lu; [pdf] [supp]
[bibtex]
@InProceedings{Wei_2025_ICCV,
  author    = {Wei, Yu and Zhang, Jiahui and Zhang, Xiaoqin and Shao, Ling and Lu, Shijian},
  title     = {{PCR-GS}: {COLMAP}-Free {3D} {Gaussian} Splatting via Pose Co-Regularizations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26499--26508},
}
Fusion Meets Diverse Conditions: A High-diversity Benchmark and Baseline for UAV-based Multimodal Object Detection with Condition Cues: Chen Chen,

Kangcheng Bin,

Ting Hu,

Jiahao Qi,

Xingyue Liu,

Tianpeng Liu,

Zhen Liu,

Yongxiang Liu,

Ping Zhong; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Chen and Bin, Kangcheng and Hu, Ting and Qi, Jiahao and Liu, Xingyue and Liu, Tianpeng and Liu, Zhen and Liu, Yongxiang and Zhong, Ping},
  title     = {Fusion Meets Diverse Conditions: A High-diversity Benchmark and Baseline for {UAV}-based Multimodal Object Detection with Condition Cues},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27958--27967},
}
Stealthy Backdoor Attack in Federated Learning via Adaptive Layer-wise Gradient Alignment: Qingqian Yang,

Peishen Yan,

Xiaoyu Wu,

Jiaru Zhang,

Tao Song,

Yang Hua,

Hao Wang,

Liangliang Wang,

Haibing Guan; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Qingqian and Yan, Peishen and Wu, Xiaoyu and Zhang, Jiaru and Song, Tao and Hua, Yang and Wang, Hao and Wang, Liangliang and Guan, Haibing},
  title     = {Stealthy Backdoor Attack in Federated Learning via Adaptive Layer-wise Gradient Alignment},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {29163--29172},
}
DiffPCI: Large Motion Point Cloud frame Interpolation with Diffusion Model: Tianyu Zhang,

Haobo Jiang,

Jian Yang,

Jin Xie; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Tianyu and Jiang, Haobo and Yang, Jian and Xie, Jin},
  title     = {{DiffPCI}: Large Motion Point Cloud frame Interpolation with Diffusion Model},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27348--27358},
}
Auto-Controlled Image Perception in MLLMs via Visual Perception Tokens: Runpeng Yu,

Xinyin Ma,

Xinchao Wang; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Runpeng and Ma, Xinyin and Wang, Xinchao},
  title     = {Auto-Controlled Image Perception in {MLLMs} via Visual Perception Tokens},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21822--21831},
}
Addressing Representation Collapse in Vector Quantized Models with One Linear Layer: Yongxin Zhu,

Bocheng Li,

Yifei Xin,

Zhihua Xia,

Linli Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Yongxin and Li, Bocheng and Xin, Yifei and Xia, Zhihua and Xu, Linli},
  title     = {Addressing Representation Collapse in Vector Quantized Models with One Linear Layer},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22968--22977},
}
Borrowing Eyes for the Blind Spot: Overcoming Data Scarcity in Malicious Video Detection via Cross-Domain Retrieval Augmentation: Rongpei Hong,

Jian Lang,

Ting Zhong,

Fan Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Hong_2025_ICCV,
  author    = {Hong, Rongpei and Lang, Jian and Zhong, Ting and Zhou, Fan},
  title     = {Borrowing Eyes for the Blind Spot: Overcoming Data Scarcity in Malicious Video Detection via Cross-Domain Retrieval Augmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22728--22737},
}
SuperDec: 3D Scene Decomposition with Superquadrics Primitives: Elisabetta Fedele,

Boyang Sun,

Leonidas Guibas,

Marc Pollefeys,

Francis Engelmann; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fedele_2025_ICCV,
  author    = {Fedele, Elisabetta and Sun, Boyang and Guibas, Leonidas and Pollefeys, Marc and Engelmann, Francis},
  title     = {{SuperDec}: {3D} Scene Decomposition with Superquadrics Primitives},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24625--24635},
}
LaRender: Training-Free Occlusion Control in Image Generation via Latent Rendering: Xiaohang Zhan,

Dingming Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhan_2025_ICCV,
  author    = {Zhan, Xiaohang and Liu, Dingming},
  title     = {{LaRender}: Training-Free Occlusion Control in Image Generation via Latent Rendering},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19679--19688},
}
DONUT: A Decoder-Only Model for Trajectory Prediction: Markus Knoche,

Daan de Geus,

Bastian Leibe; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Knoche_2025_ICCV,
  author    = {Knoche, Markus and de Geus, Daan and Leibe, Bastian},
  title     = {{DONUT}: A Decoder-Only Model for Trajectory Prediction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28903--28912},
}
Scene Coordinate Reconstruction Priors: Wenjing Bian,

Axel Barroso-Laguna,

Tommaso Cavallari,

Victor Adrian Prisacariu,

Eric Brachmann; [pdf] [supp]
[bibtex]
@InProceedings{Bian_2025_ICCV,
  author    = {Bian, Wenjing and Barroso-Laguna, Axel and Cavallari, Tommaso and Prisacariu, Victor Adrian and Brachmann, Eric},
  title     = {Scene Coordinate Reconstruction Priors},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25765--25776},
}
Temporal Overlapping Prediction: A Self-supervised Pre-training Method for LiDAR Moving Object Segmentation: Ziliang Miao,

Runjian Chen,

Yixi Cai,

Buwei He,

Wenquan Zhao,

Wenqi Shao,

Bo Zhang,

Fu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Miao_2025_ICCV,
  author    = {Miao, Ziliang and Chen, Runjian and Cai, Yixi and He, Buwei and Zhao, Wenquan and Shao, Wenqi and Zhang, Bo and Zhang, Fu},
  title     = {Temporal Overlapping Prediction: A Self-supervised Pre-training Method for {LiDAR} Moving Object Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26653--26663},
}
No More Sibling Rivalry: Debiasing Human-Object Interaction Detection: Bin Yang,

Yulin Zhang,

Hong-Yu Zhou,

Sibei Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Bin and Zhang, Yulin and Zhou, Hong-Yu and Yang, Sibei},
  title     = {No More Sibling Rivalry: Debiasing Human-Object Interaction Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22707--22717},
}
Temperature in Cosine-based Softmax Loss: Takumi Kobayashi; [pdf] [supp]
[bibtex]
@InProceedings{Kobayashi_2025_ICCV,
  author    = {Kobayashi, Takumi},
  title     = {Temperature in Cosine-based Softmax Loss},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22199--22208},
}
Exploring Probabilistic Modeling Beyond Domain Generalization for Semantic Segmentation: I-Hsiang Chen,

Hua-En Chang,

Wei-Ting Chen,

Jenq-Neng Hwang,

Sy-Yen Kuo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, I-Hsiang and Chang, Hua-En and Chen, Wei-Ting and Hwang, Jenq-Neng and Kuo, Sy-Yen},
  title     = {Exploring Probabilistic Modeling Beyond Domain Generalization for Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21755--21765},
}
Learning Robust Stereo Matching in the Wild with Selective Mixture-of-Experts: Yun Wang,

Longguang Wang,

Chenghao Zhang,

Yongjian Zhang,

Zhanjie Zhang,

Ao Ma,

Chenyou Fan,

Tin Lun Lam,

Junjie Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Yun and Wang, Longguang and Zhang, Chenghao and Zhang, Yongjian and Zhang, Zhanjie and Ma, Ao and Fan, Chenyou and Lam, Tin Lun and Hu, Junjie},
  title     = {Learning Robust Stereo Matching in the Wild with Selective Mixture-of-Experts},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21276--21287},
}
Token-Efficient VLM: High-Resolution Image Understanding via Dynamic Region Proposal: Yitong Jiang,

Jinwei Gu,

Tianfan Xue,

Ka Chun Cheung,

Pavlo Molchanov,

Hongxu Yin,

Sifei Liu; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2025_ICCV,
  author    = {Jiang, Yitong and Gu, Jinwei and Xue, Tianfan and Cheung, Ka Chun and Molchanov, Pavlo and Yin, Hongxu and Liu, Sifei},
  title     = {Token-Efficient {VLM}: High-Resolution Image Understanding via Dynamic Region Proposal},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24147--24158},
}
Robust 3D Object Detection using Probabilistic Point Clouds from Single-Photon LiDARs: Bhavya Goyal,

Felipe Gutierrez-Barragan,

Wei Lin,

Andreas Velten,

Yin Li,

Mohit Gupta; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Goyal_2025_ICCV,
  author    = {Goyal, Bhavya and Gutierrez-Barragan, Felipe and Lin, Wei and Velten, Andreas and Li, Yin and Gupta, Mohit},
  title     = {Robust {3D} Object Detection using Probabilistic Point Clouds from Single-Photon {LiDARs}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28417--28427},
}
TimeExpert: An Expert-Guided Video LLM for Video Temporal Grounding: Zuhao Yang,

Yingchen Yu,

Yunqing Zhao,

Shijian Lu,

Song Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Zuhao and Yu, Yingchen and Zhao, Yunqing and Lu, Shijian and Bai, Song},
  title     = {{TimeExpert}: An Expert-Guided Video {LLM} for Video Temporal Grounding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24286--24296},
}
GS-Occ3D: Scaling Vision-only Occupancy Reconstruction with Gaussian Splatting: Baijun Ye,

Minghui Qin,

Saining Zhang,

Moonjun Gong,

Shaoting Zhu,

Hao Zhao,

Hang Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Ye_2025_ICCV,
  author    = {Ye, Baijun and Qin, Minghui and Zhang, Saining and Gong, Moonjun and Zhu, Shaoting and Zhao, Hao and Zhao, Hang},
  title     = {{GS-Occ3D}: Scaling Vision-only Occupancy Reconstruction with {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25925--25937},
}
R-LiViT: A LiDAR-Visual-Thermal Dataset Enabling Vulnerable Road User Focused Roadside Perception: Jonas Mirlach,

Lei Wan,

Andreas Wiedholz,

Hannan Ejaz Keen,

Andreas Eich; [pdf]
[bibtex]
@InProceedings{Mirlach_2025_ICCV,
  author    = {Mirlach, Jonas and Wan, Lei and Wiedholz, Andreas and Keen, Hannan Ejaz and Eich, Andreas},
  title     = {{R-LiViT}: A {LiDAR}-Visual-Thermal Dataset Enabling Vulnerable Road User Focused Roadside Perception},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28375--28384},
}
Spatially-Varying Autofocus: Yingsi Qin,

Aswin C. Sankaranarayanan,

Matthew O'Toole; [pdf] [supp]
[bibtex]
@InProceedings{Qin_2025_ICCV,
  author    = {Qin, Yingsi and Sankaranarayanan, Aswin C. and O'Toole, Matthew},
  title     = {Spatially-Varying Autofocus},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24645--24654},
}
ZeroStereo: Zero-shot Stereo Matching from Single Images: Xianqi Wang,

Hao Yang,

Gangwei Xu,

Junda Cheng,

Min Lin,

Yong Deng,

Jinliang Zang,

Yurui Chen,

Xin Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Xianqi and Yang, Hao and Xu, Gangwei and Cheng, Junda and Lin, Min and Deng, Yong and Zang, Jinliang and Chen, Yurui and Yang, Xin},
  title     = {{ZeroStereo}: Zero-shot Stereo Matching from Single Images},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28177--28187},
}
Training-Free Industrial Defect Generation with Diffusion Models: Ruyi Xu,

Yen-Tzu Chiu,

Tai-I Chen,

Oscar Chew,

Yung-Yu Chuang,

Wen-Huang Cheng; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Ruyi and Chiu, Yen-Tzu and Chen, Tai-I and Chew, Oscar and Chuang, Yung-Yu and Cheng, Wen-Huang},
  title     = {Training-Free Industrial Defect Generation with Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24214--24223},
}
Hierarchical Event Memory for Accurate and Low-latency Online Video Temporal Grounding: Minghang Zheng,

Yuxin Peng,

Benyuan Sun,

Yi Yang,

Yang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Minghang and Peng, Yuxin and Sun, Benyuan and Yang, Yi and Liu, Yang},
  title     = {Hierarchical Event Memory for Accurate and Low-latency Online Video Temporal Grounding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21589--21599},
}
Interaction-Merged Motion Planning: Effectively Leveraging Diverse Motion Datasets for Robust Planning: Giwon Lee,

Wooseong Jeong,

Daehee Park,

Jaewoo Jeong,

Kuk-Jin Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Giwon and Jeong, Wooseong and Park, Daehee and Jeong, Jaewoo and Yoon, Kuk-Jin},
  title     = {Interaction-Merged Motion Planning: Effectively Leveraging Diverse Motion Datasets for Robust Planning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28610--28621},
}
DeRIS: Decoupling Perception and Cognition for Enhanced Referring Image Segmentation through Loopback Synergy: Ming Dai,

Wenxuan Cheng,

Jiang-jiang Liu,

Sen Yang,

Wenxiao Cai,

Yanpeng Sun,

Wankou Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dai_2025_ICCV,
  author    = {Dai, Ming and Cheng, Wenxuan and Liu, Jiang-jiang and Yang, Sen and Cai, Wenxiao and Sun, Yanpeng and Yang, Wankou},
  title     = {{DeRIS}: Decoupling Perception and Cognition for Enhanced Referring Image Segmentation through Loopback Synergy},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19936--19946},
}
ZIM: Zero-Shot Image Matting for Anything: Beomyoung Kim,

Chanyong Shin,

Joonhyun Jeong,

Hyungsik Jung,

Se-Yun Lee,

Sewhan Chun,

Dong-Hyun Hwang,

Joonsang Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Beomyoung and Shin, Chanyong and Jeong, Joonhyun and Jung, Hyungsik and Lee, Se-Yun and Chun, Sewhan and Hwang, Dong-Hyun and Yu, Joonsang},
  title     = {{ZIM}: Zero-Shot Image Matting for Anything},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23828--23838},
}
Skip-Vision: Efficient and Scalable Acceleration of Vision-Language Models via Adaptive Token Skipping: Weili Zeng,

Ziyuan Huang,

Kaixiang Ji,

Yichao Yan; [pdf] [supp]
[bibtex]
@InProceedings{Zeng_2025_ICCV,
  author    = {Zeng, Weili and Huang, Ziyuan and Ji, Kaixiang and Yan, Yichao},
  title     = {{Skip-Vision}: Efficient and Scalable Acceleration of Vision-Language Models via Adaptive Token Skipping},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21384--21397},
}
Enrich and Detect: Video Temporal Grounding with Multimodal LLMs: Shraman Pramanick,

Effrosyni Mavroudi,

Yale Song,

Rama Chellappa,

Lorenzo Torresani,

Triantafyllos Afouras; [pdf] [supp]
[bibtex]
@InProceedings{Pramanick_2025_ICCV,
  author    = {Pramanick, Shraman and Mavroudi, Effrosyni and Song, Yale and Chellappa, Rama and Torresani, Lorenzo and Afouras, Triantafyllos},
  title     = {Enrich and Detect: Video Temporal Grounding with Multimodal {LLMs}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24297--24308},
}
Class Token as Proxy: Optimal Transport-assisted Proxy Learning for Weakly Supervised Semantic Segmentation: Jian Wang,

Tianhong Dai,

Bingfeng Zhang,

Siyue Yu,

Eng Gee Lim,

Jimin Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Jian and Dai, Tianhong and Zhang, Bingfeng and Yu, Siyue and Lim, Eng Gee and Xiao, Jimin},
  title     = {Class Token as Proxy: Optimal Transport-assisted Proxy Learning for Weakly Supervised Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21645--21654},
}
FIND: Few-Shot Anomaly Inspection with Normal-Only Multi-Modal Data: Yiting Li,

Fayao Liu,

Jingyi Liao,

Sichao Tian,

Chuan-Sheng Foo,

Xulei Yang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Yiting and Liu, Fayao and Liao, Jingyi and Tian, Sichao and Foo, Chuan-Sheng and Yang, Xulei},
  title     = {{FIND}: Few-Shot Anomaly Inspection with Normal-Only Multi-Modal Data},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23290--23299},
}
MaskSAM: Auto-prompt SAM with Mask Classification for Volumetric Medical Image Segmentation: Bin Xie,

Hao Tang,

Bin Duan,

Dawen Cai,

Yan Yan,

Gady Agam; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2025_ICCV,
  author    = {Xie, Bin and Tang, Hao and Duan, Bin and Cai, Dawen and Yan, Yan and Agam, Gady},
  title     = {{MaskSAM}: Auto-prompt {SAM} with Mask Classification for Volumetric Medical Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24423--24433},
}
AID: Adapting Image2Video Diffusion Models for Instruction-guided Video Prediction: Zhen Xing,

Qi Dai,

Zejia Weng,

Zuxuan Wu,

Yu-Gang Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xing_2025_ICCV,
  author    = {Xing, Zhen and Dai, Qi and Weng, Zejia and Wu, Zuxuan and Jiang, Yu-Gang},
  title     = {{AID}: Adapting {Image2Video} Diffusion Models for Instruction-guided Video Prediction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21243--21253},
}
Splat-based 3D Scene Reconstruction with Extreme Motion-blur: Hyeonjoong Jang,

Dongyoung Choi,

Donggun Kim,

Woohyun Kang,

Min H. Kim; [pdf] [supp]
[bibtex]
@InProceedings{Jang_2025_ICCV,
  author    = {Jang, Hyeonjoong and Choi, Dongyoung and Kim, Donggun and Kang, Woohyun and Kim, Min H.},
  title     = {Splat-based {3D} Scene Reconstruction with Extreme Motion-blur},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26425--26434},
}
Towards Robustness of Person Search against Corruptions: Woojung Son,

Yoonki Cho,

Guoyuan An,

Chanmi Lee,

Sung-Eui Yoon; [pdf] [supp]
[bibtex]
@InProceedings{Son_2025_ICCV,
  author    = {Son, Woojung and Cho, Yoonki and An, Guoyuan and Lee, Chanmi and Yoon, Sung-Eui},
  title     = {Towards Robustness of Person Search against Corruptions},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23408--23418},
}
INSTINCT: Instance-Level Interaction Architecture for Query-Based Collaborative Perception: Yunjiang Xu,

Lingzhi Li,

Jin Wang,

Yupeng Ouyang,

Benyuan Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Yunjiang and Li, Lingzhi and Wang, Jin and Ouyang, Yupeng and Yang, Benyuan},
  title     = {{INSTINCT}: Instance-Level Interaction Architecture for Query-Based Collaborative Perception},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25464--25473},
}
DriveX: Omni Scene Modeling for Learning Generalizable World Knowledge in Autonomous Driving: Chen Shi,

Shaoshuai Shi,

Kehua Sheng,

Bo Zhang,

Li Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2025_ICCV,
  author    = {Shi, Chen and Shi, Shaoshuai and Sheng, Kehua and Zhang, Bo and Jiang, Li},
  title     = {{DriveX}: Omni Scene Modeling for Learning Generalizable World Knowledge in Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28599--28609},
}
MagicCity: Geometry-Aware 3D City Generation from Satellite Imagery with Multi-View Consistency: Xingbo Yao,

Xuanmin Wang,

Hao Wu,

Chengliang Ping,

Doudou Zhang,

Hui Xiong; [pdf] [supp]
[bibtex]
@InProceedings{Yao_2025_ICCV,
  author    = {Yao, Xingbo and Wang, Xuanmin and Wu, Hao and Ping, Chengliang and Zhang, Doudou and Xiong, Hui},
  title     = {{MagicCity}: Geometry-Aware {3D} City Generation from Satellite Imagery with Multi-View Consistency},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25325--25334},
}
RadGPT: Constructing 3D Image-Text Tumor Datasets: Pedro R.A.S. Bassi,

Mehmet Can Yavuz,

Ibrahim Ethem Hamamci,

Sezgin Er,

Xiaoxi Chen,

Wenxuan Li,

Bjoern Menze,

Sergio Decherchi,

Andrea Cavalli,

Kang Wang,

Yang Yang,

Alan Yuille,

Zongwei Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Bassi_2025_ICCV,
  author    = {Bassi, Pedro R. A. S. and Yavuz, Mehmet Can and Hamamci, Ibrahim Ethem and Er, Sezgin and Chen, Xiaoxi and Li, Wenxuan and Menze, Bjoern and Decherchi, Sergio and Cavalli, Andrea and Wang, Kang and Yang, Yang and Yuille, Alan and Zhou, Zongwei},
  title     = {{RadGPT}: Constructing {3D} Image-Text Tumor Datasets},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23720--23730},
}
LangBridge: Interpreting Image as a Combination of Language Embeddings: Jiaqi Liao,

Yuwei Niu,

Fanqing Meng,

Hao Li,

Changyao Tian,

Yinuo Du,

Yuwen Xiong,

Dianqi Li,

Xizhou Zhu,

Li Yuan,

Jifeng Dai,

Yu Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liao_2025_ICCV,
  author    = {Liao, Jiaqi and Niu, Yuwei and Meng, Fanqing and Li, Hao and Tian, Changyao and Du, Yinuo and Xiong, Yuwen and Li, Dianqi and Zhu, Xizhou and Yuan, Li and Dai, Jifeng and Cheng, Yu},
  title     = {{LangBridge}: Interpreting Image as a Combination of Language Embeddings},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23752--23762},
}
Quantifying and Narrowing the Unknown: Interactive Text-to-Video Retrieval via Uncertainty Minimization: Bingqing Zhang,

Zhuo Cao,

Heming Du,

Yang Li,

Xue Li,

Jiajun Liu,

Sen Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Bingqing and Cao, Zhuo and Du, Heming and Li, Yang and Li, Xue and Liu, Jiajun and Wang, Sen},
  title     = {Quantifying and Narrowing the Unknown: Interactive Text-to-Video Retrieval via Uncertainty Minimization},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22120--22130},
}
GECKO: Gigapixel Vision-Concept Contrastive Pretraining in Histopathology: Saarthak Kapse,

Pushpak Pati,

Srikar Yellapragada,

Srijan Das,

Rajarsi R. Gupta,

Joel Saltz,

Dimitris Samaras,

Prateek Prasanna; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kapse_2025_ICCV,
  author    = {Kapse, Saarthak and Pati, Pushpak and Yellapragada, Srikar and Das, Srijan and Gupta, Rajarsi R. and Saltz, Joel and Samaras, Dimitris and Prasanna, Prateek},
  title     = {{GECKO}: Gigapixel Vision-Concept Contrastive Pretraining in Histopathology},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20020--20030},
}
Compression of 3D Gaussian Splatting with Optimized Feature Planes and Standard Video Codecs: Soonbin Lee,

Fangwen Shu,

Yago Sanchez,

Thomas Schierl,

Cornelius Hellge; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Soonbin and Shu, Fangwen and Sanchez, Yago and Schierl, Thomas and Hellge, Cornelius},
  title     = {Compression of {3D} {Gaussian} Splatting with Optimized Feature Planes and Standard Video Codecs},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25496--25505},
}
CULTURE3D: A Large-Scale and Diverse Dataset of Cultural Landmarks and Terrains for Gaussian-Based Scene Rendering: Xinyi Zheng,

Steve Zhang,

Weizhe Lin,

Aaron Zhang,

Walterio W. Mayol-Cuevas,

Yunze Liu,

Junxiao Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Xinyi and Zhang, Steve and Lin, Weizhe and Zhang, Aaron and Mayol-Cuevas, Walterio W. and Liu, Yunze and Shen, Junxiao},
  title     = {{CULTURE3D}: A Large-Scale and Diverse Dataset of Cultural Landmarks and Terrains for {Gaussian}-Based Scene Rendering},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {29064--29074},
}
Geo4D: Leveraging Video Generators for Geometric 4D Scene Reconstruction: Zeren Jiang,

Chuanxia Zheng,

Iro Laina,

Diane Larlus,

Andrea Vedaldi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_ICCV,
  author    = {Jiang, Zeren and Zheng, Chuanxia and Laina, Iro and Larlus, Diane and Vedaldi, Andrea},
  title     = {{Geo4D}: Leveraging Video Generators for Geometric {4D} Scene Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20658--20671},
}
REPARO: Compositional 3D Assets Generation with Differentiable 3D Layout Alignment: Haonan Han,

Rui Yang,

Huan Liao,

Jiankai Xing,

Zunnan Xu,

Xiaoming Yu,

Junwei Zha,

Xiu Li,

Wanhua Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_ICCV,
  author    = {Han, Haonan and Yang, Rui and Liao, Huan and Xing, Jiankai and Xu, Zunnan and Yu, Xiaoming and Zha, Junwei and Li, Xiu and Li, Wanhua},
  title     = {{REPARO}: Compositional {3D} Assets Generation with Differentiable {3D} Layout Alignment},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25367--25377},
}
SparseMM: Head Sparsity Emerges from Visual Concept Responses in MLLMs: Jiahui Wang,

Zuyan Liu,

Yongming Rao,

Jiwen Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Jiahui and Liu, Zuyan and Rao, Yongming and Lu, Jiwen},
  title     = {{SparseMM}: Head Sparsity Emerges from Visual Concept Responses in {MLLMs}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23177--23187},
}
Focal Plane Visual Feature Generation and Matching on a Pixel Processor Array: Hongyi Zhang,

Laurie Bose,

Jianing Chen,

Piotr Dudek,

Walterio Mayol-Cuevas; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Hongyi and Bose, Laurie and Chen, Jianing and Dudek, Piotr and Mayol-Cuevas, Walterio},
  title     = {Focal Plane Visual Feature Generation and Matching on a Pixel Processor Array},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {29031--29039},
}
Q-Frame: Query-aware Frame Selection and Multi-Resolution Adaptation for Video-LLMs: Shaojie Zhang,

Jiahui Yang,

Jianqin Yin,

Zhenbo Luo,

Jian Luan; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Shaojie and Yang, Jiahui and Yin, Jianqin and Luo, Zhenbo and Luan, Jian},
  title     = {{Q-Frame}: Query-aware Frame Selection and Multi-Resolution Adaptation for {Video-LLMs}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22056--22065},
}
Prompt-driven Transferable Adversarial Attack on Person Re-Identification with Attribute-aware Textual Inversion: Yuan Bian,

Min Liu,

Yunqi Yi,

Xueping Wang,

Shuai Jiang,

Yaonan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bian_2025_ICCV,
  author    = {Bian, Yuan and Liu, Min and Yi, Yunqi and Wang, Xueping and Jiang, Shuai and Wang, Yaonan},
  title     = {Prompt-driven Transferable Adversarial Attack on Person Re-Identification with Attribute-aware Textual Inversion},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22599--22609},
}
MDP-Omni: Parameter-free Multimodal Depth Prior-based Sampling for Omnidirectional Stereo Matching: Eunjin Son,

HyungGi Jo,

Wookyong Kwon,

Sang Jun Lee; [pdf]
[bibtex]
@InProceedings{Son_2025_ICCV,
  author    = {Son, Eunjin and Jo, HyungGi and Kwon, Wookyong and Lee, Sang Jun},
  title     = {{MDP-Omni}: Parameter-free Multimodal Depth Prior-based Sampling for Omnidirectional Stereo Matching},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26178--26187},
}
RoboTron-Sim: Improving Real-World Driving via Simulated Hard-Case: Baihui Xiao,

Chengjian Feng,

Zhijian Huang,

Feng Yan,

Yujie Zhong,

Lin Ma; [pdf] [supp]
[bibtex]
@InProceedings{Xiao_2025_ICCV,
  author    = {Xiao, Baihui and Feng, Chengjian and Huang, Zhijian and Yan, Feng and Zhong, Yujie and Ma, Lin},
  title     = {{RoboTron-Sim}: Improving Real-World Driving via Simulated Hard-Case},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27380--27389},
}
Cross-Architecture Distillation Made Simple with Redundancy Suppression: Weijia Zhang,

Yuehao Liu,

Wu Ran,

Chao Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Weijia and Liu, Yuehao and Ran, Wu and Ma, Chao},
  title     = {Cross-Architecture Distillation Made Simple with Redundancy Suppression},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23256--23266},
}
M2EIT: Multi-Domain Mixture of Experts for Robust Neural Inertial Tracking: Yan Li,

Yang Xu,

Changhao Chen,

Zhongchen Shi,

Wei Chen,

Liang Xie,

Hongbo Chen,

Erwei Yin; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Yan and Xu, Yang and Chen, Changhao and Shi, Zhongchen and Chen, Wei and Xie, Liang and Chen, Hongbo and Yin, Erwei},
  title     = {{M2EIT}: Multi-Domain Mixture of Experts for Robust Neural Inertial Tracking},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28207--28216},
}
Identity-aware Language Gaussian Splatting for Open-vocabulary 3D Semantic Segmentation: SungMin Jang,

Wonjun Kim; [pdf] [supp]
[bibtex]
@InProceedings{Jang_2025_ICCV,
  author    = {Jang, SungMin and Kim, Wonjun},
  title     = {Identity-aware Language {Gaussian} Splatting for Open-vocabulary {3D} Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20467--20476},
}
Degradation-Modeled Multipath Diffusion for Tunable Metalens Photography: Jianing Zhang,

Jiayi Zhu,

Feiyu Ji,

Xiaokang Yang,

Xiaoyun Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Jianing and Zhu, Jiayi and Ji, Feiyu and Yang, Xiaokang and Yuan, Xiaoyun},
  title     = {Degradation-Modeled Multipath Diffusion for Tunable Metalens Photography},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25914--25924},
}
Driving View Synthesis on Free-form Trajectories with Generative Prior: Zeyu Yang,

Zijie Pan,

Yuankun Yang,

Xiatian Zhu,

Li Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
    author    = {Yang, Zeyu and Pan, Zijie and Yang, Yuankun and Zhu, Xiatian and Zhang, Li},
    title     = {Driving View Synthesis on Free-form Trajectories with Generative Prior},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {28083--28092},
}
StochasticSplats: Stochastic Rasterization for Sorting-Free 3D Gaussian Splatting: Shakiba Kheradmand,

Delio Vicini,

George Kopanas,

Dmitry Lagun,

Kwang Moo Yi,

Mark Matthews,

Andrea Tagliasacchi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kheradmand_2025_ICCV,
    author    = {Kheradmand, Shakiba and Vicini, Delio and Kopanas, George and Lagun, Dmitry and Yi, Kwang Moo and Matthews, Mark and Tagliasacchi, Andrea},
    title     = {{StochasticSplats}: Stochastic Rasterization for Sorting-Free {3D} {Gaussian} Splatting},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {26326--26335},
}
Aligning Information Capacity Between Vision and Language via Dense-to-Sparse Feature Distillation for Image-Text Matching: Yang Liu,

Wentao Feng,

Zhuoyao Liu,

Shudong Huang,

Jiancheng Lv; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
    author    = {Liu, Yang and Feng, Wentao and Liu, Zhuoyao and Huang, Shudong and Lv, Jiancheng},
    title     = {Aligning Information Capacity Between Vision and Language via Dense-to-Sparse Feature Distillation for Image-Text Matching},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {21679--21688},
}
A Lesson in Splats: Teacher-Guided Diffusion for 3D Gaussian Splats Generation with 2D Supervision: Chensheng Peng,

Ido Sobol,

Masayoshi Tomizuka,

Kurt Keutzer,

Chenfeng Xu,

Or Litany; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2025_ICCV,
    author    = {Peng, Chensheng and Sobol, Ido and Tomizuka, Masayoshi and Keutzer, Kurt and Xu, Chenfeng and Litany, Or},
    title     = {A Lesson in Splats: Teacher-Guided Diffusion for {3D} {Gaussian} Splats Generation with {2D} Supervision},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {28707--28717},
}
Boosting Vision Semantic Density with Anatomy Normality Modeling for Medical Vision-language Pre-training: Weiwei Cao,

Jianpeng Zhang,

Zhongyi Shui,

Sinuo Wang,

Zeli Chen,

Xi Li,

Le Lu,

Xianghua Ye,

Qi Zhang,

Tingbo Liang,

Ling Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2025_ICCV,
    author    = {Cao, Weiwei and Zhang, Jianpeng and Shui, Zhongyi and Wang, Sinuo and Chen, Zeli and Li, Xi and Lu, Le and Ye, Xianghua and Zhang, Qi and Liang, Tingbo and Zhang, Ling},
    title     = {Boosting Vision Semantic Density with Anatomy Normality Modeling for Medical Vision-language Pre-training},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {23041--23050},
}
GAP: Gaussianize Any Point Clouds with Text Guidance: Weiqi Zhang,

Junsheng Zhou,

Haotian Geng,

Wenyuan Zhang,

Yu-Shen Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
    author    = {Zhang, Weiqi and Zhou, Junsheng and Geng, Haotian and Zhang, Wenyuan and Liu, Yu-Shen},
    title     = {{GAP}: Gaussianize Any Point Clouds with Text Guidance},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {25627--25638},
}
Flow4Agent: Long-form Video Understanding via Motion Prior from Optical Flow: Ruyang Liu,

Shangkun Sun,

Haoran Tang,

Wei Gao,

Ge Li; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
    author    = {Liu, Ruyang and Sun, Shangkun and Tang, Haoran and Gao, Wei and Li, Ge},
    title     = {{Flow4Agent}: Long-form Video Understanding via Motion Prior from Optical Flow},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {23817--23827},
}
Moto: Latent Motion Token as the Bridging Language for Learning Robot Manipulation from Videos: Yi Chen,

Yuying Ge,

Weiliang Tang,

Yizhuo Li,

Yixiao Ge,

Mingyu Ding,

Ying Shan,

Xihui Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
    author    = {Chen, Yi and Ge, Yuying and Tang, Weiliang and Li, Yizhuo and Ge, Yixiao and Ding, Mingyu and Shan, Ying and Liu, Xihui},
    title     = {Moto: Latent Motion Token as the Bridging Language for Learning Robot Manipulation from Videos},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {19752--19763},
}
BezierGS: Dynamic Urban Scene Reconstruction with Bezier Curve Gaussian Splatting: Zipei Ma,

Junzhe Jiang,

Yurui Chen,

Li Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_ICCV,
    author    = {Ma, Zipei and Jiang, Junzhe and Chen, Yurui and Zhang, Li},
    title     = {{BezierGS}: Dynamic Urban Scene Reconstruction with {Bezier} Curve {Gaussian} Splatting},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {25519--25528},
}
RealCam-I2V: Real-World Image-to-Video Generation with Interactive Complex Camera Control: Teng Li,

Guangcong Zheng,

Rui Jiang,

Shuigen Zhan,

Tao Wu,

Yehao Lu,

Yining Lin,

Chuanyun Deng,

Yepan Xiong,

Min Chen,

Lin Cheng,

Xi Li; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
    author    = {Li, Teng and Zheng, Guangcong and Jiang, Rui and Zhan, Shuigen and Wu, Tao and Lu, Yehao and Lin, Yining and Deng, Chuanyun and Xiong, Yepan and Chen, Min and Cheng, Lin and Li, Xi},
    title     = {{RealCam-I2V}: Real-World Image-to-Video Generation with Interactive Complex Camera Control},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {28785--28796},
}
Breaking the Encoder Barrier for Seamless Video-Language Understanding: Handong Li,

Yiyuan Zhang,

Longteng Guo,

Xiangyu Yue,

Jing Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
    author    = {Li, Handong and Zhang, Yiyuan and Guo, Longteng and Yue, Xiangyu and Liu, Jing},
    title     = {Breaking the Encoder Barrier for Seamless Video-Language Understanding},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {23167--23176},
}
Simulating Dual-Pixel Images From Ray Tracing For Depth Estimation: Fengchen He,

Dayang Zhao,

Hao Xu,

Tingwei Quan,

Shaoqun Zeng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2025_ICCV,
    author    = {He, Fengchen and Zhao, Dayang and Xu, Hao and Quan, Tingwei and Zeng, Shaoqun},
    title     = {Simulating Dual-Pixel Images From Ray Tracing For Depth Estimation},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {26106--26115},
}
Training-Free Class Purification for Open-Vocabulary Semantic Segmentation: Qi Chen,

Lingxiao Yang,

Yun Chen,

Nailong Zhao,

Jianhuang Lai,

Jie Shao,

Xiaohua Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
    author    = {Chen, Qi and Yang, Lingxiao and Chen, Yun and Zhao, Nailong and Lai, Jianhuang and Shao, Jie and Xie, Xiaohua},
    title     = {Training-Free Class Purification for Open-Vocabulary Semantic Segmentation},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {23124--23134},
}
Generalizable Object Re-Identification via Visual In-Context Prompting: Zhizhong Huang,

Xiaoming Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV,
    author    = {Huang, Zhizhong and Liu, Xiaoming},
    title     = {Generalizable Object Re-Identification via Visual In-Context Prompting},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {22539--22550},
}
Pseudo-SD: Pseudo Controlled Stable Diffusion for Semi-Supervised and Cross-Domain Semantic Segmentation: Dong Zhao,

Qi Zang,

Shuang Wang,

Nicu Sebe,

Zhun Zhong; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_ICCV,
    author    = {Zhao, Dong and Zang, Qi and Wang, Shuang and Sebe, Nicu and Zhong, Zhun},
    title     = {{Pseudo-SD}: Pseudo Controlled {Stable} {Diffusion} for Semi-Supervised and Cross-Domain Semantic Segmentation},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {22393--22403},
}
MaTVLM: Hybrid Mamba-Transformer for Efficient Vision-Language Modeling: Yingyue Li,

Bencheng Liao,

Wenyu Liu,

Xinggang Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
    author    = {Li, Yingyue and Liao, Bencheng and Liu, Wenyu and Wang, Xinggang},
    title     = {{MaTVLM}: Hybrid {Mamba}-Transformer for Efficient Vision-Language Modeling},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {20878--20888},
}
HiNeuS: High-fidelity Neural Surface Mitigating Low-texture and Reflective Ambiguity: Yida Wang,

Xueyang Zhang,

Kun Zhan,

Peng Jia,

Xianpeng Lang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
    author    = {Wang, Yida and Zhang, Xueyang and Zhan, Kun and Jia, Peng and Lang, Xianpeng},
    title     = {{HiNeuS}: High-fidelity Neural Surface Mitigating Low-texture and Reflective Ambiguity},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {25746--25755},
}
CaptionSmiths: Flexibly Controlling Language Pattern in Image Captioning: Kuniaki Saito,

Donghyun Kim,

Kwanyong Park,

Atsushi Hashimoto,

Yoshitaka Ushiku; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Saito_2025_ICCV,
    author    = {Saito, Kuniaki and Kim, Donghyun and Park, Kwanyong and Hashimoto, Atsushi and Ushiku, Yoshitaka},
    title     = {{CaptionSmiths}: Flexibly Controlling Language Pattern in Image Captioning},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {19872--19881},
}
CoHD: A Counting-Aware Hierarchical Decoding Framework for Generalized Referring Expression Segmentation: Zhuoyan Luo,

Yinghao Wu,

Tianheng Cheng,

Yong Liu,

Yicheng Xiao,

Hongfa Wang,

Xiao-Ping Zhang,

Yujiu Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2025_ICCV,
    author    = {Luo, Zhuoyan and Wu, Yinghao and Cheng, Tianheng and Liu, Yong and Xiao, Yicheng and Wang, Hongfa and Zhang, Xiao-Ping and Yang, Yujiu},
    title     = {{CoHD}: A Counting-Aware Hierarchical Decoding Framework for Generalized Referring Expression Segmentation},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {22685--22694},
}
LANGTRAJ: Diffusion Model and Dataset for Language-Conditioned Trajectory Simulation: Wei-Jer Chang,

Wei Zhan,

Masayoshi Tomizuka,

Manmohan Chandraker,

Francesco Pittaluga; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chang_2025_ICCV,
    author    = {Chang, Wei-Jer and Zhan, Wei and Tomizuka, Masayoshi and Chandraker, Manmohan and Pittaluga, Francesco},
    title     = {{LANGTRAJ}: Diffusion Model and Dataset for Language-Conditioned Trajectory Simulation},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {26622--26631},
}
Generative Active Learning for Long-tail Trajectory Prediction via Controllable Diffusion Model: Daehee Park,

Monu Surana,

Pranav Desai,

Ashish Mehta,

Reuben MV John,

Kuk-Jin Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_ICCV,
    author    = {Park, Daehee and Surana, Monu and Desai, Pranav and Mehta, Ashish and John, Reuben MV and Yoon, Kuk-Jin},
    title     = {Generative Active Learning for Long-tail Trajectory Prediction via Controllable Diffusion Model},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {27839--27850},
}
Bridging Local Inductive Bias and Long-Range Dependencies with Pixel-Mamba for End-to-end Whole Slide Image Analysis: Zhongwei Qiu,

Hanqing Chao,

Tiancheng Lin,

Wanxing Chang,

Zijiang Yang,

Wenpei Jiao,

Yixuan Shen,

Yunshuo Zhang,

Yelin Yang,

Wenbin Liu,

Hui Jiang,

Yun Bian,

Ke Yan,

Dakai Jin,

Le Lu; [pdf] [supp]
[bibtex]
@InProceedings{Qiu_2025_ICCV,
    author    = {Qiu, Zhongwei and Chao, Hanqing and Lin, Tiancheng and Chang, Wanxing and Yang, Zijiang and Jiao, Wenpei and Shen, Yixuan and Zhang, Yunshuo and Yang, Yelin and Liu, Wenbin and Jiang, Hui and Bian, Yun and Yan, Ke and Jin, Dakai and Lu, Le},
    title     = {Bridging Local Inductive Bias and Long-Range Dependencies with {Pixel-Mamba} for End-to-end Whole Slide Image Analysis},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {22738--22747},
}
TARS: Traffic-Aware Radar Scene Flow Estimation: Jialong Wu,

Marco Braun,

Dominic Spata,

Matthias Rottmann; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
    author    = {Wu, Jialong and Braun, Marco and Spata, Dominic and Rottmann, Matthias},
    title     = {{TARS}: Traffic-Aware Radar Scene Flow Estimation},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {26075--26084},
}
Unified Multi-Agent Trajectory Modeling with Masked Trajectory Diffusion: Songru Yang,

Zhenwei Shi,

Zhengxia Zou; [pdf]
[bibtex]
@InProceedings{Yang_2025_ICCV,
    author    = {Yang, Songru and Shi, Zhenwei and Zou, Zhengxia},
    title     = {Unified Multi-Agent Trajectory Modeling with Masked Trajectory Diffusion},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {27563--27574},
}
ChatReID: Open-ended Interactive Person Retrieval via Hierarchical Progressive Tuning for Vision Language Models: Ke Niu,

Haiyang Yu,

Mengyang Zhao,

Teng Fu,

Siyang Yi,

Wei Lu,

Bin Li,

Xuelin Qian,

Xiangyang Xue; [pdf] [arXiv]
[bibtex]
@InProceedings{Niu_2025_ICCV,
    author    = {Niu, Ke and Yu, Haiyang and Zhao, Mengyang and Fu, Teng and Yi, Siyang and Lu, Wei and Li, Bin and Qian, Xuelin and Xue, Xiangyang},
    title     = {{ChatReID}: Open-ended Interactive Person Retrieval via Hierarchical Progressive Tuning for Vision Language Models},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {24245--24254},
}
TAViS: Text-bridged Audio-Visual Segmentation with Foundation Models: Ziyang Luo,

Nian Liu,

Xuguang Yang,

Salman Khan,

Rao Muhammad Anwer,

Hisham Cholakkal,

Fahad Shahbaz Khan,

Junwei Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2025_ICCV,
    author    = {Luo, Ziyang and Liu, Nian and Yang, Xuguang and Khan, Salman and Anwer, Rao Muhammad and Cholakkal, Hisham and Khan, Fahad Shahbaz and Han, Junwei},
    title     = {{TAViS}: Text-bridged Audio-Visual Segmentation with Foundation Models},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {24014--24023},
}
ORION: A Holistic End-to-End Autonomous Driving Framework by Vision-Language Instructed Action Generation: Haoyu Fu,

Diankun Zhang,

Zongchuang Zhao,

Jianfeng Cui,

Dingkang Liang,

Chong Zhang,

Dingyuan Zhang,

Hongwei Xie,

Bing Wang,

Xiang Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2025_ICCV,
    author    = {Fu, Haoyu and Zhang, Diankun and Zhao, Zongchuang and Cui, Jianfeng and Liang, Dingkang and Zhang, Chong and Zhang, Dingyuan and Xie, Hongwei and Wang, Bing and Bai, Xiang},
    title     = {{ORION}: A Holistic End-to-End Autonomous Driving Framework by Vision-Language Instructed Action Generation},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {24823--24834},
}
LaCoOT: Layer Collapse through Optimal Transport: Victor Quétu,

Zhu Liao,

Nour Hezbri,

Fabio Pizzati,

Enzo Tartaglione; [pdf] [supp]
[bibtex]
@InProceedings{Quetu_2025_ICCV,
    author    = {Qu{\'e}tu, Victor and Liao, Zhu and Hezbri, Nour and Pizzati, Fabio and Tartaglione, Enzo},
    title     = {{LaCoOT}: Layer Collapse through Optimal Transport},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {20497--20507},
}
ReferDINO: Referring Video Object Segmentation with Visual Grounding Foundations: Tianming Liang,

Kun-Yu Lin,

Chaolei Tan,

Jianguo Zhang,

Wei-Shi Zheng,

Jian-Fang Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2025_ICCV,
    author    = {Liang, Tianming and Lin, Kun-Yu and Tan, Chaolei and Zhang, Jianguo and Zheng, Wei-Shi and Hu, Jian-Fang},
    title     = {{ReferDINO}: Referring Video Object Segmentation with Visual Grounding Foundations},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {20009--20019},
}
NuiScene: Exploring Efficient Generation of Unbounded Outdoor Scenes: Han-Hung Lee,

Qinghong Han,

Angel X. Chang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV,
    author    = {Lee, Han-Hung and Han, Qinghong and Chang, Angel X.},
    title     = {{NuiScene}: Exploring Efficient Generation of Unbounded Outdoor Scenes},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {26509--26518},
}
DM-EFS: Dynamically Multiplexed Expanded Features Set Form for Robust and Efficient Small Object Detection: Aashish Sharma; [pdf] [supp]
[bibtex]
@InProceedings{Sharma_2025_ICCV,
    author    = {Sharma, Aashish},
    title     = {{DM-EFS}: Dynamically Multiplexed Expanded Features Set Form for Robust and Efficient Small Object Detection},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {24569--24579},
}
AirCache: Activating Inter-modal Relevancy KV Cache Compression for Efficient Large Vision-Language Model Inference: Kai Huang,

Hao Zou,

Bochen Wang,

Ye Xi,

Zhen Xie,

Hao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV,
    author    = {Huang, Kai and Zou, Hao and Wang, Bochen and Xi, Ye and Xie, Zhen and Wang, Hao},
    title     = {{AirCache}: Activating Inter-modal Relevancy {KV} Cache Compression for Efficient Large Vision-Language Model Inference},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {23958--23967},
}
SSVQ: Unleashing the Potential of Vector Quantization with Sign-Splitting: Shuaiting Li,

Juncan Deng,

Chengxuan Wang,

Kedong Xu,

Rongtao Deng,

Hong Gu,

Haibin Shen,

Kejie Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
    author    = {Li, Shuaiting and Deng, Juncan and Wang, Chengxuan and Xu, Kedong and Deng, Rongtao and Gu, Hong and Shen, Haibin and Huang, Kejie},
    title     = {{SSVQ}: Unleashing the Potential of Vector Quantization with Sign-Splitting},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {23710--23719},
}
DrivingGPT: Unifying Driving World Modeling and Planning with Multi-modal Autoregressive Transformers: Yuntao Chen,

Yuqi Wang,

Zhaoxiang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
    author    = {Chen, Yuntao and Wang, Yuqi and Zhang, Zhaoxiang},
    title     = {{DrivingGPT}: Unifying Driving World Modeling and Planning with Multi-modal Autoregressive Transformers},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {26890--26900},
}
GS-ID: Illumination Decomposition on Gaussian Splatting via Adaptive Light Aggregation and Diffusion-Guided Material Priors: Kang Du,

Zhihao Liang,

Yulin Shen,

Zeyu Wang; [pdf] [supp]
[bibtex]
@InProceedings{Du_2025_ICCV,
    author    = {Du, Kang and Liang, Zhihao and Shen, Yulin and Wang, Zeyu},
    title     = {{GS-ID}: Illumination Decomposition on {Gaussian} Splatting via Adaptive Light Aggregation and Diffusion-Guided Material Priors},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {26220--26229},
}
4D Gaussian Splatting SLAM: Yanyan Li,

Youxu Fang,

Zunjie Zhu,

Kunyi Li,

Yong Ding,

Federico Tombari; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
    author    = {Li, Yanyan and Fang, Youxu and Zhu, Zunjie and Li, Kunyi and Ding, Yong and Tombari, Federico},
    title     = {{4D} {Gaussian} Splatting {SLAM}},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {25019--25028},
}
SteerX: Creating Any Camera-Free 3D and 4D Scenes with Geometric Steering: Byeongjun Park,

Hyojun Go,

Hyelin Nam,

Byung-Hoon Kim,

Hyungjin Chung,

Changick Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_ICCV,
    author    = {Park, Byeongjun and Go, Hyojun and Nam, Hyelin and Kim, Byung-Hoon and Chung, Hyungjin and Kim, Changick},
    title     = {{SteerX}: Creating Any Camera-Free {3D} and {4D} Scenes with Geometric Steering},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {27326--27337},
}
SweetTok: Semantic-Aware Spatial-Temporal Tokenizer for Compact Video Discretization: Zhentao Tan,

Ben Xue,

Jian Jia,

Junhao Wang,

Wencai Ye,

Shaoyun Shi,

Mingjie Sun,

Wenjin Wu,

Quan Chen,

Peng Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2025_ICCV,
    author    = {Tan, Zhentao and Xue, Ben and Jia, Jian and Wang, Junhao and Ye, Wencai and Shi, Shaoyun and Sun, Mingjie and Wu, Wenjin and Chen, Quan and Jiang, Peng},
    title     = {{SweetTok}: Semantic-Aware Spatial-Temporal Tokenizer for Compact Video Discretization},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {23541--23550},
}
D-Attn: Decomposed Attention for Large Vision-and-Language Model: Chia-Wen Kuo,

Sijie Zhu,

Fan Chen,

Xiaohui Shen,

Longyin Wen; [pdf] [supp]
[bibtex]
@InProceedings{Kuo_2025_ICCV,
    author    = {Kuo, Chia-Wen and Zhu, Sijie and Chen, Fan and Shen, Xiaohui and Wen, Longyin},
    title     = {{D-Attn}: Decomposed Attention for Large Vision-and-Language Model},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {23935--23944},
}
Gaussian Splatting with Discretized SDF for Relightable Assets: Zuo-Liang Zhu,

Jian Yang,

Beibei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
    author    = {Zhu, Zuo-Liang and Yang, Jian and Wang, Beibei},
    title     = {{Gaussian} Splatting with Discretized {SDF} for Relightable Assets},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {25155--25164},
}
AAA-Gaussians: Anti-Aliased and Artifact-Free 3D Gaussian Rendering: Michael Steiner,

Thomas Köhler,

Lukas Radl,

Felix Windisch,

Dieter Schmalstieg,

Markus Steinberger; [pdf] [supp]
[bibtex]
@InProceedings{Steiner_2025_ICCV,
    author    = {Steiner, Michael and K{\"o}hler, Thomas and Radl, Lukas and Windisch, Felix and Schmalstieg, Dieter and Steinberger, Markus},
    title     = {{AAA-Gaussians}: Anti-Aliased and Artifact-Free {3D} {Gaussian} Rendering},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {27650--27659},
}
ResidualViT for Efficient Temporally Dense Video Encoding: Mattia Soldan,

Fabian Caba Heilbron,

Bernard Ghanem,

Josef Sivic,

Bryan Russell; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Soldan_2025_ICCV,
    author    = {Soldan, Mattia and Heilbron, Fabian Caba and Ghanem, Bernard and Sivic, Josef and Russell, Bryan},
    title     = {{ResidualViT} for Efficient Temporally Dense Video Encoding},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {22305--22315},
}
HQ-CLIP: Leveraging Large Vision-Language Models to Create High-Quality Image-Text Datasets and CLIP Models: Zhixiang Wei,

Guangting Wang,

Xiaoxiao Ma,

Ke Mei,

Huaian Chen,

Yi Jin,

Fengyun Rao; [pdf] [supp]
[bibtex]
@InProceedings{Wei_2025_ICCV,
    author    = {Wei, Zhixiang and Wang, Guangting and Ma, Xiaoxiao and Mei, Ke and Chen, Huaian and Jin, Yi and Rao, Fengyun},
    title     = {{HQ-CLIP}: Leveraging Large Vision-Language Models to Create High-Quality Image-Text Datasets and {CLIP} Models},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {22447--22456},
}
Bridging the Gap Between Ideal and Real-world Evaluation: Benchmarking AI-Generated Image Detection in Challenging Scenarios: Chunxiao Li,

Xiaoxiao Wang,

Meiling Li,

Boming Miao,

Peng Sun,

Yunjian Zhang,

Xiangyang Ji,

Yao Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
    author    = {Li, Chunxiao and Wang, Xiaoxiao and Li, Meiling and Miao, Boming and Sun, Peng and Zhang, Yunjian and Ji, Xiangyang and Zhu, Yao},
    title     = {Bridging the Gap Between Ideal and Real-world Evaluation: Benchmarking {AI}-Generated Image Detection in Challenging Scenarios},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {20379--20389},
}
Progressive Test Time Energy Adaptation for Medical Image Segmentation: Xiaoran Zhang,

Byung-Woo Hong,

Hyoungseob Park,

Daniel H. Pak,

Anne-Marie Rickmann,

Lawrence H. Staib,

James S. Duncan,

Alex Wong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
    author    = {Zhang, Xiaoran and Hong, Byung-Woo and Park, Hyoungseob and Pak, Daniel H. and Rickmann, Anne-Marie and Staib, Lawrence H. and Duncan, James S. and Wong, Alex},
    title     = {Progressive Test Time Energy Adaptation for Medical Image Segmentation},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {22338--22348},
}
MUG: Pseudo Labeling Augmented Audio-Visual Mamba Network for Audio-Visual Video Parsing: Langyu Wang,

Bingke Zhu,

Yingying Chen,

Yiyuan Zhang,

Ming Tang,

Jinqiao Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
    author    = {Wang, Langyu and Zhu, Bingke and Chen, Yingying and Zhang, Yiyuan and Tang, Ming and Wang, Jinqiao},
    title     = {{MUG}: Pseudo Labeling Augmented Audio-Visual {Mamba} Network for Audio-Visual Video Parsing},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {20637--20646},
}
Mixed Signals: A Diverse Point Cloud Dataset for Heterogeneous LiDAR V2X Collaboration: Katie Z Luo,

Minh-Quan Dao,

Zhenzhen Liu,

Mark Campbell,

Wei-Lun Chao,

Kilian Q Weinberger,

Ezio Malis,

Vincent Fremont,

Bharath Hariharan,

Mao Shan,

Stewart Worrall,

Julie Stephany Berrio Perez; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2025_ICCV,
    author    = {Luo, Katie Z and Dao, Minh-Quan and Liu, Zhenzhen and Campbell, Mark and Chao, Wei-Lun and Weinberger, Kilian Q and Malis, Ezio and Fremont, Vincent and Hariharan, Bharath and Shan, Mao and Worrall, Stewart and Perez, Julie Stephany Berrio},
    title     = {{Mixed Signals}: A Diverse Point Cloud Dataset for Heterogeneous {LiDAR} {V2X} Collaboration},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {28763--28773},
}
OracleFusion: Assisting the Decipherment of Oracle Bone Script with Structurally Constrained Semantic Typography: Caoshuo Li,

Zengmao Ding,

Xiaobin Hu,

Bang Li,

Donghao Luo,

AndyPian Wu,

Chaoyang Wang,

Chengjie Wang,

Taisong Jin,

Seven Shu,

Yunsheng Wu,

Yongge Liu,

Rongrong Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
    author    = {Li, Caoshuo and Ding, Zengmao and Hu, Xiaobin and Li, Bang and Luo, Donghao and Wu, AndyPian and Wang, Chaoyang and Wang, Chengjie and Jin, Taisong and Shu, Seven and Wu, Yunsheng and Liu, Yongge and Ji, Rongrong},
    title     = {{OracleFusion}: Assisting the Decipherment of Oracle Bone Script with Structurally Constrained Semantic Typography},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {19893--19902},
}
Streaming VideoLLMs for Real-Time Procedural Video Understanding: Dibyadip Chatterjee,

Edoardo Remelli,

Yale Song,

Bugra Tekin,

Abhay Mittal,

Bharat Bhatnagar,

Necati Cihan Camgoz,

Shreyas Hampali,

Eric Sauser,

Shugao Ma,

Angela Yao,

Fadime Sener; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chatterjee_2025_ICCV,
    author    = {Chatterjee, Dibyadip and Remelli, Edoardo and Song, Yale and Tekin, Bugra and Mittal, Abhay and Bhatnagar, Bharat and Camgoz, Necati Cihan and Hampali, Shreyas and Sauser, Eric and Ma, Shugao and Yao, Angela and Sener, Fadime},
    title     = {Streaming {VideoLLMs} for Real-Time Procedural Video Understanding},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {22586--22598},
}
Mixture-of-Scores: Robust Image-Text Data Valuation via Three Lines of Code: Sitong Wu,

Haoru Tan,

Yukang Chen,

Shaofeng Zhang,

Jingyao Li,

Bei Yu,

Xiaojuan Qi,

Jiaya Jia; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_ICCV,
    author    = {Wu, Sitong and Tan, Haoru and Chen, Yukang and Zhang, Shaofeng and Li, Jingyao and Yu, Bei and Qi, Xiaojuan and Jia, Jiaya},
    title     = {Mixture-of-Scores: Robust Image-Text Data Valuation via Three Lines of Code},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {24603--24614},
}
DMesh++: An Efficient Differentiable Mesh for Complex Shapes: Sanghyun Son,

Matheus Gadelha,

Yang Zhou,

Matthew Fisher,

Zexiang Xu,

Yi-Ling Qiao,

Ming C. Lin,

Yi Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Son_2025_ICCV,
    author    = {Son, Sanghyun and Gadelha, Matheus and Zhou, Yang and Fisher, Matthew and Xu, Zexiang and Qiao, Yi-Ling and Lin, Ming C. and Zhou, Yi},
    title     = {{DMesh++}: An Efficient Differentiable Mesh for Complex Shapes},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {26590--26599},
}
Extrapolated Urban View Synthesis Benchmark: Xiangyu Han,

Zhen Jia,

Boyi Li,

Yan Wang,

Boris Ivanovic,

Yurong You,

Lingjie Liu,

Yue Wang,

Marco Pavone,

Chen Feng,

Yiming Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_ICCV,
    author    = {Han, Xiangyu and Jia, Zhen and Li, Boyi and Wang, Yan and Ivanovic, Boris and You, Yurong and Liu, Lingjie and Wang, Yue and Pavone, Marco and Feng, Chen and Li, Yiming},
    title     = {Extrapolated Urban View Synthesis Benchmark},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {28718--28728},
}
Emulating Self-attention with Convolution for Efficient Image Super-Resolution: Dongheon Lee,

Seokju Yun,

Youngmin Ro; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV,
    author    = {Lee, Dongheon and Yun, Seokju and Ro, Youngmin},
    title     = {Emulating Self-attention with Convolution for Efficient Image Super-Resolution},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {24467--24477},
}
CoDa-4DGS: Dynamic Gaussian Splatting with Context and Deformation Awareness for Autonomous Driving: Rui Song,

Chenwei Liang,

Yan Xia,

Walter Zimmer,

Hu Cao,

Holger Caesar,

Andreas Festag,

Alois Knoll; [pdf] [supp]
[bibtex]
@InProceedings{Song_2025_ICCV,
    author    = {Song, Rui and Liang, Chenwei and Xia, Yan and Zimmer, Walter and Cao, Hu and Caesar, Holger and Festag, Andreas and Knoll, Alois},
    title     = {{CoDa-4DGS}: Dynamic {Gaussian} Splatting with Context and Deformation Awareness for Autonomous Driving},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {28031--28041},
}
RI3D: Few-Shot Gaussian Splatting With Repair and Inpainting Diffusion Priors: Avinash Paliwal,

Xilong Zhou,

Wei Ye,

Jinhui Xiong,

Rakesh Ranjan,

Nima Khademi Kalantari; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Paliwal_2025_ICCV,
    author    = {Paliwal, Avinash and Zhou, Xilong and Ye, Wei and Xiong, Jinhui and Ranjan, Rakesh and Kalantari, Nima Khademi},
    title     = {{RI3D}: Few-Shot {Gaussian} Splatting With Repair and Inpainting Diffusion Priors},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {25094--25103},
}
UniVerse: Unleashing the Scene Prior of Video Diffusion Models for Robust Radiance Field Reconstruction: Jin Cao,

Hongrui Wu,

Ziyong Feng,

Hujun Bao,

Xiaowei Zhou,

Sida Peng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2025_ICCV,
    author    = {Cao, Jin and Wu, Hongrui and Feng, Ziyong and Bao, Hujun and Zhou, Xiaowei and Peng, Sida},
    title     = {{UniVerse}: Unleashing the Scene Prior of Video Diffusion Models for Robust Radiance Field Reconstruction},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {27031--27041},
}
From Gallery to Wrist: Realistic 3D Bracelet Insertion in Videos: Chenjian Gao,

Lihe Ding,

Rui Han,

Zhanpeng Huang,

Zibin Wang,

Tianfan Xue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_ICCV,
    author    = {Gao, Chenjian and Ding, Lihe and Han, Rui and Huang, Zhanpeng and Wang, Zibin and Xue, Tianfan},
    title     = {From Gallery to Wrist: Realistic {3D} Bracelet Insertion in Videos},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {25712--25721},
}
Representing 3D Shapes with 64 Latent Vectors for 3D Diffusion Models: In Cho,

Youngbeom Yoo,

Subin Jeon,

Seon Joo Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cho_2025_ICCV,
    author    = {Cho, In and Yoo, Youngbeom and Jeon, Subin and Kim, Seon Joo},
    title     = {Representing {3D} Shapes with 64 Latent Vectors for {3D} Diffusion Models},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {28556--28566},
}
CutS3D: Cutting Semantics in 3D for 2D Unsupervised Instance Segmentation: Leon Sick,

Dominik Engel,

Sebastian Hartwig,

Pedro Hermosilla,

Timo Ropinski; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sick_2025_ICCV,
    author    = {Sick, Leon and Engel, Dominik and Hartwig, Sebastian and Hermosilla, Pedro and Ropinski, Timo},
    title     = {{CutS3D}: Cutting Semantics in {3D} for {2D} Unsupervised Instance Segmentation},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {21265--21275},
}
Salvaging the Overlooked: Leveraging Class-Aware Contrastive Learning for Multi-Class Anomaly Detection: Lei Fan,

Junjie Huang,

Donglin Di,

Anyang Su,

Tianyou Song,

Maurice Pagnucco,

Yang Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2025_ICCV,
    author    = {Fan, Lei and Huang, Junjie and Di, Donglin and Su, Anyang and Song, Tianyou and Pagnucco, Maurice and Song, Yang},
    title     = {Salvaging the Overlooked: Leveraging Class-Aware Contrastive Learning for Multi-Class Anomaly Detection},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {21419--21428},
}
Disentangling Instance and Scene Contexts for 3D Semantic Scene Completion: Enyu Liu,

En Yu,

Sijia Chen,

Wenbing Tao; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
    author    = {Liu, Enyu and Yu, En and Chen, Sijia and Tao, Wenbing},
    title     = {Disentangling Instance and Scene Contexts for {3D} Semantic Scene Completion},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {26999--27009},
}
World4Drive: End-to-End Autonomous Driving via Intention-aware Physical Latent World Model: Yupeng Zheng,

Pengxuan Yang,

Zebin Xing,

Qichao Zhang,

Yuhang Zheng,

Yinfeng Gao,

Pengfei Li,

Teng Zhang,

Zhongpu Xia,

Peng Jia,

XianPeng Lang,

Dongbin Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
    author    = {Zheng, Yupeng and Yang, Pengxuan and Xing, Zebin and Zhang, Qichao and Zheng, Yuhang and Gao, Yinfeng and Li, Pengfei and Zhang, Teng and Xia, Zhongpu and Jia, Peng and Lang, XianPeng and Zhao, Dongbin},
    title     = {{World4Drive}: End-to-End Autonomous Driving via Intention-aware Physical Latent World Model},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {28632--28642},
}
Online Language Splatting: Saimouli Katragadda,

Cho-Ying Wu,

Yuliang Guo,

Xinyu Huang,

Guoquan Huang,

Liu Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Katragadda_2025_ICCV,
    author    = {Katragadda, Saimouli and Wu, Cho-Ying and Guo, Yuliang and Huang, Xinyu and Huang, Guoquan and Ren, Liu},
    title     = {Online Language Splatting},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {25882--25892},
}
VTimeCoT: Thinking by Drawing for Video Temporal Grounding and Reasoning: Jinglei Zhang,

Yuanfan Guo,

Rolandos Alexandros Potamias,

Jiankang Deng,

Hang Xu,

Chao Ma; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
    author    = {Zhang, Jinglei and Guo, Yuanfan and Potamias, Rolandos Alexandros and Deng, Jiankang and Xu, Hang and Ma, Chao},
    title     = {{VTimeCoT}: Thinking by Drawing for Video Temporal Grounding and Reasoning},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {24203--24213},
}
RA-BUSSeg: Relation-aware Semi-supervised Breast Ultrasound Image Segmentation via Adjacent Propagation and Cross-layer Alignment: Wanting Zhang,

Zhenhui Ding,

Guilian Chen,

Huisi Wu,

Jing Qin; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
    author    = {Zhang, Wanting and Ding, Zhenhui and Chen, Guilian and Wu, Huisi and Qin, Jing},
    title     = {{RA-BUSSeg}: Relation-aware Semi-supervised Breast Ultrasound Image Segmentation via Adjacent Propagation and Cross-layer Alignment},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {21689--21698},
}
Factorized Learning for Temporally Grounded Video-Language Models: Wenzheng Zeng,

Difei Gao,

Mike Zheng Shou,

Hwee Tou Ng; [pdf] [supp]
[bibtex]
@InProceedings{Zeng_2025_ICCV,
    author    = {Zeng, Wenzheng and Gao, Difei and Shou, Mike Zheng and Ng, Hwee Tou},
    title     = {Factorized Learning for Temporally Grounded Video-Language Models},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {20683--20693},
}
WeaveSeg: Iterative Contrast-weaving and Spectral Feature-refining for Nuclei Instance Segmentation: Jiajia Li,

Huisi Wu,

Jing Qin; [pdf]
[bibtex]
@InProceedings{Li_2025_ICCV,
    author    = {Li, Jiajia and Wu, Huisi and Qin, Jing},
    title     = {{WeaveSeg}: Iterative Contrast-weaving and Spectral Feature-refining for Nuclei Instance Segmentation},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {21984--21993},
}
A Real-world Display Inverse Rendering Dataset: Seokjun Choi,

Hoon-Gyu Chung,

Yujin Jeon,

Giljoo Nam,

Seung-Hwan Baek; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2025_ICCV,
    author    = {Choi, Seokjun and Chung, Hoon-Gyu and Jeon, Yujin and Nam, Giljoo and Baek, Seung-Hwan},
    title     = {A Real-world Display Inverse Rendering Dataset},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {25272--25283},
}
MEH: A Multi-Style Dataset and Toolkit for Advancing Egyptian Hieroglyph Recognition: Maksim Golyadkin,

Valeria Rubanova,

Aleksandr Utkov,

Dmitry Nikolotov,

Ilya Makarov; [pdf]
[bibtex]
@InProceedings{Golyadkin_2025_ICCV,
    author    = {Golyadkin, Maksim and Rubanova, Valeria and Utkov, Aleksandr and Nikolotov, Dmitry and Makarov, Ilya},
    title     = {{MEH}: A Multi-Style Dataset and Toolkit for Advancing {Egyptian} Hieroglyph Recognition},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {24488--24496},
}
Hi-Gaussian: Hierarchical Gaussians under Normalized Spherical Projection for Single-View 3D Reconstruction: Binjian Xie,

Pengju Zhang,

Hao Wei,

Yihong Wu; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2025_ICCV,
    author    = {Xie, Binjian and Zhang, Pengju and Wei, Hao and Wu, Yihong},
    title     = {{Hi-Gaussian}: Hierarchical {Gaussians} under Normalized Spherical Projection for Single-View {3D} Reconstruction},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {28664--28673},
}
The Scalability of Simplicity: Empirical Analysis of Vision-Language Learning with a Single Transformer: Weixian Lei,

Jiacong Wang,

Haochen Wang,

Xiangtai Li,

Jun Hao Liew,

Jiashi Feng,

Zilong Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lei_2025_ICCV,
    author    = {Lei, Weixian and Wang, Jiacong and Wang, Haochen and Li, Xiangtai and Liew, Jun Hao and Feng, Jiashi and Huang, Zilong},
    title     = {The Scalability of Simplicity: Empirical Analysis of Vision-Language Learning with a Single Transformer},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {20758--20769},
}
End-to-End Multi-Modal Diffusion Mamba: Chunhao Lu,

Qiang Lu,

Meichen Dong,

Jake Luo; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2025_ICCV,
    author    = {Lu, Chunhao and Lu, Qiang and Dong, Meichen and Luo, Jake},
    title     = {End-to-End Multi-Modal Diffusion {Mamba}},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {20529--20540},
}
Vector Contrastive Learning For Pixel-Wise Pretraining In Medical Vision: Yuting He,

Shuo Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2025_ICCV,
    author    = {He, Yuting and Li, Shuo},
    title     = {Vector Contrastive Learning For Pixel-Wise Pretraining In Medical Vision},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {19827--19837},
}
Benefit From Seen: Enhancing Open-Vocabulary Object Detection by Bridging Visual and Textual Co-Occurrence Knowledge: Yanqi Li,

Jianwei Niu,

Tao Ren; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
    author    = {Li, Yanqi and Niu, Jianwei and Ren, Tao},
    title     = {Benefit From Seen: Enhancing Open-Vocabulary Object Detection by Bridging Visual and Textual Co-Occurrence Knowledge},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {22110--22119},
}
SynAD: Enhancing Real-World End-to-End Autonomous Driving Models through Synthetic Data Integration: Jongsuk Kim,

Jaeyoung Lee,

Gyojin Han,

Dong-Jae Lee,

Minki Jeong,

Junmo Kim; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2025_ICCV,
    author    = {Kim, Jongsuk and Lee, Jaeyoung and Han, Gyojin and Lee, Dong-Jae and Jeong, Minki and Kim, Junmo},
    title     = {{SynAD}: Enhancing Real-World End-to-End Autonomous Driving Models through Synthetic Data Integration},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {25197--25206},
}
Purge-Gate: Backpropagation-Free Test-Time Adaptation for Point Clouds Classification via Token purging: Moslem Yazdanpanah,

Ali Bahri,

Mehrdad Noori,

Sahar Dastani,

Gustavo Adolfo Vargas Hakim,

David Osowiechi,

Ismail Ben Ayed,

Christian Desrosiers; [pdf] [supp]
[bibtex]
@InProceedings{Yazdanpanah_2025_ICCV,
    author    = {Yazdanpanah, Moslem and Bahri, Ali and Noori, Mehrdad and Dastani, Sahar and Hakim, Gustavo Adolfo Vargas and Osowiechi, David and Ben Ayed, Ismail and Desrosiers, Christian},
    title     = {{Purge-Gate}: Backpropagation-Free Test-Time Adaptation for Point Clouds Classification via Token purging},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {27640--27649},
}
PVChat: Personalized Video Chat with One-Shot Learning: Yufei Shi,

Weilong Yan,

Gang Xu,

Yumeng Li,

Yucheng Chen,

Zhenxi Li,

Fei Yu,

Ming Li,

Si Yong Yeo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2025_ICCV,
    author    = {Shi, Yufei and Yan, Weilong and Xu, Gang and Li, Yumeng and Chen, Yucheng and Li, Zhenxi and Yu, Fei and Li, Ming and Yeo, Si Yong},
    title     = {{PVChat}: Personalized Video Chat with One-Shot Learning},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {23321--23331},
}
NeuraLeaf: Neural Parametric Leaf Models with Shape and Deformation Disentanglement: Yang Yang,

Dongni Mao,

Hiroaki Santo,

Yasuyuki Matsushita,

Fumio Okura; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
    author    = {Yang, Yang and Mao, Dongni and Santo, Hiroaki and Matsushita, Yasuyuki and Okura, Fumio},
    title     = {{NeuraLeaf}: Neural Parametric Leaf Models with Shape and Deformation Disentanglement},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {28167--28176},
}
Vamba: Understanding Hour-Long Videos with Hybrid Mamba-Transformers: Weiming Ren,

Wentao Ma,

Huan Yang,

Cong Wei,

Ge Zhang,

Wenhu Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2025_ICCV,
    author    = {Ren, Weiming and Ma, Wentao and Yang, Huan and Wei, Cong and Zhang, Ge and Chen, Wenhu},
    title     = {Vamba: Understanding Hour-Long Videos with Hybrid {Mamba}-Transformers},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {21197--21208},
}
From Trial to Triumph: Advancing Long Video Understanding via Visual Context Sample Scaling and Self-reward Alignment: Yucheng Suo,

Fan Ma,

Linchao Zhu,

Tianyi Wang,

Fengyun Rao,

Yi Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Suo_2025_ICCV,
    author    = {Suo, Yucheng and Ma, Fan and Zhu, Linchao and Wang, Tianyi and Rao, Fengyun and Yang, Yi},
    title     = {From Trial to Triumph: Advancing Long Video Understanding via Visual Context Sample Scaling and Self-reward Alignment},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {23243--23255},
}
GS-LIVM: Real-Time Photo-Realistic LiDAR-Inertial-Visual Mapping with Gaussian Splatting: Yusen Xie,

Zhenmin Huang,

Jin Wu,

Jun Ma; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2025_ICCV,
    author    = {Xie, Yusen and Huang, Zhenmin and Wu, Jin and Ma, Jun},
    title     = {{GS-LIVM}: Real-Time Photo-Realistic {LiDAR}-Inertial-Visual Mapping with {Gaussian} Splatting},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {26869--26878},
}
Self-Calibrating Gaussian Splatting for Large Field-of-View Reconstruction: Youming Deng,

Wenqi Xian,

Guandao Yang,

Leonidas Guibas,

Gordon Wetzstein,

Steve Marschner,

Paul Debevec; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deng_2025_ICCV,
    author    = {Deng, Youming and Xian, Wenqi and Yang, Guandao and Guibas, Leonidas and Wetzstein, Gordon and Marschner, Steve and Debevec, Paul},
    title     = {Self-Calibrating {Gaussian} Splatting for Large Field-of-View Reconstruction},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {25124--25133},
}
Automated Model Evaluation for Object Detection via Prediction Consistency and Reliability: Seungju Yoo,

Hyuk Kwon,

Joong-Won Hwang,

Kibok Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yoo_2025_ICCV,
    author    = {Yoo, Seungju and Kwon, Hyuk and Hwang, Joong-Won and Lee, Kibok},
    title     = {Automated Model Evaluation for Object Detection via Prediction Consistency and Reliability},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {19764--19773},
}
Stepping Out of Similar Semantic Space for Open-Vocabulary Segmentation: Yong Liu,

Song-Li Wu,

Sule Bai,

Jiahao Wang,

Yitong Wang,

Yansong Tang; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
    author    = {Liu, Yong and Wu, Song-Li and Bai, Sule and Wang, Jiahao and Wang, Yitong and Tang, Yansong},
    title     = {Stepping Out of Similar Semantic Space for Open-Vocabulary Segmentation},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {22664--22674},
}
DictAS: A Framework for Class-Generalizable Few-Shot Anomaly Segmentation via Dictionary Lookup: Zhen Qu,

Xian Tao,

Xinyi Gong,

ShiChen Qu,

Xiaopei Zhang,

Xingang Wang,

Fei Shen,

Zhengtao Zhang,

Mukesh Prasad,

Guiguang Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qu_2025_ICCV,
    author    = {Qu, Zhen and Tao, Xian and Gong, Xinyi and Qu, ShiChen and Zhang, Xiaopei and Wang, Xingang and Shen, Fei and Zhang, Zhengtao and Prasad, Mukesh and Ding, Guiguang},
    title     = {{DictAS}: A Framework for Class-Generalizable Few-Shot Anomaly Segmentation via Dictionary Lookup},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {20519--20528},
}
Incremental Few-Shot Semantic Segmentation via Multi-Level Switchable Visual Prompts: Maoxian Wan,

Kaige Li,

Qichuan Geng,

Weimin Shi,

Zhong Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Wan_2025_ICCV,
    author    = {Wan, Maoxian and Li, Kaige and Geng, Qichuan and Shi, Weimin and Zhou, Zhong},
    title     = {Incremental Few-Shot Semantic Segmentation via Multi-Level Switchable Visual Prompts},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {24113--24122},
}
Faster and Better 3D Splatting via Group Training: Chengbo Wang,

Guozheng Ma,

Yifei Xue,

Yizhen Lao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
    author    = {Wang, Chengbo and Ma, Guozheng and Xue, Yifei and Lao, Yizhen},
    title     = {Faster and Better {3D} Splatting via Group Training},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {27968--27977},
}
Flow-MIL: Constructing Highly-expressive Latent Feature Space For Whole Slide Image Classification Using Normalizing Flow: Yingfan Ma,

Bohan An,

Ao Shen,

Mingzhi Yuan,

Minghong Duan,

Manning Wang; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2025_ICCV,
    author    = {Ma, Yingfan and An, Bohan and Shen, Ao and Yuan, Mingzhi and Duan, Minghong and Wang, Manning},
    title     = {{Flow-MIL}: Constructing Highly-expressive Latent Feature Space For Whole Slide Image Classification Using Normalizing Flow},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {23561--23570},
}
3DRealCar: An In-the-wild RGB-D Car Dataset with 360-degree Views: Xiaobiao Du,

Yida Wang,

Haiyang Sun,

Zhuojie Wu,

Hongwei Sheng,

Shuyun Wang,

Jiaying Ying,

Ming Lu,

Tianqing Zhu,

Kun Zhan,

Xin Yu; [pdf] [supp]
[bibtex]
@InProceedings{Du_2025_ICCV,
    author    = {Du, Xiaobiao and Wang, Yida and Sun, Haiyang and Wu, Zhuojie and Sheng, Hongwei and Wang, Shuyun and Ying, Jiaying and Lu, Ming and Zhu, Tianqing and Zhan, Kun and Yu, Xin},
    title     = {{3DRealCar}: An In-the-wild {RGB-D} Car Dataset with 360-degree Views},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {26488--26498},
}
The Devil is in the Spurious Correlations: Boosting Moment Retrieval with Dynamic Learning: Xinyang Zhou,

Fanyue Wei,

Lixin Duan,

Angela Yao,

Wen Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
    author    = {Zhou, Xinyang and Wei, Fanyue and Duan, Lixin and Yao, Angela and Li, Wen},
    title     = {The Devil is in the Spurious Correlations: Boosting Moment Retrieval with Dynamic Learning},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {20981--20990},
}
Event-based Visual Vibrometry: Xinyu Zhou,

Peiqi Duan,

Yeliduosi Xiaokaiti,

Chao Xu,

Boxin Shi; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
    author    = {Zhou, Xinyu and Duan, Peiqi and Xiaokaiti, Yeliduosi and Xu, Chao and Shi, Boxin},
    title     = {Event-based Visual Vibrometry},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {24666--24676},
}
Keep Your Friends Close, and Your Enemies Farther: Distance-aware Voxel-wise Contrastive Learning for Semi-supervised Multi-organ Segmentation: Haochen Zhao,

Jianwei Niu,

Xuefeng Liu,

Xiaozheng Xie,

Li Kuang,

Haotian Yang,

Bin Dai,

Hui Meng,

Yong Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_ICCV,
    author    = {Zhao, Haochen and Niu, Jianwei and Liu, Xuefeng and Xie, Xiaozheng and Kuang, Li and Yang, Haotian and Dai, Bin and Meng, Hui and Wang, Yong},
    title     = {Keep Your Friends Close, and Your Enemies Farther: Distance-aware Voxel-wise Contrastive Learning for Semi-supervised Multi-organ Segmentation},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {21832--21842},
}
Online Reasoning Video Segmentation with Just-in-Time Digital Twins: Yiqing Shen,

Bohan Liu,

Chenjia Li,

Lalithkumar Seenivasan,

Mathias Unberath; [pdf] [arXiv]
[bibtex]
@InProceedings{Shen_2025_ICCV,
    author    = {Shen, Yiqing and Liu, Bohan and Li, Chenjia and Seenivasan, Lalithkumar and Unberath, Mathias},
    title     = {Online Reasoning Video Segmentation with Just-in-Time Digital Twins},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {24698--24706},
}
InterGSEdit: Interactive 3D Gaussian Splatting Editing with 3D Geometry-Consistent Attention Prior: Minghao Wen,

Shengjie Wu,

Kangkan Wang,

Dong Liang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wen_2025_ICCV,
    author    = {Wen, Minghao and Wu, Shengjie and Wang, Kangkan and Liang, Dong},
    title     = {{InterGSEdit}: Interactive {3D} {Gaussian} Splatting Editing with {3D} Geometry-Consistent Attention Prior},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {26136--26145},
}
Ph-GAN: Physics-Inspired GAN for Generating SAR Images Under Limited Data: Xidan Zhang,

Yihan Zhuang,

Qian Guo,

Haodong Yang,

Xuelin Qian,

Gong Cheng,

Junwei Han,

Zhongling Huang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
    author    = {Zhang, Xidan and Zhuang, Yihan and Guo, Qian and Yang, Haodong and Qian, Xuelin and Cheng, Gong and Han, Junwei and Huang, Zhongling},
    title     = {{Ph-GAN}: Physics-Inspired {GAN} for Generating {SAR} Images Under Limited Data},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {29075--29085},
}
Interpretable point cloud classification using multiple instance learning: Matt De Vries,

Reed Naidoo,

Olga Fourkioti,

Lucas G. Dent,

Nathan Curry,

Chris Dunsby,

Chris Bakal; [pdf] [supp]
[bibtex]
@InProceedings{De_Vries_2025_ICCV,
    author    = {De Vries, Matt and Naidoo, Reed and Fourkioti, Olga and Dent, Lucas G. and Curry, Nathan and Dunsby, Chris and Bakal, Chris},
    title     = {Interpretable point cloud classification using multiple instance learning},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {22209--22220},
}
Efficient Fine-Tuning of Large Models via Nested Low-Rank Adaptation: Lujun Li,

Cheng Lin,

Dezhi Li,

You-Liang Huang,

Wei Li,

Tianyu Wu,

Jie Zou,

Wei Xue,

Sirui Han,

Yike Guo; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
    author    = {Li, Lujun and Lin, Cheng and Li, Dezhi and Huang, You-Liang and Li, Wei and Wu, Tianyu and Zou, Jie and Xue, Wei and Han, Sirui and Guo, Yike},
    title     = {Efficient Fine-Tuning of Large Models via Nested Low-Rank Adaptation},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {22252--22262},
}
CoSMIC: Continual Self-supervised Learning for Multi-Domain Medical Imaging via Conditional Mutual Information Maximization: Yihang Liu,

Ying Wen,

Longzhen Yang,

Lianghua He,

Heng Tao Shen; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV,
    author    = {Liu, Yihang and Wen, Ying and Yang, Longzhen and He, Lianghua and Shen, Heng Tao},
    title     = {{CoSMIC}: Continual Self-supervised Learning for Multi-Domain Medical Imaging via Conditional Mutual Information Maximization},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {23051--23062},
}
Learnable Retrieval Enhanced Visual-Text Alignment and Fusion for Radiology Report Generation: Qin Zhou,

Guoyan Liang,

Xindi Li,

Jingyuan Chen,

Zhe Wang,

Chang Yao,

Sai Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
    author    = {Zhou, Qin and Liang, Guoyan and Li, Xindi and Chen, Jingyuan and Wang, Zhe and Yao, Chang and Wu, Sai},
    title     = {Learnable Retrieval Enhanced Visual-Text Alignment and Fusion for Radiology Report Generation},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {22529--22538},
}
Revisiting Adversarial Patch Defenses on Object Detectors: Unified Evaluation, Large-Scale Dataset, and New Insights: Junhao Zheng,

Jiahao Sun,

Chenhao Lin,

Zhengyu Zhao,

Chen Ma,

Chong Zhang,

Cong Wang,

Qian Wang,

Chao Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
    author    = {Zheng, Junhao and Sun, Jiahao and Lin, Chenhao and Zhao, Zhengyu and Ma, Chen and Zhang, Chong and Wang, Cong and Wang, Qian and Shen, Chao},
    title     = {Revisiting Adversarial Patch Defenses on Object Detectors: Unified Evaluation, Large-Scale Dataset, and New Insights},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {23476--23486},
}
4D-Bench: Benchmarking Multi-modal Large Language Models for 4D Object Understanding: Wenxuan Zhu,

Bing Li,

Cheng Zheng,

Jinjie Mai,

Jun Chen,

Letian Jiang,

Abdullah Hamdi,

Sara Rojas Martinez,

Chia-Wen Lin,

Mohamed Elhoseiny,

Bernard Ghanem; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
    author    = {Zhu, Wenxuan and Li, Bing and Zheng, Cheng and Mai, Jinjie and Chen, Jun and Jiang, Letian and Hamdi, Abdullah and Martinez, Sara Rojas and Lin, Chia-Wen and Elhoseiny, Mohamed and Ghanem, Bernard},
    title     = {{4D-Bench}: Benchmarking Multi-modal Large Language Models for {4D} Object Understanding},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {21129--21143},
}
How Do Optical Flow and Textual Prompts Collaborate to Assist in Audio-Visual Semantic Segmentation?: Yujian Lee,

Peng Gao,

Yongqi Xu,

Wentao Fan; [pdf]
[bibtex]
@InProceedings{Lee_2025_ICCV,
    author    = {Lee, Yujian and Gao, Peng and Xu, Yongqi and Fan, Wentao},
    title     = {How Do Optical Flow and Textual Prompts Collaborate to Assist in Audio-Visual Semantic Segmentation?},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {23342--23352},
}
Epona: Autoregressive Diffusion World Model for Autonomous Driving: Kaiwen Zhang,

Zhenyu Tang,

Xiaotao Hu,

Xingang Pan,

Xiaoyang Guo,

Yuan Liu,

Jingwei Huang,

Li Yuan,

Qian Zhang,

Xiao-Xiao Long,

Xun Cao,

Wei Yin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
    author    = {Zhang, Kaiwen and Tang, Zhenyu and Hu, Xiaotao and Pan, Xingang and Guo, Xiaoyang and Liu, Yuan and Huang, Jingwei and Yuan, Li and Zhang, Qian and Long, Xiao-Xiao and Cao, Xun and Yin, Wei},
    title     = {Epona: Autoregressive Diffusion World Model for Autonomous Driving},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {27220--27230},
}
Visual Surface Wave Elastography: Revealing Subsurface Physical Properties via Visible Surface Waves: Alexander C. Ogren,

Berthy T. Feng,

Jihoon Ahn,

Katherine L. Bouman,

Chiara Daraio; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ogren_2025_ICCV,
    author    = {Ogren, Alexander C. and Feng, Berthy T. and Ahn, Jihoon and Bouman, Katherine L. and Daraio, Chiara},
    title     = {Visual Surface Wave Elastography: Revealing Subsurface Physical Properties via Visible Surface Waves},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {26446--26455},
}
ViCTr: Vital Consistency Transfer for Pathology Aware Image Synthesis: Onkar Susladkar,

Gayatri Deshmukh,

Yalcin Tur,

Gorkem Durak,

Ulas Bagci; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Susladkar_2025_ICCV,
    author    = {Susladkar, Onkar and Deshmukh, Gayatri and Tur, Yalcin and Durak, Gorkem and Bagci, Ulas},
    title     = {{ViCTr}: Vital Consistency Transfer for Pathology Aware Image Synthesis},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {22772--22782},
}
When Pixel Difference Patterns Meet ViT: PiDiViT for Few-Shot Object Detection: Hongliang Zhou,

Yongxiang Liu,

Canyu Mo,

Weijie Li,

Bowen Peng,

Li Liu; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
    author    = {Zhou, Hongliang and Liu, Yongxiang and Mo, Canyu and Li, Weijie and Peng, Bowen and Liu, Li},
    title     = {When Pixel Difference Patterns Meet {ViT}: {PiDiViT} for Few-Shot Object Detection},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {24309--24318},
}
From Objects to Events: Unlocking Complex Visual Understanding in Object Detectors via LLM-guided Symbolic Reasoning: Yuhui Zeng,

Haoxiang Wu,

Wenjie Nie,

Guangyao Chen,

Xiawu Zheng,

Yunhang Shen,

Jun Peng,

Yonghong Tian,

Rongrong Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2025_ICCV,
    author    = {Zeng, Yuhui and Wu, Haoxiang and Nie, Wenjie and Chen, Guangyao and Zheng, Xiawu and Shen, Yunhang and Peng, Jun and Tian, Yonghong and Ji, Rongrong},
    title     = {From Objects to Events: Unlocking Complex Visual Understanding in Object Detectors via {LLM}-guided Symbolic Reasoning},
    booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
    month     = oct,
    year      = {2025},
    pages     = {24380--24391},
}
Pruning All-Rounder: Rethinking and Improving Inference Efficiency for Large Vision Language Models: Wei Suo,

Ji Ma,

Mengyang Sun,

Lin Yuanbo Wu,

Peng Wang,

Yanning Zhang; [pdf]
[bibtex]
@InProceedings{Suo_2025_ICCV,
  author    = {Suo, Wei and Ma, Ji and Sun, Mengyang and Wu, Lin Yuanbo and Wang, Peng and Zhang, Yanning},
  title     = {Pruning All-Rounder: Rethinking and Improving Inference Efficiency for Large Vision Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20247--20256},
}
Discontinuity-aware Normal Integration for Generic Central Camera Models: Francesco Milano,

Manuel López-Antequera,

Naina Dhingra,

Roland Siegwart,

Robert Thiel; [pdf] [supp]
[bibtex]
@InProceedings{Milano_2025_ICCV,
  author    = {Milano, Francesco and L{\'o}pez-Antequera, Manuel and Dhingra, Naina and Siegwart, Roland and Thiel, Robert},
  title     = {Discontinuity-aware Normal Integration for Generic Central Camera Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26026--26034},
}
OmniDiff: A Comprehensive Benchmark for Fine-grained Image Difference Captioning: Yuan Liu,

Saihui Hou,

Saijie Hou,

Jiabao Du,

Shibei Meng,

Yongzhen Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yuan and Hou, Saihui and Hou, Saijie and Du, Jiabao and Meng, Shibei and Huang, Yongzhen},
  title     = {{OmniDiff}: A Comprehensive Benchmark for Fine-grained Image Difference Captioning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21440--21449},
}
MAESTRO: Task-Relevant Optimization via Adaptive Feature Enhancement and Suppression for Multi-task 3D Perception: Changwon Kang,

Jisong Kim,

Hongjae Shin,

Junseo Park,

Jun Won Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2025_ICCV,
  author    = {Kang, Changwon and Kim, Jisong and Shin, Hongjae and Park, Junseo and Choi, Jun Won},
  title     = {{MAESTRO}: Task-Relevant Optimization via Adaptive Feature Enhancement and Suppression for Multi-task {3D} Perception},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28313--28323},
}
CasP: Improving Semi-Dense Feature Matching Pipeline Leveraging Cascaded Correspondence Priors for Guidance: Peiqi Chen,

Lei Yu,

Yi Wan,

Yingying Pei,

Xinyi Liu,

Yongxiang Yao,

Yingying Zhang,

Lixiang Ru,

Liheng Zhong,

Jingdong Chen,

Ming Yang,

Yongjun Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Peiqi and Yu, Lei and Wan, Yi and Pei, Yingying and Liu, Xinyi and Yao, Yongxiang and Zhang, Yingying and Ru, Lixiang and Zhong, Liheng and Chen, Jingdong and Yang, Ming and Zhang, Yongjun},
  title     = {{CasP}: Improving Semi-Dense Feature Matching Pipeline Leveraging Cascaded Correspondence Priors for Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28063--28072},
}
ResQ: A Novel Framework to Implement Residual Neural Networks on Analog Rydberg Atom Quantum Computers: Nicholas S. DiBrita,

Jason Han,

Tirthak Patel; [pdf] [arXiv]
[bibtex]
@InProceedings{DiBrita_2025_ICCV,
  author    = {DiBrita, Nicholas S. and Han, Jason and Patel, Tirthak},
  title     = {{ResQ}: A Novel Framework to Implement Residual Neural Networks on Analog {Rydberg} Atom Quantum Computers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20085--20094},
}
Coordinate-based Speed of Sound Recovery for Aberration-Corrected Photoacoustic Computed Tomography: Tianao Li,

Manxiu Cui,

Cheng Ma,

Emma Alexander; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Tianao and Cui, Manxiu and Ma, Cheng and Alexander, Emma},
  title     = {Coordinate-based Speed of Sound Recovery for Aberration-Corrected Photoacoustic Computed Tomography},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27466--27475},
}
HarmonySeg: Tubular Structure Segmentation with Deep-Shallow Feature Fusion and Growth-Suppression Balanced Loss: Yi Huang,

Ke Zhang,

Wei Liu,

Yuanyuan Wang,

Vishal M. Patel,

Le Lu,

Xu Han,

Dakai Jin,

Ke Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Yi and Zhang, Ke and Liu, Wei and Wang, Yuanyuan and Patel, Vishal M. and Lu, Le and Han, Xu and Jin, Dakai and Yan, Ke},
  title     = {{HarmonySeg}: Tubular Structure Segmentation with Deep-Shallow Feature Fusion and Growth-Suppression Balanced Loss},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23571--23581},
}
Beyond Training: Dynamic Token Merging for Zero-Shot Video Understanding: Yiming Zhang,

Zhuokai Zhao,

Zhaorun Chen,

Zenghui Ding,

Xianjun Yang,

Yining Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yiming and Zhao, Zhuokai and Chen, Zhaorun and Ding, Zenghui and Yang, Xianjun and Sun, Yining},
  title     = {Beyond Training: Dynamic Token Merging for Zero-Shot Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22046--22055},
}
CounterPC: Counterfactual Feature Realignment for Unsupervised Domain Adaptation on Point Clouds: Feng Yang,

Yichao Cao,

Xiu Su,

Dan Niu,

Xuanpeng Li; [pdf]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Feng and Cao, Yichao and Su, Xiu and Niu, Dan and Li, Xuanpeng},
  title     = {{CounterPC}: Counterfactual Feature Realignment for Unsupervised Domain Adaptation on Point Clouds},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24760--24769},
}
MPG-SAM 2: Adapting SAM 2 with Mask Priors and Global Context for Referring Video Object Segmentation: Fu Rong,

Meng Lan,

Qian Zhang,

Lefei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rong_2025_ICCV,
  author    = {Rong, Fu and Lan, Meng and Zhang, Qian and Zhang, Lefei},
  title     = {{MPG-SAM 2}: Adapting {SAM 2} with Mask Priors and Global Context for Referring Video Object Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23979--23989},
}
FreeSplatter: Pose-free Gaussian Splatting for Sparse-view 3D Reconstruction: Jiale Xu,

Shenghua Gao,

Ying Shan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Jiale and Gao, Shenghua and Shan, Ying},
  title     = {{FreeSplatter}: Pose-free {Gaussian} Splatting for Sparse-view {3D} Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25442--25452},
}
FreqPDE: Rethinking Positional Depth Embedding for Multi-View 3D Object Detection Transformers: Haisheng Su,

Junjie Zhang,

Feixiang Song,

Sanping Zhou,

Wei Wu,

Junchi Yan,

Nanning Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Su_2025_ICCV,
  author    = {Su, Haisheng and Zhang, Junjie and Song, Feixiang and Zhou, Sanping and Wu, Wei and Yan, Junchi and Zheng, Nanning},
  title     = {{FreqPDE}: Rethinking Positional Depth Embedding for Multi-View {3D} Object Detection Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28145--28155},
}
Hierarchy-Aware Pseudo Word Learning with Text Adaptation for Zero-Shot Composed Image Retrieval: Zhe Li,

Lei Zhang,

Zheren Fu,

Kun Zhang,

Zhendong Mao; [pdf]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Zhe and Zhang, Lei and Fu, Zheren and Zhang, Kun and Mao, Zhendong},
  title     = {Hierarchy-Aware Pseudo Word Learning with Text Adaptation for Zero-Shot Composed Image Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24319--24329},
}
WSI-LLaVA: A Multimodal Large Language Model for Whole Slide Image: Yuci Liang,

Xinheng Lyu,

Wenting Chen,

Meidan Ding,

Jipeng Zhang,

Xiangjian He,

Song Wu,

Xiaohan Xing,

Sen Yang,

Xiyue Wang,

Linlin Shen; [pdf] [supp]
[bibtex]
@InProceedings{Liang_2025_ICCV,
  author    = {Liang, Yuci and Lyu, Xinheng and Chen, Wenting and Ding, Meidan and Zhang, Jipeng and He, Xiangjian and Wu, Song and Xing, Xiaohan and Yang, Sen and Wang, Xiyue and Shen, Linlin},
  title     = {{WSI-LLaVA}: A Multimodal Large Language Model for Whole Slide Image},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22718--22727},
}
Enhancing Partially Relevant Video Retrieval with Hyperbolic Learning: Jun Li,

Jinpeng Wang,

Chaolei Tan,

Niu Lian,

Long Chen,

Yaowei Wang,

Min Zhang,

Shu-Tao Xia,

Bin Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Jun and Wang, Jinpeng and Tan, Chaolei and Lian, Niu and Chen, Long and Wang, Yaowei and Zhang, Min and Xia, Shu-Tao and Chen, Bin},
  title     = {Enhancing Partially Relevant Video Retrieval with Hyperbolic Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23074--23084},
}
Learning Yourself: Class-Incremental Semantic Segmentation with Language-Inspired Bootstrapped Disentanglement: Ruitao Wu,

Yifan Zhao,

Jia Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Ruitao and Zhao, Yifan and Li, Jia},
  title     = {Learning Yourself: Class-Incremental Semantic Segmentation with Language-Inspired Bootstrapped Disentanglement},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21623--21634},
}
Towards Fine-grained Interactive Segmentation in Images and Videos: Yuan Yao,

Qiushi Yang,

Miaomiao Cui,

Liefeng Bo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yao_2025_ICCV,
  author    = {Yao, Yuan and Yang, Qiushi and Cui, Miaomiao and Bo, Liefeng},
  title     = {Towards Fine-grained Interactive Segmentation in Images and Videos},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22509--22518},
}
Puzzle Similarity: A Perceptually-guided Cross-Reference Metric for Artifact Detection in 3D Scene Reconstructions: Nicolai Hermann,

Jorge Condor,

Piotr Didyk; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hermann_2025_ICCV,
  author    = {Hermann, Nicolai and Condor, Jorge and Didyk, Piotr},
  title     = {Puzzle Similarity: A Perceptually-guided Cross-Reference Metric for Artifact Detection in {3D} Scene Reconstructions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28881--28891},
}
AlignDiff: Learning Physically-Grounded Camera Alignment via Diffusion: Liuyue Xie,

Jiancong Guo,

Ozan Cakmakci,

Andre Araujo,

László A. Jeni,

Zhiheng Jia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_ICCV,
  author    = {Xie, Liuyue and Guo, Jiancong and Cakmakci, Ozan and Araujo, Andre and Jeni, L{\'a}szl{\'o} A. and Jia, Zhiheng},
  title     = {{AlignDiff}: Learning Physically-Grounded Camera Alignment via Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26901--26911},
}
Toward Fair and Accurate Cross-Domain Medical Image Segmentation: A VLM-Driven Active Domain Adaptation Paradigm: Hongqiu Wang,

Wu Chen,

Xiangde Luo,

Zhaohu Xing,

Lihao Liu,

Jing Qin,

Shaozhi Wu,

Lei Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Hongqiu and Chen, Wu and Luo, Xiangde and Xing, Zhaohu and Liu, Lihao and Qin, Jing and Wu, Shaozhi and Zhu, Lei},
  title     = {Toward Fair and Accurate Cross-Domain Medical Image Segmentation: A {VLM}-Driven Active Domain Adaptation Paradigm},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24102--24112},
}
Scaling Transformer-Based Novel View Synthesis with Models Token Disentanglement and Synthetic Data: Nithin Gopalakrishnan Nair,

Srinivas Kaza,

Xuan Luo,

Vishal M. Patel,

Stephen Lombardi,

Jungyeon Park; [pdf] [supp]
[bibtex]
@InProceedings{Nair_2025_ICCV,
  author    = {Nair, Nithin Gopalakrishnan and Kaza, Srinivas and Luo, Xuan and Patel, Vishal M. and Lombardi, Stephen and Park, Jungyeon},
  title     = {Scaling Transformer-Based Novel View Synthesis with Models Token Disentanglement and Synthetic Data},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28567--28576},
}
Multi-Schema Proximity Network for Composed Image Retrieval: Jiangming Shi,

Xiangbo Yin,

Yeyun Chen,

Yachao Zhang,

Zhizhong Zhang,

Yuan Xie,

Yanyun Qu; [pdf] [supp]
[bibtex]
@InProceedings{Shi_2025_ICCV,
  author    = {Shi, Jiangming and Yin, Xiangbo and Chen, Yeyun and Zhang, Yachao and Zhang, Zhizhong and Xie, Yuan and Qu, Yanyun},
  title     = {Multi-Schema Proximity Network for Composed Image Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19999--20008},
}
Harnessing Text-to-Image Diffusion Models for Point Cloud Self-Supervised Learning: Yiyang Chen,

Shanshan Zhao,

Lunhao Duan,

Changxing Ding,

Dacheng Tao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Yiyang and Zhao, Shanshan and Duan, Lunhao and Ding, Changxing and Tao, Dacheng},
  title     = {Harnessing Text-to-Image Diffusion Models for Point Cloud Self-Supervised Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26156--26166},
}
Bridging the Gap between Brain and Machine in Interpreting Visual Semantics: Towards Self-adaptive Brain-to-Text Decoding: Jiaxuan Chen,

Yu Qi,

Yueming Wang,

Gang Pan; [pdf]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Jiaxuan and Qi, Yu and Wang, Yueming and Pan, Gang},
  title     = {Bridging the Gap between Brain and Machine in Interpreting Visual Semantics: Towards Self-adaptive Brain-to-Text Decoding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21938--21948},
}
Doppler-Aware LiDAR-RADAR Fusion for Weather-Robust 3D Detection: Yujeong Chae,

Heejun Park,

Hyeonseong Kim,

Kuk-Jin Yoon; [pdf] [supp]
[bibtex]
@InProceedings{Chae_2025_ICCV,
  author    = {Chae, Yujeong and Park, Heejun and Kim, Hyeonseong and Yoon, Kuk-Jin},
  title     = {{Doppler}-Aware {LiDAR}-{RADAR} Fusion for Weather-Robust {3D} Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27197--27208},
}
VideoOrion: Tokenizing Object Dynamics in Videos: Yicheng Feng,

Yijiang Li,

Wanpeng Zhang,

Sipeng Zheng,

Hao Luo,

Zihao Yue,

Zongqing Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2025_ICCV,
  author    = {Feng, Yicheng and Li, Yijiang and Zhang, Wanpeng and Zheng, Sipeng and Luo, Hao and Yue, Zihao and Lu, Zongqing},
  title     = {{VideoOrion}: Tokenizing Object Dynamics in Videos},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20401--20412},
}
Cracking Instance Jigsaw Puzzles: An Alternative to Multiple Instance Learning for Whole Slide Image Analysis: Xiwen Chen,

Peijie Qiu,

Wenhui Zhu,

Hao Wang,

Huayu Li,

Xuanzhao Dong,

Xiaotong Sun,

Xiaobing Yu,

Yalin Wang,

Abolfazl Razi,

Aristeidis Sotiras; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Xiwen and Qiu, Peijie and Zhu, Wenhui and Wang, Hao and Li, Huayu and Dong, Xuanzhao and Sun, Xiaotong and Yu, Xiaobing and Wang, Yalin and Razi, Abolfazl and Sotiras, Aristeidis},
  title     = {Cracking Instance Jigsaw Puzzles: An Alternative to Multiple Instance Learning for Whole Slide Image Analysis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21353--21363},
}
FlowR: Flowing from Sparse to Dense 3D Reconstructions: Tobias Fischer,

Samuel Rota Bulò,

Yung-Hsu Yang,

Nikhil Keetha,

Lorenzo Porzi,

Norman Müller,

Katja Schwarz,

Jonathon Luiten,

Marc Pollefeys,

Peter Kontschieder; [pdf] [supp]
[bibtex]
@InProceedings{Fischer_2025_ICCV,
  author    = {Fischer, Tobias and Bul{\`o}, Samuel Rota and Yang, Yung-Hsu and Keetha, Nikhil and Porzi, Lorenzo and M{\"u}ller, Norman and Schwarz, Katja and Luiten, Jonathon and Pollefeys, Marc and Kontschieder, Peter},
  title     = {{FlowR}: Flowing from Sparse to Dense {3D} Reconstructions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27702--27712},
}
SPA: Efficient User-Preference Alignment against Uncertainty in Medical Image Segmentation: Jiayuan Zhu,

Junde Wu,

Cheng Ouyang,

Konstantinos Kamnitsas,

J. Alison Noble; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Jiayuan and Wu, Junde and Ouyang, Cheng and Kamnitsas, Konstantinos and Noble, J. Alison},
  title     = {{SPA}: Efficient User-Preference Alignment against Uncertainty in Medical Image Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23731--23740},
}
GaussianReg: Rapid 2D/3D Registration for Emergency Surgery via Explicit 3D Modeling with Gaussian Primitives: Weihao Yu,

Xiaoqing Guo,

Xinyu Liu,

Yifan Liu,

Hao Zheng,

Yawen Huang,

Yixuan Yuan; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Weihao and Guo, Xiaoqing and Liu, Xinyu and Liu, Yifan and Zheng, Hao and Huang, Yawen and Yuan, Yixuan},
  title     = {{GaussianReg}: Rapid {2D/3D} Registration for Emergency Surgery via Explicit {3D} Modeling with {Gaussian} Primitives},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21482--21491},
}
Robustifying Zero-Shot Vision Language Models by Subspaces Alignment: Junhao Dong,

Piotr Koniusz,

Liaoyuan Feng,

Yifei Zhang,

Hao Zhu,

Weiming Liu,

Xinghua Qu,

Yew-Soon Ong; [pdf] [supp]
[bibtex]
@InProceedings{Dong_2025_ICCV,
  author    = {Dong, Junhao and Koniusz, Piotr and Feng, Liaoyuan and Zhang, Yifei and Zhu, Hao and Liu, Weiming and Qu, Xinghua and Ong, Yew-Soon},
  title     = {Robustifying Zero-Shot Vision Language Models by Subspaces Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21037--21047},
}
GaRe: Relightable 3D Gaussian Splatting for Outdoor Scenes from Unconstrained Photo Collections: Haiyang Bai,

Jiaqi Zhu,

Songru Jiang,

Wei Huang,

Tao Lu,

Yuanqi Li,

Jie Guo,

Runze Fu,

Yanwen Guo,

Lijun Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bai_2025_ICCV,
  author    = {Bai, Haiyang and Zhu, Jiaqi and Jiang, Songru and Huang, Wei and Lu, Tao and Li, Yuanqi and Guo, Jie and Fu, Runze and Guo, Yanwen and Chen, Lijun},
  title     = {{GaRe}: Relightable {3D} {Gaussian} Splatting for Outdoor Scenes from Unconstrained Photo Collections},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26456--26465},
}
LoD-Loc v2: Aerial Visual Localization over Low Level-of-Detail City Models using Explicit Silhouette Alignment: Juelin Zhu,

Shuaibang Peng,

Long Wang,

Hanlin Tan,

Yu Liu,

Maojun Zhang,

Shen Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Juelin and Peng, Shuaibang and Wang, Long and Tan, Hanlin and Liu, Yu and Zhang, Maojun and Yan, Shen},
  title     = {{LoD-Loc} v2: Aerial Visual Localization over Low Level-of-Detail City Models using Explicit Silhouette Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26610--26621},
}
EYE3: Turn Anything into Naked-eye 3D: Yingde Song,

Zongyuan Yang,

Baolin Liu,

Yongping Xiong,

Sai Chen,

Lan Yi,

Zhaohe Zhang,

Xunbo Yu; [pdf] [supp]
[bibtex]
@InProceedings{Song_2025_ICCV,
  author    = {Song, Yingde and Yang, Zongyuan and Liu, Baolin and Xiong, Yongping and Chen, Sai and Yi, Lan and Zhang, Zhaohe and Yu, Xunbo},
  title     = {{EYE3}: Turn Anything into Naked-eye {3D}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27862--27871},
}
LV-MAE: Learning Long Video Representations through Masked-Embedding Autoencoders: Ilan Naiman,

Emanuel Ben-Baruch,

Oron Anschel,

Alon Shoshan,

Igor Kviatkovsky,

Manoj Aggarwal,

Gerard Medioni; [pdf] [supp]
[bibtex]
@InProceedings{Naiman_2025_ICCV,
  author    = {Naiman, Ilan and Ben-Baruch, Emanuel and Anschel, Oron and Shoshan, Alon and Kviatkovsky, Igor and Aggarwal, Manoj and Medioni, Gerard},
  title     = {{LV-MAE}: Learning Long Video Representations through Masked-Embedding Autoencoders},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21398--21407},
}
STDDNet: Harnessing Mamba for Video Polyp Segmentation via Spatial-aligned Temporal Modeling and Discriminative Dynamic Representation Learning: Guilian Chen,

Huisi Wu,

Jing Qin; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Guilian and Wu, Huisi and Qin, Jing},
  title     = {{STDDNet}: Harnessing {Mamba} for Video Polyp Segmentation via Spatial-aligned Temporal Modeling and Discriminative Dynamic Representation Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21364--21373},
}
2D Gaussian Splatting-based Sparse-view Transparent Object Depth Reconstruction via Physics Simulation for Scene Update: Jeongyun Kim,

Seunghoon Jeong,

Giseop Kim,

Myung-Hwan Jeon,

Eunji Jun,

Ayoung Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Jeongyun and Jeong, Seunghoon and Kim, Giseop and Jeon, Myung-Hwan and Jun, Eunji and Kim, Ayoung},
  title     = {{2D} {Gaussian} Splatting-based Sparse-view Transparent Object Depth Reconstruction via Physics Simulation for Scene Update},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27927--27936},
}
HouseCrafter: Lifting Floorplans to 3D Scenes with 2D Diffusion Models: Yiwen Chen,

Hieu T. Nguyen,

Vikram Voleti,

Varun Jampani,

Huaizu Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Yiwen and Nguyen, Hieu T. and Voleti, Vikram and Jampani, Varun and Jiang, Huaizu},
  title     = {{HouseCrafter}: Lifting Floorplans to {3D} Scenes with {2D} Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28440--28450},
}
Leveraging Local Patch Alignment to Seam-cutting for Large Parallax Image Stitching: Tianli Liao,

Chenyang Zhao,

Lei Li,

Heling Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liao_2025_ICCV,
  author    = {Liao, Tianli and Zhao, Chenyang and Li, Lei and Cao, Heling},
  title     = {Leveraging Local Patch Alignment to Seam-cutting for Large Parallax Image Stitching},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27262--27271},
}
Revisiting Efficient Semantic Segmentation: Learning Offsets for Better Spatial and Class Feature Alignment: Shi-Chen Zhang,

Yunheng Li,

Yu-Huan Wu,

Qibin Hou,

Ming-Ming Cheng; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Shi-Chen and Li, Yunheng and Wu, Yu-Huan and Hou, Qibin and Cheng, Ming-Ming},
  title     = {Revisiting Efficient Semantic Segmentation: Learning Offsets for Better Spatial and Class Feature Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22361--22371},
}
TAD-E2E: A Large-scale End-to-end Autonomous Driving Dataset: Chang Liu,

Mingxu Zhu,

Zheyuan Zhang,

Linna Song,

Xiao Zhao,

Qingliang Luo,

Qi Wang,

Chufan Guo,

Kuifeng Su; [pdf]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Chang and Zhu, Mingxu and Zhang, Zheyuan and Song, Linna and Zhao, Xiao and Luo, Qingliang and Wang, Qi and Guo, Chufan and Su, Kuifeng},
  title     = {{TAD-E2E}: A Large-scale End-to-end Autonomous Driving Dataset},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26600--26609},
}
LVAgent: Long Video Understanding by Multi-Round Dynamical Collaboration of MLLM Agents: Boyu Chen,

Zhengrong Yue,

Siran Chen,

Zikang Wang,

Yang Liu,

Peng Li,

Yali Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Boyu and Yue, Zhengrong and Chen, Siran and Wang, Zikang and Liu, Yang and Li, Peng and Wang, Yali},
  title     = {{LVAgent}: Long Video Understanding by Multi-Round Dynamical Collaboration of {MLLM} Agents},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20237--20246},
}
Spatio-Spectral Pattern Illumination for Direct and Indirect Separation from a Single Hyperspectral Image: Shin Ishihara,

Imari Sato; [pdf] [supp]
[bibtex]
@InProceedings{Ishihara_2025_ICCV,
  author    = {Ishihara, Shin and Sato, Imari},
  title     = {Spatio-Spectral Pattern Illumination for Direct and Indirect Separation from a Single Hyperspectral Image},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26827--26836},
}
Explaining Human Preferences via Metrics for Structured 3D Reconstruction: Jack Langerman,

Denys Rozumnyi,

Yuzhong Huang,

Dmytro Mishkin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Langerman_2025_ICCV,
  author    = {Langerman, Jack and Rozumnyi, Denys and Huang, Yuzhong and Mishkin, Dmytro},
  title     = {Explaining Human Preferences via Metrics for Structured {3D} Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26944--26953},
}
VistaDream: Sampling multiview consistent images for single-view scene reconstruction: Haiping Wang,

Yuan Liu,

Ziwei Liu,

Wenping Wang,

Zhen Dong,

Bisheng Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Haiping and Liu, Yuan and Liu, Ziwei and Wang, Wenping and Dong, Zhen and Yang, Bisheng},
  title     = {{VistaDream}: Sampling multiview consistent images for single-view scene reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26772--26782},
}
LangScene-X: Reconstruct Generalizable 3D Language-Embedded Scenes with TriMap Video Diffusion: Fangfu Liu,

Hao Li,

Jiawei Chi,

Hanyang Wang,

Minghui Yang,

Fudong Wang,

Yueqi Duan; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Fangfu and Li, Hao and Chi, Jiawei and Wang, Hanyang and Yang, Minghui and Wang, Fudong and Duan, Yueqi},
  title     = {{LangScene-X}: Reconstruct Generalizable {3D} Language-Embedded Scenes with {TriMap} Video Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {29010--29020},
}
Orchid: Image Latent Diffusion for Joint Appearance and Geometry Generation: Akshay Krishnan,

Xinchen Yan,

Vincent Casser,

Abhijit Kundu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Krishnan_2025_ICCV,
  author    = {Krishnan, Akshay and Yan, Xinchen and Casser, Vincent and Kundu, Abhijit},
  title     = {Orchid: Image Latent Diffusion for Joint Appearance and Geometry Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28217--28227},
}
MonoMVSNet: Monocular Priors Guided Multi-View Stereo Network: Jianfei Jiang,

Qiankun Liu,

Haochen Yu,

Hongyuan Liu,

Liyong Wang,

Jiansheng Chen,

Huimin Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_ICCV,
  author    = {Jiang, Jianfei and Liu, Qiankun and Yu, Haochen and Liu, Hongyuan and Wang, Liyong and Chen, Jiansheng and Ma, Huimin},
  title     = {{MonoMVSNet}: Monocular Priors Guided Multi-View Stereo Network},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27806--27816},
}
Top2Pano: Learning to Generate Indoor Panoramas from Top-Down View: Zitong Zhang,

Suranjan Gautam,

Rui Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Zitong and Gautam, Suranjan and Yu, Rui},
  title     = {{Top2Pano}: Learning to Generate Indoor Panoramas from Top-Down View},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28493--28502},
}
Unbiased Missing-modality Multimodal Learning: Ruiting Dai,

Chenxi Li,

Yandong Yan,

Lisi Mo,

Ke Qin,

Tao He; [pdf]
[bibtex]
@InProceedings{Dai_2025_ICCV,
  author    = {Dai, Ruiting and Li, Chenxi and Yan, Yandong and Mo, Lisi and Qin, Ke and He, Tao},
  title     = {Unbiased Missing-modality Multimodal Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24507--24517},
}
Intermediate Connectors and Geometric Priors for Language-Guided Affordance Segmentation on Unseen Object Categories: Yicong Li,

Yiyang Chen,

Zhenyuan Ma,

Junbin Xiao,

Xiang Wang,

Angela Yao; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Yicong and Chen, Yiyang and Ma, Zhenyuan and Xiao, Junbin and Wang, Xiang and Yao, Angela},
  title     = {Intermediate Connectors and Geometric Priors for Language-Guided Affordance Segmentation on Unseen Object Categories},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22836--22845},
}
FALCON: Resolving Visual Redundancy and Fragmentation in High-resolution Multimodal Large Language Models via Visual Registers: Renshan Zhang,

Rui Shao,

Gongwei Chen,

Miao Zhang,

Kaiwen Zhou,

Weili Guan,

Liqiang Nie; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Renshan and Shao, Rui and Chen, Gongwei and Zhang, Miao and Zhou, Kaiwen and Guan, Weili and Nie, Liqiang},
  title     = {{FALCON}: Resolving Visual Redundancy and Fragmentation in High-resolution Multimodal Large Language Models via Visual Registers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23530--23540},
}
Transformer-based Tooth Alignment Prediction with Occlusion and Collision Constraints: Zhenxing Dong,

Jiazhou Chen; [pdf] [arXiv]
[bibtex]
@InProceedings{Dong_2025_ICCV,
  author    = {Dong, Zhenxing and Chen, Jiazhou},
  title     = {Transformer-based Tooth Alignment Prediction with Occlusion and Collision Constraints},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25145--25154},
}
Perspective-aware 3D Gaussian Inpainting with Multi-view Consistency: Yuxin Cheng,

Binxiao Huang,

Taiqiang Wu,

Wenyong Zhou,

Chenchen Ding,

Zhengwu Liu,

Graziano Chesi,

Ngai Wong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_ICCV,
  author    = {Cheng, Yuxin and Huang, Binxiao and Wu, Taiqiang and Zhou, Wenyong and Ding, Chenchen and Liu, Zhengwu and Chesi, Graziano and Wong, Ngai},
  title     = {Perspective-aware {3D} {Gaussian} Inpainting with Multi-view Consistency},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28503--28513}
}
DistillDrive: End-to-End Multi-Mode Autonomous Driving Distillation by Isomorphic Hetero-Source Planning Model: Rui Yu,

Xianghang Zhang,

Runkai Zhao,

Huaicheng Yan,

Meng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Rui and Zhang, Xianghang and Zhao, Runkai and Yan, Huaicheng and Wang, Meng},
  title     = {{DistillDrive}: End-to-End Multi-Mode Autonomous Driving Distillation by Isomorphic Hetero-Source Planning Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26188--26197}
}
Wide2Long: Learning Lens Compression and Perspective Adjustment for Wide-Angle to Telephoto Translation: Soumyadipta Banerjee,

Jiaul H. Paik,

Debashis Sen; [pdf] [supp]
[bibtex]
@InProceedings{Banerjee_2025_ICCV,
  author    = {Banerjee, Soumyadipta and Paik, Jiaul H. and Sen, Debashis},
  title     = {{Wide2Long}: Learning Lens Compression and Perspective Adjustment for Wide-Angle to Telephoto Translation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {29001--29009}
}
V2XScenes: A Multiple Challenging Traffic Conditions Dataset for Large-Range Vehicle-Infrastructure Collaborative Perception: Bowen Wang,

Yafei Wang,

Wei Gong,

Siheng Chen,

Genjia Liu,

Minhao Xiong,

Chin Long Ng; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Bowen and Wang, Yafei and Gong, Wei and Chen, Siheng and Liu, Genjia and Xiong, Minhao and Ng, Chin Long},
  title     = {{V2XScenes}: A Multiple Challenging Traffic Conditions Dataset for Large-Range Vehicle-Infrastructure Collaborative Perception},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28385--28395}
}
Is CLIP ideal? No. Can we fix it? Yes!: Raphi Kang,

Yue Song,

Georgia Gkioxari,

Pietro Perona; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2025_ICCV,
  author    = {Kang, Raphi and Song, Yue and Gkioxari, Georgia and Perona, Pietro},
  title     = {Is {CLIP} ideal? {No}. {Can} we fix it? {Yes}!},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22436--22446}
}
LaneDiffusion: Improving Centerline Graph Learning via Prior Injected BEV Feature Generation: Zijie Wang,

Weiming Zhang,

Wei Zhang,

Xiao Tan,

Hongxing Liu,

Yaowei Wang,

Guanbin Li; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Zijie and Zhang, Weiming and Zhang, Wei and Tan, Xiao and Liu, Hongxing and Wang, Yaowei and Li, Guanbin},
  title     = {{LaneDiffusion}: Improving Centerline Graph Learning via Prior Injected {BEV} Feature Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27052--27062}
}
Height-Fidelity Dense Global Fusion for Multi-modal 3D Object Detection: Hanshi Wang,

Jin Gao,

Weiming Hu,

Zhipeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Hanshi and Gao, Jin and Hu, Weiming and Zhang, Zhipeng},
  title     = {Height-Fidelity Dense Global Fusion for Multi-modal {3D} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26664--26674}
}
DIH-CLIP: Unleashing the Diversity of Multi-Head Self-Attention for Training-Free Open-Vocabulary Semantic Segmentation: Songsong Duan,

Xi Yang,

Nannan Wang; [pdf] [supp]
[bibtex]
@InProceedings{Duan_2025_ICCV,
  author    = {Duan, Songsong and Yang, Xi and Wang, Nannan},
  title     = {{DIH-CLIP}: Unleashing the Diversity of Multi-Head Self-Attention for Training-Free Open-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22794--22803}
}
ExploreGS: Explorable 3D Scene Reconstruction with Virtual Camera Samplings and Diffusion Priors: Minsu Kim,

Subin Jeon,

In Cho,

Mijin Yoo,

Seon Joo Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Minsu and Jeon, Subin and Cho, In and Yoo, Mijin and Kim, Seon Joo},
  title     = {{ExploreGS}: Explorable {3D} Scene Reconstruction with Virtual Camera Samplings and Diffusion Priors},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27042--27051}
}
Prototypes are Balanced Units for Efficient and Effective Partially Relevant Video Retrieval: WonJun Moon,

Cheol-Ho Cho,

Woojin Jun,

Taeoh Kim,

Inwoong Lee,

Dongyoon Wee,

Minho Shim,

Jae-Pil Heo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Moon_2025_ICCV,
  author    = {Moon, WonJun and Cho, Cheol-Ho and Jun, Woojin and Kim, Taeoh and Lee, Inwoong and Wee, Dongyoon and Shim, Minho and Heo, Jae-Pil},
  title     = {Prototypes are Balanced Units for Efficient and Effective Partially Relevant Video Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21789--21799}
}
SpiLiFormer: Enhancing Spiking Transformers with Lateral Inhibition: Zeqi Zheng,

Yanchen Huang,

Yingchao Yu,

Zizheng Zhu,

Junfeng Tang,

Zhaofei Yu,

Yaochu Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Zeqi and Huang, Yanchen and Yu, Yingchao and Zhu, Zizheng and Tang, Junfeng and Yu, Zhaofei and Jin, Yaochu},
  title     = {{SpiLiFormer}: Enhancing Spiking Transformers with Lateral Inhibition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24539--24548}
}
Seeing the Trees for the Forest: Rethinking Weakly-Supervised Medical Visual Grounding: Ta Duc Huy,

Duy Anh Huynh,

Yutong Xie,

Yuankai Qi,

Qi Chen,

Phi Le Nguyen,

Sen Kim Tran,

Son Lam Phung,

Anton van den Hengel,

Zhibin Liao,

Minh-Son To,

Johan W. Verjans,

Vu Minh Hieu Phan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huy_2025_ICCV,
  author    = {Huy, Ta Duc and Huynh, Duy Anh and Xie, Yutong and Qi, Yuankai and Chen, Qi and Le Nguyen, Phi and Tran, Sen Kim and Phung, Son Lam and van den Hengel, Anton and Liao, Zhibin and To, Minh-Son and Verjans, Johan W. and Phan, Vu Minh Hieu},
  title     = {Seeing the Trees for the Forest: Rethinking Weakly-Supervised Medical Visual Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24445--24455}
}
GroundingSuite: Measuring Complex Multi-Granular Pixel Grounding: Rui Hu,

Lianghui Zhu,

Yuxuan Zhang,

Tianheng Cheng,

Lei Liu,

Heng Liu,

Longjin Ran,

Xiaoxin Chen,

Wenyu Liu,

Xinggang Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Rui and Zhu, Lianghui and Zhang, Yuxuan and Cheng, Tianheng and Liu, Lei and Liu, Heng and Ran, Longjin and Chen, Xiaoxin and Liu, Wenyu and Wang, Xinggang},
  title     = {{GroundingSuite}: Measuring Complex Multi-Granular Pixel Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23105--23114}
}
VideoRFSplat: Direct Scene-Level Text-to-3D Gaussian Splatting Generation with Flexible Pose and Multi-View Joint Modeling: Hyojun Go,

Byeongjun Park,

Hyelin Nam,

Byung-Hoon Kim,

Hyungjin Chung,

Changick Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Go_2025_ICCV,
  author    = {Go, Hyojun and Park, Byeongjun and Nam, Hyelin and Kim, Byung-Hoon and Chung, Hyungjin and Kim, Changick},
  title     = {{VideoRFSplat}: Direct Scene-Level Text-to-{3D} {Gaussian} Splatting Generation with Flexible Pose and Multi-View Joint Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26706--26717}
}
DATA: Domain-And-Time Alignment for High-Quality Feature Fusion in Collaborative Perception: Chengchang Tian,

Jianwei Ma,

Yan Huang,

Zhanye Chen,

Honghao Wei,

Hui Zhang,

Wei Hong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tian_2025_ICCV,
  author    = {Tian, Chengchang and Ma, Jianwei and Huang, Yan and Chen, Zhanye and Wei, Honghao and Zhang, Hui and Hong, Wei},
  title     = {{DATA}: Domain-And-Time Alignment for High-Quality Feature Fusion in Collaborative Perception},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28643--28652}
}
Bringing RNNs Back to Efficient Open-Ended Video Understanding: Weili Xu,

Enxin Song,

Wenhao Chai,

Xuexiang Wen,

Tian Ye,

Gaoang Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Weili and Song, Enxin and Chai, Wenhao and Wen, Xuexiang and Ye, Tian and Wang, Gaoang},
  title     = {Bringing {RNNs} Back to Efficient Open-Ended Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23453--23465}
}
ViM-VQ: Efficient Post-Training Vector Quantization for Visual Mamba: Juncan Deng,

Shuaiting Li,

Zeyu Wang,

Kedong Xu,

Hong Gu,

Kejie Huang; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2025_ICCV,
  author    = {Deng, Juncan and Li, Shuaiting and Wang, Zeyu and Xu, Kedong and Gu, Hong and Huang, Kejie},
  title     = {{ViM-VQ}: Efficient Post-Training Vector Quantization for Visual {Mamba}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24518--24527}
}
EVEv2: Improved Baselines for Encoder-Free Vision-Language Models: Haiwen Diao,

Xiaotong Li,

Yufeng Cui,

Yueze Wang,

Haoge Deng,

Ting Pan,

Wenxuan Wang,

Huchuan Lu,

Xinlong Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Diao_2025_ICCV,
  author    = {Diao, Haiwen and Li, Xiaotong and Cui, Yufeng and Wang, Yueze and Deng, Haoge and Pan, Ting and Wang, Wenxuan and Lu, Huchuan and Wang, Xinlong},
  title     = {{EVEv2}: Improved Baselines for Encoder-Free Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21014--21025}
}
End-to-End Driving with Online Trajectory Evaluation via BEV World Model: Yingyan Li,

Yuqi Wang,

Yang Liu,

Jiawei He,

Lue Fan,

Zhaoxiang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Yingyan and Wang, Yuqi and Liu, Yang and He, Jiawei and Fan, Lue and Zhang, Zhaoxiang},
  title     = {End-to-End Driving with Online Trajectory Evaluation via {BEV} World Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27137--27146}
}
MiDSummer: Multi-Guidance Diffusion for Controllable Zero-Shot Immersive Gaussian Splatting Scene Generation: Anjun Hu,

Richard Tomsett,

Valentin Gourmet,

Massimo Camplani,

Jas Kandola,

Hanting Xie; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Anjun and Tomsett, Richard and Gourmet, Valentin and Camplani, Massimo and Kandola, Jas and Xie, Hanting},
  title     = {{MiDSummer}: Multi-Guidance Diffusion for Controllable Zero-Shot Immersive {Gaussian} Splatting Scene Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26793--26805}
}
Global Regulation and Excitation via Attention Tuning for Stereo Matching: Jiahao Li,

Xinhong Chen,

Zhengmin Jiang,

Qian Zhou,

Yung-Hui Li,

Jianping Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Jiahao and Chen, Xinhong and Jiang, Zhengmin and Zhou, Qian and Li, Yung-Hui and Wang, Jianping},
  title     = {Global Regulation and Excitation via Attention Tuning for Stereo Matching},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25539--25549}
}
Visual Test-time Scaling for GUI Agent Grounding: Tiange Luo,

Lajanugen Logeswaran,

Justin Johnson,

Honglak Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2025_ICCV,
  author    = {Luo, Tiange and Logeswaran, Lajanugen and Johnson, Justin and Lee, Honglak},
  title     = {Visual Test-time Scaling for {GUI} Agent Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19989--19998}
}
Hydra-NeXt: Robust Closed-Loop Driving with Open-Loop Training: Zhenxin Li,

Shihao Wang,

Shiyi Lan,

Zhiding Yu,

Zuxuan Wu,

Jose M. Alvarez; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Zhenxin and Wang, Shihao and Lan, Shiyi and Yu, Zhiding and Wu, Zuxuan and Alvarez, Jose M.},
  title     = {{Hydra-NeXt}: Robust Closed-Loop Driving with Open-Loop Training},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27305--27314}
}
Heatmap Regression without Soft-Argmax for Facial Landmark Detection: Chiao-An Yang,

Raymond A. Yeh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Chiao-An and Yeh, Raymond A.},
  title     = {Heatmap Regression without Soft-Argmax for Facial Landmark Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28729--28739}
}
Neural Shell Texture Splatting: More Details and Fewer Primitives: Xin Zhang,

Anpei Chen,

Jincheng Xiong,

Pinxuan Dai,

Yujun Shen,

Weiwei Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Xin and Chen, Anpei and Xiong, Jincheng and Dai, Pinxuan and Shen, Yujun and Xu, Weiwei},
  title     = {Neural Shell Texture Splatting: More Details and Fewer Primitives},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25229--25238}
}
ZeroKey: Point-Level Reasoning and Zero-Shot 3D Keypoint Detection from Large Language Models: Bingchen Gong,

Diego Gomez,

Abdullah Hamdi,

Abdelrahman Eldesokey,

Ahmed Abdelreheem,

Peter Wonka,

Maks Ovsjanikov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gong_2025_ICCV,
  author    = {Gong, Bingchen and Gomez, Diego and Hamdi, Abdullah and Eldesokey, Abdelrahman and Abdelreheem, Ahmed and Wonka, Peter and Ovsjanikov, Maks},
  title     = {{ZeroKey}: Point-Level Reasoning and Zero-Shot {3D} Keypoint Detection from Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22089--22099}
}
DreamCube: RGB-D Panorama Generation via Multi-plane Synchronization: Yukun Huang,

Yanning Zhou,

Jianan Wang,

Kaiyi Huang,

Xihui Liu; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Yukun and Zhou, Yanning and Wang, Jianan and Huang, Kaiyi and Liu, Xihui},
  title     = {{DreamCube}: {RGB-D} Panorama Generation via Multi-plane Synchronization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24922--24932}
}
ScanEdit: Hierarchically-Guided Functional 3D Scan Editing: Mohamed El Amine Boudjoghra,

Ivan Laptev,

Angela Dai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{El_Amine_Boudjoghra_2025_ICCV,
  author    = {El Amine Boudjoghra, Mohamed and Laptev, Ivan and Dai, Angela},
  title     = {{ScanEdit}: Hierarchically-Guided Functional {3D} Scan Editing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27105--27115}
}
DeFSS: Image-to-Mask Denoising Learning for Few-shot Segmentation: Zishu Qin,

Junhao Xu,

Weifeng Ge; [pdf]
[bibtex]
@InProceedings{Qin_2025_ICCV,
  author    = {Qin, Zishu and Xu, Junhao and Ge, Weifeng},
  title     = {{DeFSS}: Image-to-Mask Denoising Learning for Few-shot Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22232--22240}
}
Video2BEV: Transforming Drone Videos to BEVs for Video-based Geo-localization: Hao Ju,

Shaofei Huang,

Si Liu,

Zhedong Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ju_2025_ICCV,
  author    = {Ju, Hao and Huang, Shaofei and Liu, Si and Zheng, Zhedong},
  title     = {{Video2BEV}: Transforming Drone Videos to {BEVs} for Video-based Geo-localization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27073--27083}
}
High-Precision 3D Measurement of Complex Textured Surfaces Using Multiple Filtering Approach: Yuchong Chen,

Jian Yu,

Shaoyan Gai,

Zeyu Cai,

Feipeng Da; [pdf]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Yuchong and Yu, Jian and Gai, Shaoyan and Cai, Zeyu and Da, Feipeng},
  title     = {High-Precision {3D} Measurement of Complex Textured Surfaces Using Multiple Filtering Approach},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25670--25679}
}
RGE-GS: Reward-Guided Expansive Driving Scene Reconstruction via Diffusion Priors: Sicong Du,

Jiarun Liu,

Qifeng Chen,

Hao-Xiang Chen,

Tai-Jiang Mu,

Sheng Yang; [pdf] [supp]
[bibtex]
@InProceedings{Du_2025_ICCV,
  author    = {Du, Sicong and Liu, Jiarun and Chen, Qifeng and Chen, Hao-Xiang and Mu, Tai-Jiang and Yang, Sheng},
  title     = {{RGE-GS}: Reward-Guided Expansive Driving Scene Reconstruction via Diffusion Priors},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25756--25764}
}
VIPerson: Flexibly Generating Virtual Identity for Person Re-Identification: Xiao-Wen Zhang,

Delong Zhang,

Yi-Xing Peng,

Zhi Ouyang,

Jingke Meng,

Wei-Shi Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Xiao-Wen and Zhang, Delong and Peng, Yi-Xing and Ouyang, Zhi and Meng, Jingke and Zheng, Wei-Shi},
  title     = {{VIPerson}: Flexibly Generating Virtual Identity for Person Re-Identification},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23374--23384}
}
PRM: Photometric Stereo based Large Reconstruction Model: Wenhang Ge,

Jiantao Lin,

Guibao Shen,

Jiawei Feng,

Tao Hu,

Xinli Xu,

Ying-Cong Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ge_2025_ICCV,
  author    = {Ge, Wenhang and Lin, Jiantao and Shen, Guibao and Feng, Jiawei and Hu, Tao and Xu, Xinli and Chen, Ying-Cong},
  title     = {{PRM}: Photometric Stereo based Large Reconstruction Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25009--25018}
}
DriveArena: A Closed-loop Generative Simulation Platform for Autonomous Driving: Xuemeng Yang,

Licheng Wen,

Tiantian Wei,

Yukai Ma,

Jianbiao Mei,

Xin Li,

Wenjie Lei,

Daocheng Fu,

Pinlong Cai,

Min Dou,

Liang He,

Yong Liu,

Botian Shi,

Yu Qiao; [pdf] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Xuemeng and Wen, Licheng and Wei, Tiantian and Ma, Yukai and Mei, Jianbiao and Li, Xin and Lei, Wenjie and Fu, Daocheng and Cai, Pinlong and Dou, Min and He, Liang and Liu, Yong and Shi, Botian and Qiao, Yu},
  title     = {{DriveArena}: A Closed-loop Generative Simulation Platform for Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26933--26943}
}
InvRGB+L: Inverse Rendering of Complex Scenes with Unified Color and LiDAR Reflectance Modeling: Xiaoxue Chen,

Bhargav Chandaka,

Chih-Hao Lin,

Ya-Qin Zhang,

David Forsyth,

Hao Zhao,

Shenlong Wang; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Xiaoxue and Chandaka, Bhargav and Lin, Chih-Hao and Zhang, Ya-Qin and Forsyth, David and Zhao, Hao and Wang, Shenlong},
  title     = {{InvRGB+L}: Inverse Rendering of Complex Scenes with Unified Color and {LiDAR} Reflectance Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27176--27186}
}
Joint Semantic and Rendering Enhancements in 3D Gaussian Modeling with Anisotropic Local Encoding: Jingming He,

Chongyi Li,

Shiqi Wang,

Sam Kwong; [pdf] [supp]
[bibtex]
@InProceedings{He_2025_ICCV,
  author    = {He, Jingming and Li, Chongyi and Wang, Shiqi and Kwong, Sam},
  title     = {Joint Semantic and Rendering Enhancements in {3D} {Gaussian} Modeling with Anisotropic Local Encoding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28354--28363}
}
Unveiling the Invisible: Reasoning Complex Occlusions Amodally with AURA: Zhixuan Li,

Hyunse Yoon,

Sanghoon Lee,

Weisi Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Zhixuan and Yoon, Hyunse and Lee, Sanghoon and Lin, Weisi},
  title     = {Unveiling the Invisible: Reasoning Complex Occlusions Amodally with {AURA}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21927--21937}
}
Lumina-Image 2.0: A Unified and Efficient Image Generative Framework: Qi Qin,

Le Zhuo,

Yi Xin,

Ruoyi Du,

Zhen Li,

Bin Fu,

Yiting Lu,

Xinyue Li,

Dongyang Liu,

Xiangyang Zhu,

Will Beddow,

Erwann Millon,

Victor Perez,

Wenhai Wang,

Yu Qiao,

Bo Zhang,

Xiaohong Liu,

Hongsheng Li,

Chang Xu,

Peng Gao; [pdf] [supp]
[bibtex]
@InProceedings{Qin_2025_ICCV,
  author    = {Qin, Qi and Zhuo, Le and Xin, Yi and Du, Ruoyi and Li, Zhen and Fu, Bin and Lu, Yiting and Li, Xinyue and Liu, Dongyang and Zhu, Xiangyang and Beddow, Will and Millon, Erwann and Perez, Victor and Wang, Wenhai and Qiao, Yu and Zhang, Bo and Liu, Xiaohong and Li, Hongsheng and Xu, Chang and Gao, Peng},
  title     = {{Lumina-Image} 2.0: A Unified and Efficient Image Generative Framework},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20031--20042}
}
mmCooper: A Multi-agent Multi-stage Communication-efficient and Collaboration-robust Cooperative Perception Framework: Bingyi Liu,

Jian Teng,

Hongfei Xue,

Enshu Wang,

Chuanhui Zhu,

Pu Wang,

Libing Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Bingyi and Teng, Jian and Xue, Hongfei and Wang, Enshu and Zhu, Chuanhui and Wang, Pu and Wu, Libing},
  title     = {{mmCooper}: A Multi-agent Multi-stage Communication-efficient and Collaboration-robust Cooperative Perception Framework},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28396--28406}
}
OCSplats: Observation Completeness Quantification and Label Noise Separation in 3DGS: Han Ling,

Xian Xu,

Yinghui Sun,

Quansen Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ling_2025_ICCV,
  author    = {Ling, Han and Xu, Xian and Sun, Yinghui and Sun, Quansen},
  title     = {{OCSplats}: Observation Completeness Quantification and Label Noise Separation in {3DGS}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25680--25689}
}
Open-Vocabulary HOI Detection with Interaction-aware Prompt and Concept Calibration: Ting Lei,

Shaofeng Yin,

Qingchao Chen,

Yuxin Peng,

Yang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lei_2025_ICCV,
  author    = {Lei, Ting and Yin, Shaofeng and Chen, Qingchao and Peng, Yuxin and Liu, Yang},
  title     = {Open-Vocabulary {HOI} Detection with Interaction-aware Prompt and Concept Calibration},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23945--23957}
}
ROVI: A VLM-LLM Re-Captioned Dataset for Open-Vocabulary Instance-Grounded Text-to-Image Generation: Cihang Peng,

Qiming Hou,

Zhong Ren,

Kun Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2025_ICCV,
  author    = {Peng, Cihang and Hou, Qiming and Ren, Zhong and Zhou, Kun},
  title     = {{ROVI}: A {VLM-LLM} Re-Captioned Dataset for Open-Vocabulary Instance-Grounded Text-to-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20204--20214}
}
A View-consistent Sampling Method for Regularized Training of Neural Radiance Fields: Aoxiang Fan,

Corentin Dumery,

Nicolas Talabot,

Pascal Fua; [pdf] [arXiv]
[bibtex]
@InProceedings{Fan_2025_ICCV,
  author    = {Fan, Aoxiang and Dumery, Corentin and Talabot, Nicolas and Fua, Pascal},
  title     = {A View-consistent Sampling Method for Regularized Training of Neural Radiance Fields},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25961--25971}
}
AdsQA: Towards Advertisement Video Understanding: Xinwei Long,

Kai Tian,

Peng Xu,

Guoli Jia,

Jingxuan Li,

Sa Yang,

Yihua Shao,

Kaiyan Zhang,

Che Jiang,

Hao Xu,

Yang Liu,

Jiaheng Ma,

Bowen Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Long_2025_ICCV,
  author    = {Long, Xinwei and Tian, Kai and Xu, Peng and Jia, Guoli and Li, Jingxuan and Yang, Sa and Shao, Yihua and Zhang, Kaiyan and Jiang, Che and Xu, Hao and Liu, Yang and Ma, Jiaheng and Zhou, Bowen},
  title     = {{AdsQA}: Towards Advertisement Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23396--23407}
}
Fine-grained Abnormality Prompt Learning for Zero-shot Anomaly Detection: Jiawen Zhu,

Yew-Soon Ong,

Chunhua Shen,

Guansong Pang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV,
  author    = {Zhu, Jiawen and Ong, Yew-Soon and Shen, Chunhua and Pang, Guansong},
  title     = {Fine-grained Abnormality Prompt Learning for Zero-shot Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22241--22251}
}
Unified Open-World Segmentation with Multi-Modal Prompts: Yang Liu,

Yufei Yin,

Chenchen Jing,

Muzhi Zhu,

Hao Chen,

Yuling Xi,

Bo Feng,

Hao Wang,

Shiyu Li,

Chunhua Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yang and Yin, Yufei and Jing, Chenchen and Zhu, Muzhi and Chen, Hao and Xi, Yuling and Feng, Bo and Wang, Hao and Li, Shiyu and Shen, Chunhua},
  title     = {Unified Open-World Segmentation with Multi-Modal Prompts},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21557--21567}
}
ClaraVid: A Holistic Scene Reconstruction Benchmark From Aerial Perspective With Delentropy-Based Complexity Profiling: Radu Beche,

Sergiu Nedevschi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Beche_2025_ICCV,
  author    = {Beche, Radu and Nedevschi, Sergiu},
  title     = {{ClaraVid}: A Holistic Scene Reconstruction Benchmark From Aerial Perspective With Delentropy-Based Complexity Profiling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26015--26025}
}
X2-Gaussian: 4D Radiative Gaussian Splatting for Continuous-time Tomographic Reconstruction: Weihao Yu,

Yuanhao Cai,

Ruyi Zha,

Zhiwen Fan,

Chenxin Li,

Yixuan Yuan; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Weihao and Cai, Yuanhao and Zha, Ruyi and Fan, Zhiwen and Li, Chenxin and Yuan, Yixuan},
  title     = {{X2-Gaussian}: {4D} Radiative {Gaussian} Splatting for Continuous-time Tomographic Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24728--24738}
}
FrameFusion: Combining Similarity and Importance for Video Token Reduction on Large Vision Language Models: Tianyu Fu,

Tengxuan Liu,

Qinghao Han,

Guohao Dai,

Shengen Yan,

Huazhong Yang,

Xuefei Ning,

Yu Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2025_ICCV,
  author    = {Fu, Tianyu and Liu, Tengxuan and Han, Qinghao and Dai, Guohao and Yan, Shengen and Yang, Huazhong and Ning, Xuefei and Wang, Yu},
  title     = {{FrameFusion}: Combining Similarity and Importance for Video Token Reduction on Large Vision Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22654--22663}
}
AURELIA: Test-time Reasoning Distillation in Audio-Visual LLMs: Sanjoy Chowdhury,

Hanan Gani,

Nishit Anand,

Sayan Nag,

Ruohan Gao,

Mohamed Elhoseiny,

Salman Khan,

Dinesh Manocha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chowdhury_2025_ICCV,
  author    = {Chowdhury, Sanjoy and Gani, Hanan and Anand, Nishit and Nag, Sayan and Gao, Ruohan and Elhoseiny, Mohamed and Khan, Salman and Manocha, Dinesh},
  title     = {{AURELIA}: Test-time Reasoning Distillation in Audio-Visual {LLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22899--22910}
}
FB-Diff: Fourier Basis-guided Diffusion for Temporal Interpolation of 4D Medical Imaging: Xin You,

Runze Yang,

Chuyan Zhang,

Zhongliang Jiang,

Jie Yang,

Nassir Navab; [pdf] [supp]
[bibtex]
@InProceedings{You_2025_ICCV,
  author    = {You, Xin and Yang, Runze and Zhang, Chuyan and Jiang, Zhongliang and Yang, Jie and Navab, Nassir},
  title     = {{FB-Diff}: {Fourier} Basis-guided Diffusion for Temporal Interpolation of {4D} Medical Imaging},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28010--28020}
}
UNIS: A Unified Framework for Achieving Unbiased Neural Implicit Surfaces in Volume Rendering: Junkai Deng,

Hanting Niu,

Jiaze Li,

Fei Hou,

Ying He; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2025_ICCV,
  author    = {Deng, Junkai and Niu, Hanting and Li, Jiaze and Hou, Fei and He, Ying},
  title     = {{UNIS}: A Unified Framework for Achieving Unbiased Neural Implicit Surfaces in Volume Rendering},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27671--27680}
}
Seeing the Unseen: A Semantic Alignment and Context-Aware Prompt Framework for Open-Vocabulary Camouflaged Object Segmentation: Peng Ren,

Tian Bai,

Jing Sun,

Fuming Sun; [pdf]
[bibtex]
@InProceedings{Ren_2025_ICCV,
  author    = {Ren, Peng and Bai, Tian and Sun, Jing and Sun, Fuming},
  title     = {Seeing the Unseen: A Semantic Alignment and Context-Aware Prompt Framework for Open-Vocabulary Camouflaged Object Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23657--23666}
}
Test-time Adaptation for Foundation Medical Segmentation Model Without Parametric Updates: Kecheng Chen,

Xinyu Luo,

Tiexin Qin,

Jie Liu,

Hui Liu,

Victor Ho Fun Lee,

Hong Yan,

Haoliang Li; [pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV, author = {Chen, Kecheng and Luo, Xinyu and Qin, Tiexin and Liu, Jie and Liu, Hui and Lee, Victor Ho Fun and Yan, Hong and Li, Haoliang}, title = {Test-time Adaptation for Foundation Medical Segmentation Model Without Parametric Updates}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {20075--20084} }
Hierarchy UGP: Hierarchy Unified Gaussian Primitive for Large-Scale Dynamic Scene Reconstruction: Hongyang Sun,

Qinglin Yang,

Jiawei Wang,

Zhen Xu,

Chen Liu,

Yida Wang,

Kun Zhan,

Hujun Bao,

Xiaowei Zhou,

Sida Peng; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_ICCV, author = {Sun, Hongyang and Yang, Qinglin and Wang, Jiawei and Xu, Zhen and Liu, Chen and Wang, Yida and Zhan, Kun and Bao, Hujun and Zhou, Xiaowei and Peng, Sida}, title = {Hierarchy {UGP}: Hierarchy Unified {Gaussian} Primitive for Large-Scale Dynamic Scene Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {26252--26262} }
DisCo: Towards Distinct and Coherent Visual Encapsulation in Video MLLMs: Jiahe Zhao,

Rongkun Zheng,

Yi Wang,

Helin Wang,

Hengshuang Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_ICCV, author = {Zhao, Jiahe and Zheng, Rongkun and Wang, Yi and Wang, Helin and Zhao, Hengshuang}, title = {{DisCo}: Towards Distinct and Coherent Visual Encapsulation in Video {MLLMs}}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {21710--21720} }
Axis-level Symmetry Detection with Group-Equivariant Representation: Wongyun Yu,

Ahyun Seo,

Minsu Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_ICCV, author = {Yu, Wongyun and Seo, Ahyun and Cho, Minsu}, title = {Axis-level Symmetry Detection with Group-Equivariant Representation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {24791--24800} }
Referring to Any Person: Qing Jiang,

Lin Wu,

Zhaoyang Zeng,

Tianhe Ren,

Yuda Xiong,

Yihao Chen,

Liu Qin,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_ICCV, author = {Jiang, Qing and Wu, Lin and Zeng, Zhaoyang and Ren, Tianhe and Xiong, Yuda and Chen, Yihao and Qin, Liu and Zhang, Lei}, title = {Referring to Any Person}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {21667--21678} }
Statistical Confidence Rescoring for Robust 3D Scene Graph Generation from Multi-View Images: Qi Xun Yeo,

Yanyan Li,

Gim Hee Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yeo_2025_ICCV, author = {Yeo, Qi Xun and Li, Yanyan and Lee, Gim Hee}, title = {Statistical Confidence Rescoring for Robust {3D} Scene Graph Generation from Multi-View Images}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {24999--25008} }
DASH: 4D Hash Encoding with Self-Supervised Decomposition for Real-Time Dynamic Scene Rendering: Jie Chen,

Zhangchi Hu,

Peixi Wu,

Huyue Zhu,

Hebei Li,

Xiaoyan Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV, author = {Chen, Jie and Hu, Zhangchi and Wu, Peixi and Zhu, Huyue and Li, Hebei and Sun, Xiaoyan}, title = {{DASH}: {4D} Hash Encoding with Self-Supervised Decomposition for Real-Time Dynamic Scene Rendering}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {26349--26359} }
MamV2XCalib: V2X-based Target-less Infrastructure Camera Calibration with State Space Model: Yaoye Zhu,

Zhe Wang,

Yan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_ICCV, author = {Zhu, Yaoye and Wang, Zhe and Wang, Yan}, title = {{MamV2XCalib}: {V2X}-based Target-less Infrastructure Camera Calibration with State Space Model}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {26696--26705} }
HiP-AD: Hierarchical and Multi-Granularity Planning with Deformable Attention for Autonomous Driving in a Single Decoder: Yingqi Tang,

Zhuoran Xu,

Zhaotie Meng,

Erkang Cheng; [pdf] [supp]
[bibtex]
@InProceedings{Tang_2025_ICCV, author = {Tang, Yingqi and Xu, Zhuoran and Meng, Zhaotie and Cheng, Erkang}, title = {{HiP-AD}: Hierarchical and Multi-Granularity Planning with Deformable Attention for Autonomous Driving in a Single Decoder}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {25605--25615} }
Visual Textualization for Image Prompted Object Detection: Yongjian Wu,

Yang Zhou,

Jiya Saiyin,

Bingzheng Wei,

Yan Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_ICCV, author = {Wu, Yongjian and Zhou, Yang and Saiyin, Jiya and Wei, Bingzheng and Xu, Yan}, title = {Visual Textualization for Image Prompted Object Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {20900--20910} }
A Token-level Text Image Foundation Model for Document Understanding: Tongkun Guan,

Zining Wang,

Pei Fu,

Zhengtao Guo,

Wei Shen,

Kai Zhou,

Tiezhu Yue,

Chen Duan,

Hao Sun,

Qianyi Jiang,

Junfeng Luo,

Xiaokang Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guan_2025_ICCV, author = {Guan, Tongkun and Wang, Zining and Fu, Pei and Guo, Zhengtao and Shen, Wei and Zhou, Kai and Yue, Tiezhu and Duan, Chen and Sun, Hao and Jiang, Qianyi and Luo, Junfeng and Yang, Xiaokang}, title = {A Token-level Text Image Foundation Model for Document Understanding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {23210--23220} }
NeRF Is a Valuable Assistant for 3D Gaussian Splatting: Shuangkang Fang,

I-Chao Shen,

Takeo Igarashi,

Yufeng Wang,

ZeSheng Wang,

Yi Yang,

Wenrui Ding,

Shuchang Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2025_ICCV, author = {Fang, Shuangkang and Shen, I-Chao and Igarashi, Takeo and Wang, Yufeng and Wang, ZeSheng and Yang, Yi and Ding, Wenrui and Zhou, Shuchang}, title = {{NeRF} Is a Valuable Assistant for {3D} {Gaussian} Splatting}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {26230--26240} }
Towards Visual Localization Interoperability: Cross-Feature for Collaborative Visual Localization and Mapping: Alberto Jaenal,

Paula Carbó Cubero,

José Araújo,

André Mateus; [pdf] [supp]
[bibtex]
@InProceedings{Jaenal_2025_ICCV, author = {Jaenal, Alberto and Cubero, Paula Carb{\'o} and Ara{\'u}jo, Jos{\'e} and Mateus, Andr{\'e}}, title = {Towards Visual Localization Interoperability: Cross-Feature for Collaborative Visual Localization and Mapping}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {26783--26792} }
PerLDiff: Controllable Street View Synthesis Using Perspective-Layout Diffusion Model: Jinhua Zhang,

Hualian Sheng,

Sijia Cai,

Bing Deng,

Qiao Liang,

Wen Li,

Ying Fu,

Jieping Ye,

Shuhang Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV, author = {Zhang, Jinhua and Sheng, Hualian and Cai, Sijia and Deng, Bing and Liang, Qiao and Li, Wen and Fu, Ying and Ye, Jieping and Gu, Shuhang}, title = {{PerLDiff}: Controllable Street View Synthesis Using Perspective-Layout Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {26306--26315} }
VISO: Accelerating In-orbit Object Detection with Language-Guided Mask Learning and Sparse Inference: Meiqi Wang,

Han Qiu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV, author = {Wang, Meiqi and Qiu, Han}, title = {{VISO}: Accelerating In-orbit Object Detection with Language-Guided Mask Learning and Sparse Inference}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {23300--23310} }
ATCTrack: Aligning Target-Context Cues with Dynamic Target States for Robust Vision-Language Tracking: Xiaokun Feng,

Shiyu Hu,

Xuchen Li,

Dailing Zhang,

Meiqi Wu,

Jing Zhang,

Xiaotang Chen,

Kaiqi Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2025_ICCV, author = {Feng, Xiaokun and Hu, Shiyu and Li, Xuchen and Zhang, Dailing and Wu, Meiqi and Zhang, Jing and Chen, Xiaotang and Huang, Kaiqi}, title = {{ATCTrack}: Aligning Target-Context Cues with Dynamic Target States for Robust Vision-Language Tracking}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {19850--19861} }
SeaS: Few-shot Industrial Anomaly Image Generation with Separation and Sharing Fine-tuning: Zhewei Dai,

Shilei Zeng,

Haotian Liu,

Xurui Li,

Feng Xue,

Yu Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dai_2025_ICCV, author = {Dai, Zhewei and Zeng, Shilei and Liu, Haotian and Li, Xurui and Xue, Feng and Zhou, Yu}, title = {{SeaS}: Few-shot Industrial Anomaly Image Generation with Separation and Sharing Fine-tuning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {23135--23144} }
AMD: Adaptive Momentum and Decoupled Contrastive Learning Framework for Robust Long-Tail Trajectory Prediction: Bin Rao,

Haicheng Liao,

Yanchen Guan,

Chengyue Wang,

Bonan Wang,

Jiaxun Zhang,

Zhenning Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rao_2025_ICCV, author = {Rao, Bin and Liao, Haicheng and Guan, Yanchen and Wang, Chengyue and Wang, Bonan and Zhang, Jiaxun and Li, Zhenning}, title = {{AMD}: Adaptive Momentum and Decoupled Contrastive Learning Framework for Robust Long-Tail Trajectory Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {28849--28858} }
Music Grounding by Short Video: Zijie Xin,

Minquan Wang,

Jingyu Liu,

Quan Chen,

Ye Ma,

Peng Jiang,

Xirong Li; [pdf] [arXiv]
[bibtex]
@InProceedings{Xin_2025_ICCV, author = {Xin, Zijie and Wang, Minquan and Liu, Jingyu and Chen, Quan and Ma, Ye and Jiang, Peng and Li, Xirong}, title = {Music Grounding by Short Video}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {22285--22293} }
When Confidence Fails: Revisiting Pseudo-Label Selection in Semi-supervised Semantic Segmentation: Pan Liu,

Jinshi Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV, author = {Liu, Pan and Liu, Jinshi}, title = {When Confidence Fails: Revisiting Pseudo-Label Selection in Semi-supervised Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {21874--21884} }
CalliReader: Contextualizing Chinese Calligraphy via an Embedding-Aligned Vision-Language Model: Yuxuan Luo,

Jiaqi Tang,

Chenyi Huang,

Feiyang Hao,

Zhouhui Lian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2025_ICCV, author = {Luo, Yuxuan and Tang, Jiaqi and Huang, Chenyi and Hao, Feiyang and Lian, Zhouhui}, title = {{CalliReader}: Contextualizing {Chinese} Calligraphy via an Embedding-Aligned Vision-Language Model}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {23030--23040} }
LOTS of Fashion! Multi-Conditioning for Image Generation via Sketch-Text Pairing: Federico Girella,

Davide Talon,

Ziyue Liu,

Zanxi Ruan,

Yiming Wang,

Marco Cristani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Girella_2025_ICCV, author = {Girella, Federico and Talon, Davide and Liu, Ziyue and Ruan, Zanxi and Wang, Yiming and Cristani, Marco}, title = {{LOTS} of Fashion! {Multi}-Conditioning for Image Generation via Sketch-Text Pairing}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {19711--19720} }
Plug-in Feedback Self-adaptive Attention in CLIP for Training-free Open-Vocabulary Segmentation: Zhixiang Chi,

Yanan Wu,

Li Gu,

Huan Liu,

Ziqiang Wang,

Yang Zhang,

Yang Wang,

Konstantinos Plataniotis; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chi_2025_ICCV, author = {Chi, Zhixiang and Wu, Yanan and Gu, Li and Liu, Huan and Wang, Ziqiang and Zhang, Yang and Wang, Yang and Plataniotis, Konstantinos}, title = {Plug-in Feedback Self-adaptive Attention in {CLIP} for Training-free Open-Vocabulary Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {22815--22825} }
Street Gaussians without 3D Object Tracker: Ruida Zhang,

Chengxi Li,

Chenyangguang Zhang,

Xingyu Liu,

Haili Yuan,

Yanyan Li,

Xiangyang Ji,

Gim Hee Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV, author = {Zhang, Ruida and Li, Chengxi and Zhang, Chenyangguang and Liu, Xingyu and Yuan, Haili and Li, Yanyan and Ji, Xiangyang and Lee, Gim Hee}, title = {Street {Gaussians} without {3D} Object Tracker}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {25722--25734} }
Principles of Visual Tokens for Efficient Video Understanding: Xinyue Hao,

Gen Li,

Shreyank N Gowda,

Robert B. Fisher,

Jonathan Huang,

Anurag Arnab,

Laura Sevilla-Lara; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hao_2025_ICCV, author = {Hao, Xinyue and Li, Gen and Gowda, Shreyank N and Fisher, Robert B. and Huang, Jonathan and Arnab, Anurag and Sevilla-Lara, Laura}, title = {Principles of Visual Tokens for Efficient Video Understanding}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {21254--21264} }
Similarity Memory Prior is All You Need for Medical Image Segmentation: Hao Tang,

Zhiqing Guo,

Liejun Wang,

Chao Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2025_ICCV, author = {Tang, Hao and Guo, Zhiqing and Wang, Liejun and Liu, Chao}, title = {Similarity Memory Prior is All You Need for Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {23009--23018} }
MMGeo: Multimodal Compositional Geo-Localization for UAVs: Yuxiang Ji,

Boyong He,

Zhuoyue Tan,

Liaoni Wu; [pdf]
[bibtex]
@InProceedings{Ji_2025_ICCV, author = {Ji, Yuxiang and He, Boyong and Tan, Zhuoyue and Wu, Liaoni}, title = {{MMGeo}: Multimodal Compositional Geo-Localization for {UAVs}}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {25165--25175} }
InstructSeg: Unifying Instructed Visual Segmentation with Multi-modal Large Language Models: Cong Wei,

Yujie Zhong,

Haoxian Tan,

Yingsen Zeng,

Yong Liu,

Hongfa Wang,

Yujiu Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2025_ICCV, author = {Wei, Cong and Zhong, Yujie and Tan, Haoxian and Zeng, Yingsen and Liu, Yong and Wang, Hongfa and Yang, Yujiu}, title = {{InstructSeg}: Unifying Instructed Visual Segmentation with Multi-modal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {20193--20203} }
Beyond Simple Edits: Composed Video Retrieval with Dense Modifications: Omkar Thawakar,

Dmitry Demidov,

Ritesh Thawkar,

Rao Muhammad Anwer,

Mubarak Shah,

Fahad Shahbaz Khan,

Salman Khan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Thawakar_2025_ICCV, author = {Thawakar, Omkar and Demidov, Dmitry and Thawkar, Ritesh and Anwer, Rao Muhammad and Shah, Mubarak and Khan, Fahad Shahbaz and Khan, Salman}, title = {Beyond Simple Edits: Composed Video Retrieval with Dense Modifications}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {20435--20444} }
Leveraging Prior Knowledge of Diffusion Model for Person Search: Giyeol Kim,

Sooyoung Yang,

Jihyong Oh,

Myungjoo Kang,

Chanho Eom; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV, author = {Kim, Giyeol and Yang, Sooyoung and Oh, Jihyong and Kang, Myungjoo and Eom, Chanho}, title = {Leveraging Prior Knowledge of Diffusion Model for Person Search}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {20301--20312} }
PS3: A Multimodal Transformer Integrating Pathology Reports with Histology Images and Biological Pathways for Cancer Survival Prediction: Manahil Raza,

Ayesha Azam,

Talha Qaiser,

Nasir Rajpoot; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Raza_2025_ICCV, author = {Raza, Manahil and Azam, Ayesha and Qaiser, Talha and Rajpoot, Nasir}, title = {{PS3}: A Multimodal Transformer Integrating Pathology Reports with Histology Images and Biological Pathways for Cancer Survival Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {22175--22186} }
Debiasing Trace Guidance: Top-down Trace Distillation and Bottom-up Velocity Alignment for Unsupervised Anomaly Detection: Xingjian Wang,

Li Chai,

Jiming Chen; [pdf]
[bibtex]
@InProceedings{Wang_2025_ICCV, author = {Wang, Xingjian and Chai, Li and Chen, Jiming}, title = {Debiasing Trace Guidance: Top-down Trace Distillation and Bottom-up Velocity Alignment for Unsupervised Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {22989--22998} }
EDM: Efficient Deep Feature Matching: Xi Li,

Tong Rao,

Cihui Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV, author = {Li, Xi and Rao, Tong and Pan, Cihui}, title = {{EDM}: Efficient Deep Feature Matching}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {26198--26208} }
CorrCLIP: Reconstructing Patch Correlations in CLIP for Open-Vocabulary Semantic Segmentation: Dengke Zhang,

Fagui Liu,

Quan Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV, author = {Zhang, Dengke and Liu, Fagui and Tang, Quan}, title = {{CorrCLIP}: Reconstructing Patch Correlations in {CLIP} for Open-Vocabulary Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {24677--24687} }
LBM: Latent Bridge Matching for Fast Image-to-Image Translation: Clément Chadebec,

Onur Tasar,

Sanjeev Sreetharan,

Benjamin Aubin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chadebec_2025_ICCV, author = {Chadebec, Cl{\'e}ment and Tasar, Onur and Sreetharan, Sanjeev and Aubin, Benjamin}, title = {{LBM}: Latent Bridge Matching for Fast Image-to-Image Translation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {29086--29098} }
Towards a 3D Transfer-based Black-box Attack via Critical Feature Guidance: Shuchao Pang,

Zhenghan Chen,

Shen Zhang,

Liming Lu,

Siyuan Liang,

Anan Du,

Yongbin Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pang_2025_ICCV, author = {Pang, Shuchao and Chen, Zhenghan and Zhang, Shen and Lu, Liming and Liang, Siyuan and Du, Anan and Zhou, Yongbin}, title = {Towards a {3D} Transfer-based Black-box Attack via Critical Feature Guidance}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {26912--26922} }
LLM-Assisted Semantic Guidance for Sparsely Annotated Remote Sensing Object Detection: Wei Liao,

Chunyan Xu,

Chenxu Wang,

Zhen Cui; [pdf] [arXiv]
[bibtex]
@InProceedings{Liao_2025_ICCV, author = {Liao, Wei and Xu, Chunyan and Wang, Chenxu and Cui, Zhen}, title = {{LLM}-Assisted Semantic Guidance for Sparsely Annotated Remote Sensing Object Detection}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {22519--22528} }
Geometric Alignment and Prior Modulation for View-Guided Point Cloud Completion on Unseen Categories: Jingqiao Xiu,

Yicong Li,

Na Zhao,

Han Fang,

Xiang Wang,

Angela Yao; [pdf]
[bibtex]
@InProceedings{Xiu_2025_ICCV, author = {Xiu, Jingqiao and Li, Yicong and Zhao, Na and Fang, Han and Wang, Xiang and Yao, Angela}, title = {Geometric Alignment and Prior Modulation for View-Guided Point Cloud Completion on Unseen Categories}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {27435--27444} }
ODDR: Outlier Detection & Dimension Reduction Based Defense Against Adversarial Patches: Nandish Chattopadhyay,

Amira Guesmi,

Muhammad Abdullah Hanif,

Bassem Ouni,

Muhammad Shafique; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chattopadhyay_2025_ICCV, author = {Chattopadhyay, Nandish and Guesmi, Amira and Hanif, Muhammad Abdullah and Ouni, Bassem and Shafique, Muhammad}, title = {{ODDR}: Outlier Detection \& Dimension Reduction Based Defense Against Adversarial Patches}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {22999--23008} }
Stochastic Gradient Estimation for Higher-Order Differentiable Rendering: Zican Wang,

Michael Fischer,

Tobias Ritschel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV, author = {Wang, Zican and Fischer, Michael and Ritschel, Tobias}, title = {Stochastic Gradient Estimation for Higher-Order Differentiable Rendering}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {28198--28206} }
PolGS: Polarimetric Gaussian Splatting for Fast Reflective Surface Reconstruction: Yufei Han,

Bowen Tie,

Heng Guo,

Youwei Lyu,

Si Li,

Boxin Shi,

Yunpeng Jia,

Zhanyu Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_ICCV, author = {Han, Yufei and Tie, Bowen and Guo, Heng and Lyu, Youwei and Li, Si and Shi, Boxin and Jia, Yunpeng and Ma, Zhanyu}, title = {{PolGS}: Polarimetric {Gaussian} Splatting for Fast Reflective Surface Reconstruction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {28073--28082} }
ERNet: Efficient Non-Rigid Registration Network for Point Sequences: Guangzhao He,

Yuxi Xiao,

Zhen Xu,

Xiaowei Zhou,

Sida Peng; [pdf] [supp]
[bibtex]
@InProceedings{He_2025_ICCV, author = {He, Guangzhao and Xiao, Yuxi and Xu, Zhen and Zhou, Xiaowei and Peng, Sida}, title = {{ERNet}: Efficient Non-Rigid Registration Network for Point Sequences}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {27156--27165} }
Does Your Vision-Language Model Get Lost in the Long Video Sampling Dilemma?: Tianyuan Qu,

Longxiang Tang,

Bohao Peng,

Senqiao Yang,

Bei Yu,

Jiaya Jia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qu_2025_ICCV, author = {Qu, Tianyuan and Tang, Longxiang and Peng, Bohao and Yang, Senqiao and Yu, Bei and Jia, Jiaya}, title = {Does Your Vision-Language Model Get Lost in the Long Video Sampling Dilemma?}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {20889--20899} }
Towards Safer and Understandable Driver Intention Prediction: Mukilan Karuppasamy,

Shankar Gangisetty,

Shyam Nandan Rai,

Carlo Masone,

C V Jawahar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Karuppasamy_2025_ICCV, author = {Karuppasamy, Mukilan and Gangisetty, Shankar and Rai, Shyam Nandan and Masone, Carlo and Jawahar, C V}, title = {Towards Safer and Understandable Driver Intention Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {25378--25387} }
Generative Gaussian Splatting: Generating 3D Scenes with Video Diffusion Priors: Katja Schwarz,

Norman Müller,

Peter Kontschieder; [pdf] [supp]
[bibtex]
@InProceedings{Schwarz_2025_ICCV, author = {Schwarz, Katja and M{\"u}ller, Norman and Kontschieder, Peter}, title = {Generative {Gaussian} Splatting: Generating {3D} Scenes with Video Diffusion Priors}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {27510--27520} }
AR-1-to-3: Single Image to Consistent 3D Object via Next-View Prediction: Xuying Zhang,

Yupeng Zhou,

Kai Wang,

Yikai Wang,

Zhen Li,

Shaohui Jiao,

Daquan Zhou,

Qibin Hou,

Ming-Ming Cheng; [pdf]
[bibtex]
@InProceedings{Zhang_2025_ICCV, author = {Zhang, Xuying and Zhou, Yupeng and Wang, Kai and Wang, Yikai and Li, Zhen and Jiao, Shaohui and Zhou, Daquan and Hou, Qibin and Cheng, Ming-Ming}, title = {{AR-1-to-3}: Single Image to Consistent {3D} Object via Next-View Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {26273--26283} }
Inverse 3D Microscopy Rendering for Cell Shape Inference with Active Mesh: Sacha Ichbiah,

Anshuman Sinha,

Fabrice Delbary,

Hervé Turlier; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ichbiah_2025_ICCV, author = {Ichbiah, Sacha and Sinha, Anshuman and Delbary, Fabrice and Turlier, Herv{\'e}}, title = {Inverse {3D} Microscopy Rendering for Cell Shape Inference with Active Mesh}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {26987--26998} }
DC-TTA: Divide-and-Conquer Framework for Test-Time Adaptation of Interactive Segmentation: Jihun Kim,

Hoyong Kwon,

Hyeokjun Kweon,

Wooseong Jeong,

Kuk-Jin Yoon; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2025_ICCV, author = {Kim, Jihun and Kwon, Hoyong and Kweon, Hyeokjun and Jeong, Wooseong and Yoon, Kuk-Jin}, title = {{DC-TTA}: Divide-and-Conquer Framework for Test-Time Adaptation of Interactive Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {23279--23289} }
NAVER: A Neuro-Symbolic Compositional Automaton for Visual Grounding with Explicit Logic Reasoning: Zhixi Cai,

Fucai Ke,

Simindokht Jahangard,

Maria Garcia de la Banda,

Reza Haffari,

Peter J. Stuckey,

Hamid Rezatofighi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2025_ICCV, author = {Cai, Zhixi and Ke, Fucai and Jahangard, Simindokht and {Garcia de la Banda}, Maria and Haffari, Reza and Stuckey, Peter J. and Rezatofighi, Hamid}, title = {{NAVER}: A Neuro-Symbolic Compositional Automaton for Visual Grounding with Explicit Logic Reasoning}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {24078--24089} }
MergeOcc: Bridge the Domain Gap between Different LiDARs for Robust Occupancy Prediction: Zikun Xu,

Shaobing Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV, author = {Xu, Zikun and Xu, Shaobing}, title = {{MergeOcc}: Bridge the Domain Gap between Different {LiDARs} for Robust Occupancy Prediction}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {26539--26548} }
AutoScape: Geometry-Consistent Long-Horizon Scene Generation: Jiacheng Chen,

Ziyu Jiang,

Mingfu Liang,

Bingbing Zhuang,

Jong-Chyi Su,

Sparsh Garg,

Ying Wu,

Manmohan Chandraker; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV, author = {Chen, Jiacheng and Jiang, Ziyu and Liang, Mingfu and Zhuang, Bingbing and Su, Jong-Chyi and Garg, Sparsh and Wu, Ying and Chandraker, Manmohan}, title = {{AutoScape}: Geometry-Consistent Long-Horizon Scene Generation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {25700--25711} }
Synchronizing Task Behavior: Aligning Multiple Tasks during Test-Time Training: Wooseong Jeong,

Jegyeong Cho,

Youngho Yoon,

Kuk-Jin Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2025_ICCV, author = {Jeong, Wooseong and Cho, Jegyeong and Yoon, Youngho and Yoon, Kuk-Jin}, title = {Synchronizing Task Behavior: Aligning Multiple Tasks during Test-Time Training}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {24340--24350} }
FLOSS: Free Lunch in Open-vocabulary Semantic Segmentation: Yasser Benigmim,

Mohammad Fahes,

Tuan-Hung Vu,

Andrei Bursuc,

Raoul de Charette; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Benigmim_2025_ICCV, author = {Benigmim, Yasser and Fahes, Mohammad and Vu, Tuan-Hung and Bursuc, Andrei and de Charette, Raoul}, title = {{FLOSS}: Free Lunch in Open-vocabulary Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {21471--21481} }
ToF-Splatting: Dense SLAM using Sparse Time-of-Flight Depth and Multi-Frame Integration: Andrea Conti,

Matteo Poggi,

Valerio Cambareri,

Martin R. Oswald,

Stefano Mattoccia; [pdf] [supp]
[bibtex]
@InProceedings{Conti_2025_ICCV, author = {Conti, Andrea and Poggi, Matteo and Cambareri, Valerio and Oswald, Martin R. and Mattoccia, Stefano}, title = {{ToF}-Splatting: Dense {SLAM} using Sparse Time-of-Flight Depth and Multi-Frame Integration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {28344--28353} }
Correspondence-Free Fast and Robust Spherical Point Pattern Registration: Anik Sarker,

Alan T. Asbeck; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sarker_2025_ICCV, author = {Sarker, Anik and Asbeck, Alan T.}, title = {Correspondence-Free Fast and Robust Spherical Point Pattern Registration}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {28156--28166} }
CT-ScanGaze: A Dataset and Baselines for 3D Volumetric Scanpath Modeling: Trong Thang Pham,

Akash Awasthi,

Saba Khan,

Esteban Duran Marti,

Tien-Phat Nguyen,

Khoa Vo,

Minh Tran,

Son Nguyen,

Cuong Tran,

Yuki Ikebe,

Anh Totti Nguyen,

Anh Nguyen,

Zhigang Deng,

Carol C. Wu,

Hien Nguyen,

Ngan Le; [pdf] [supp]
[bibtex]
@InProceedings{Pham_2025_ICCV, author = {Pham, Trong Thang and Awasthi, Akash and Khan, Saba and Marti, Esteban Duran and Nguyen, Tien-Phat and Vo, Khoa and Tran, Minh and Nguyen, Son and Tran, Cuong and Ikebe, Yuki and Nguyen, Anh Totti and Nguyen, Anh and Deng, Zhigang and Wu, Carol C. and Nguyen, Hien and Le, Ngan}, title = {{CT-ScanGaze}: A Dataset and Baselines for {3D} Volumetric Scanpath Modeling}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2025}, pages = {21732--21743} }
MOBIUS: Big-to-Mobile Universal Instance Segmentation via Multi-modal Bottleneck Fusion and Calibrated Decoder Pruning: Mattia Segu,

Marta Tintore Gazulla,

Yongqin Xian,

Luc Van Gool,

Federico Tombari; [pdf] [supp]
[bibtex]
@InProceedings{Segu_2025_ICCV,
  author    = {Segu, Mattia and Gazulla, Marta Tintore and Xian, Yongqin and Van Gool, Luc and Tombari, Federico},
  title     = {{MOBIUS}: Big-to-Mobile Universal Instance Segmentation via Multi-modal Bottleneck Fusion and Calibrated Decoder Pruning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20726--20736}
}
VoxelKP: A Voxel-based Network Architecture for Human Keypoint Estimation in LiDAR Data: Jian Shi,

Peter Wonka; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2025_ICCV,
  author    = {Shi, Jian and Wonka, Peter},
  title     = {{VoxelKP}: A Voxel-based Network Architecture for Human Keypoint Estimation in {LiDAR} Data},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28282--28291}
}
Normal and Abnormal Pathology Knowledge-Augmented Vision-Language Model for Anomaly Detection in Pathology Images: Jinsol Song,

Jiamu Wang,

Anh Tien Nguyen,

Keunho Byeon,

Sangjeong Ahn,

Sung Hak Lee,

Jin Tae Kwak; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2025_ICCV,
  author    = {Song, Jinsol and Wang, Jiamu and Nguyen, Anh Tien and Byeon, Keunho and Ahn, Sangjeong and Lee, Sung Hak and Kwak, Jin Tae},
  title     = {Normal and Abnormal Pathology Knowledge-Augmented Vision-Language Model for Anomaly Detection in Pathology Images},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22066--22076}
}
COME: Dual Structure-Semantic Learning with Collaborative MoE for Universal Lesion Detection Across Heterogeneous Ultrasound Datasets: Lingyu Chen,

Yawen Zeng,

Yue Wang,

Peng Wan,

Guochen Ning,

Hongen Liao,

Daoqiang Zhang,

Fang Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Lingyu and Zeng, Yawen and Wang, Yue and Wan, Peng and Ning, Guochen and Liao, Hongen and Zhang, Daoqiang and Chen, Fang},
  title     = {{COME}: Dual Structure-Semantic Learning with Collaborative {MoE} for Universal Lesion Detection Across Heterogeneous Ultrasound Datasets},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21460--21470}
}
An Efficient Hybrid Vision Transformer for TinyML Applications: Fanhong Zeng,

Huanan Li,

Juntao Guan,

Rui Fan,

Tong Wu,

Xilong Wang,

Rui Lai; [pdf]
[bibtex]
@InProceedings{Zeng_2025_ICCV,
  author    = {Zeng, Fanhong and Li, Huanan and Guan, Juntao and Fan, Rui and Wu, Tong and Wang, Xilong and Lai, Rui},
  title     = {An Efficient Hybrid Vision Transformer for {TinyML} Applications},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19914--19924}
}
Object-centric Video Question Answering with Visual Grounding and Referring: Haochen Wang,

Qirui Chen,

Cilin Yan,

Jiayin Cai,

Xiaolong Jiang,

Yao Hu,

Weidi Xie,

Stratis Gavves; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Haochen and Chen, Qirui and Yan, Cilin and Cai, Jiayin and Jiang, Xiaolong and Hu, Yao and Xie, Weidi and Gavves, Stratis},
  title     = {Object-centric Video Question Answering with Visual Grounding and Referring},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22274--22284}
}
AG2aussian: Anchor-Graph Structured Gaussian Splatting for Instance-Level 3D Scene Understanding and Editing: Zhaonan Wang,

Manyi Li,

Changhe Tu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Zhaonan and Li, Manyi and Tu, Changhe},
  title     = {{AG2aussian}: Anchor-Graph Structured {Gaussian} Splatting for Instance-Level {3D} Scene Understanding and Editing},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26806--26816}
}
Soft Local Completeness: Rethinking Completeness in XAI: Ziv Weiss Haddad,

Oren Barkan,

Yehonatan Elisha,

Noam Koenigstein; [pdf] [supp]
[bibtex]
@InProceedings{Haddad_2025_ICCV,
  author    = {Haddad, Ziv Weiss and Barkan, Oren and Elisha, Yehonatan and Koenigstein, Noam},
  title     = {Soft Local Completeness: Rethinking Completeness in {XAI}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19794--19804}
}
Open-ended Hierarchical Streaming Video Understanding with Vision Language Models: Hyolim Kang,

Yunsu Park,

Youngbeom Yoo,

Yeeun Choi,

Seon Joo Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2025_ICCV,
  author    = {Kang, Hyolim and Park, Yunsu and Yoo, Youngbeom and Choi, Yeeun and Kim, Seon Joo},
  title     = {Open-ended Hierarchical Streaming Video Understanding with Vision Language Models},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20715--20725}
}
SUB: Benchmarking CBM Generalization via Synthetic Attribute Substitutions: Jessica Bader,

Leander Girrbach,

Stephan Alaniz,

Zeynep Akata; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bader_2025_ICCV,
  author    = {Bader, Jessica and Girrbach, Leander and Alaniz, Stephan and Akata, Zeynep},
  title     = {{SUB}: Benchmarking {CBM} Generalization via Synthetic Attribute Substitutions},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23188--23198}
}
LLaVA-SP: Enhancing Visual Representation with Visual Spatial Tokens for MLLMs: Haoran Lou,

Chunxiao Fan,

Ziyan Liu,

Yuexin Wu,

Xinliang Wang; [pdf] [supp]
[bibtex]
@InProceedings{Lou_2025_ICCV,
  author    = {Lou, Haoran and Fan, Chunxiao and Liu, Ziyan and Wu, Yuexin and Wang, Xinliang},
  title     = {{LLaVA-SP}: Enhancing Visual Representation with Visual Spatial Tokens for {MLLMs}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22014--22024}
}
LLM-enhanced Action-aware Multi-modal Prompt Tuning for Image-Text Matching: Mengxiao Tian,

Xinxiao Wu,

Shuo Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tian_2025_ICCV,
  author    = {Tian, Mengxiao and Wu, Xinxiao and Yang, Shuo},
  title     = {{LLM-enhanced} Action-aware Multi-modal Prompt Tuning for Image-Text Matching},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20748--20757}
}
PointGAC: Geometric-Aware Codebook for Masked Point Modeling: Abiao Li,

Chenlei Lv,

Yuming Fang,

Yifan Zuo,

Jian Zhang,

Guofeng Mei; [pdf]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Abiao and Lv, Chenlei and Fang, Yuming and Zuo, Yifan and Zhang, Jian and Mei, Guofeng},
  title     = {{PointGAC}: Geometric-Aware Codebook for Masked Point Modeling},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24989--24998}
}
Unbiased Region-Language Alignment for Open-Vocabulary Dense Prediction: Yunheng Li,

Yuxuan Li,

Quan-Sheng Zeng,

Wenhai Wang,

Qibin Hou,

Ming-Ming Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Yunheng and Li, Yuxuan and Zeng, Quan-Sheng and Wang, Wenhai and Hou, Qibin and Cheng, Ming-Ming},
  title     = {Unbiased Region-Language Alignment for Open-Vocabulary Dense Prediction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23795--23805}
}
CLIPer: Hierarchically Improving Spatial Representation of CLIP for Open-Vocabulary Semantic Segmentation: Lin Sun,

Jiale Cao,

Jin Xie,

Xiaoheng Jiang,

Yanwei Pang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Lin and Cao, Jiale and Xie, Jin and Jiang, Xiaoheng and Pang, Yanwei},
  title     = {{CLIPer}: Hierarchically Improving Spatial Representation of {CLIP} for Open-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23199--23209}
}
SGAD: Semantic and Geometric-aware Descriptor for Local Feature Matching: Xiangzeng Liu,

Chi Wang,

Guanglu Shi,

Xiaodong Zhang,

Qiguang Miao,

Miao Fan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Xiangzeng and Wang, Chi and Shi, Guanglu and Zhang, Xiaodong and Miao, Qiguang and Fan, Miao},
  title     = {{SGAD}: Semantic and Geometric-aware Descriptor for Local Feature Matching},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27095--27104}
}
RogSplat: Robust Gaussian Splatting via Generative Priors: Hanyang Kong,

Xingyi Yang,

Xinchao Wang; [pdf] [supp]
[bibtex]
@InProceedings{Kong_2025_ICCV,
  author    = {Kong, Hanyang and Yang, Xingyi and Wang, Xinchao},
  title     = {{RogSplat}: Robust {Gaussian} Splatting via Generative Priors},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25735--25745}
}
CARIM: Caption-Based Autonomous Driving Scene Retrieval via Inclusive Text Matching: Minjoo Ki,

Daejung Kim,

Kisung Kim,

Seon Joo Kim,

Jinhan Lee; [pdf] [supp]
[bibtex]
@InProceedings{Ki_2025_ICCV,
  author    = {Ki, Minjoo and Kim, Daejung and Kim, Kisung and Kim, Seon Joo and Lee, Jinhan},
  title     = {{CARIM}: Caption-Based Autonomous Driving Scene Retrieval via Inclusive Text Matching},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22036--22045}
}
Zero-Shot Compositional Video Learning with Coding Rate Reduction: Heeseok Jung,

Jun-Hyeon Bak,

Yujin Jeong,

Gyugeun Lee,

Jinwoo Ahn,

Eun-Sol Kim; [pdf] [supp]
[bibtex]
@InProceedings{Jung_2025_ICCV,
  author    = {Jung, Heeseok and Bak, Jun-Hyeon and Jeong, Yujin and Lee, Gyugeun and Ahn, Jinwoo and Kim, Eun-Sol},
  title     = {Zero-Shot Compositional Video Learning with Coding Rate Reduction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {20508--20518}
}
Benchmarking Egocentric Visual-Inertial SLAM at City Scale: Anusha Krishnan,

Shaohui Liu,

Paul-Edouard Sarlin,

Oscar Gentilhomme,

David Caruso,

Maurizio Monge,

Richard Newcombe,

Jakob Engel,

Marc Pollefeys; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Krishnan_2025_ICCV,
  author    = {Krishnan, Anusha and Liu, Shaohui and Sarlin, Paul-Edouard and Gentilhomme, Oscar and Caruso, David and Monge, Maurizio and Newcombe, Richard and Engel, Jakob and Pollefeys, Marc},
  title     = {Benchmarking Egocentric Visual-Inertial {SLAM} at City Scale},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25207--25217}
}
CATSplat: Context-Aware Transformer with Spatial Guidance for Generalizable 3D Gaussian Splatting from A Single-View Image: Wonseok Roh,

Hwanhee Jung,

Jong Wook Kim,

Seunggwan Lee,

Innfarn Yoo,

Andreas Lugmayr,

Seunggeun Chi,

Karthik Ramani,

Sangpil Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Roh_2025_ICCV,
  author    = {Roh, Wonseok and Jung, Hwanhee and Kim, Jong Wook and Lee, Seunggwan and Yoo, Innfarn and Lugmayr, Andreas and Chi, Seunggeun and Ramani, Karthik and Kim, Sangpil},
  title     = {{CATSplat}: Context-Aware Transformer with Spatial Guidance for Generalizable {3D} {Gaussian} Splatting from A Single-View Image},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28228--28238}
}
Learning Normals of Noisy Points by Local Gradient-Aware Surface Filtering: Qing Li,

Huifang Feng,

Xun Gong,

Yu-Shen Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Qing and Feng, Huifang and Gong, Xun and Liu, Yu-Shen},
  title     = {Learning Normals of Noisy Points by Local Gradient-Aware Surface Filtering},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28828--28838}
}
VRBench: A Benchmark for Multi-Step Reasoning in Long Narrative Videos: Jiashuo Yu,

Yue Wu,

Meng Chu,

Zhifei Ren,

Zizheng Huang,

Pei Chu,

Ruijie Zhang,

Yinan He,

Qirui Li,

Songze Li,

Zhenxiang Li,

Zhongying Tu,

Conghui He,

Yu Qiao,

Yali Wang,

Yi Wang,

Limin Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Jiashuo and Wu, Yue and Chu, Meng and Ren, Zhifei and Huang, Zizheng and Chu, Pei and Zhang, Ruijie and He, Yinan and Li, Qirui and Li, Songze and Li, Zhenxiang and Tu, Zhongying and He, Conghui and Qiao, Yu and Wang, Yali and Wang, Yi and Wang, Limin},
  title     = {{VRBench}: A Benchmark for Multi-Step Reasoning in Long Narrative Videos},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21655--21666}
}
How Can Objects Help Video-Language Understanding?: Zitian Tang,

Shijie Wang,

Junho Cho,

Jaewook Yoo,

Chen Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2025_ICCV,
  author    = {Tang, Zitian and Wang, Shijie and Cho, Junho and Yoo, Jaewook and Sun, Chen},
  title     = {How Can Objects Help Video-Language Understanding?},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21994--22003}
}
Conditional Latent Diffusion Models for Zero-Shot Instance Segmentation: Maximilian Ulmer,

Wout Boerdijk,

Rudolph Triebel,

Maximilian Durner; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ulmer_2025_ICCV,
  author    = {Ulmer, Maximilian and Boerdijk, Wout and Triebel, Rudolph and Durner, Maximilian},
  title     = {Conditional Latent Diffusion Models for Zero-Shot Instance Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24360--24369}
}
Stable Diffusion Models are Secretly Good at Visual In-Context Learning: Trevine Oorloff,

Vishwanath Sindagi,

Wele Gedara Chaminda Bandara,

Ali Shafahi,

Amin Ghiasi,

Charan Prakash,

Reza Ardekani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Oorloff_2025_ICCV,
  author    = {Oorloff, Trevine and Sindagi, Vishwanath and Bandara, Wele Gedara Chaminda and Shafahi, Ali and Ghiasi, Amin and Prakash, Charan and Ardekani, Reza},
  title     = {{Stable Diffusion} Models are Secretly Good at Visual In-Context Learning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23604--23613}
}
Task-Specific Zero-shot Quantization-Aware Training for Object Detection: Changhao Li,

Xinrui Chen,

Ji Wang,

Kang Zhao,

Jianfei Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Changhao and Chen, Xinrui and Wang, Ji and Zhao, Kang and Chen, Jianfei},
  title     = {Task-Specific Zero-shot Quantization-Aware Training for Object Detection},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22868--22878}
}
Snakes and Ladders: Two Steps Up for VideoMamba: Hui Lu,

Albert A. Salah,

Ronald Poppe; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Hui and Salah, Albert A. and Poppe, Ronald},
  title     = {Snakes and Ladders: Two Steps Up for {VideoMamba}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24234--24244}
}
Generalized Few-Shot Point Cloud Segmentation via LLM-Assisted Hyper-Relation Matching: Zhaoyang Li,

Yuan Wang,

Guoxin Xiong,

Wangkai Li,

Yuwen Pan,

Tianzhu Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Zhaoyang and Wang, Yuan and Xiong, Guoxin and Li, Wangkai and Pan, Yuwen and Zhang, Tianzhu},
  title     = {Generalized Few-Shot Point Cloud Segmentation via {LLM-Assisted} Hyper-Relation Matching},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23063--23073}
}
AdaptiveAE: An Adaptive Exposure Strategy for HDR Capturing in Dynamic Scenes: Tianyi Xu,

Fan Zhang,

Boxin Shi,

Tianfan Xue,

Yujin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Tianyi and Zhang, Fan and Shi, Boxin and Xue, Tianfan and Wang, Yujin},
  title     = {{AdaptiveAE}: An Adaptive Exposure Strategy for {HDR} Capturing in Dynamic Scenes},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25176--25185}
}
UINavBench: A Framework for Comprehensive Evaluation of Interactive Digital Agents: Harsh Agrawal,

Eldon Schoop,

Xinlei Pan,

Anuj Mahajan,

Ari Seff,

Di Feng,

Ruijia Cheng,

Andres Romero Mier Y Teran,

Esteban Gomez,

Abhishek Sundararajan,

Forrest Huang,

Amanda Swearngin,

Mohana Prasad Sathya Moorthy,

Jeff Nichols,

Alexander Toshev; [pdf] [supp]
[bibtex]
@InProceedings{Agrawal_2025_ICCV,
  author    = {Agrawal, Harsh and Schoop, Eldon and Pan, Xinlei and Mahajan, Anuj and Seff, Ari and Feng, Di and Cheng, Ruijia and Teran, Andres Romero Mier Y and Gomez, Esteban and Sundararajan, Abhishek and Huang, Forrest and Swearngin, Amanda and Moorthy, Mohana Prasad Sathya and Nichols, Jeff and Toshev, Alexander},
  title     = {{UINavBench}: A Framework for Comprehensive Evaluation of Interactive Digital Agents},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23353--23363}
}
CityGS-X: A Scalable Architecture for Efficient and Geometrically Accurate Large-Scale Scene Reconstruction: Yuanyuan Gao,

Hao Li,

Jiaqi Chen,

Zhengyu Zou,

Zhihang Zhong,

Dingwen Zhang,

Xiao Sun,

Junwei Han; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2025_ICCV,
  author    = {Gao, Yuanyuan and Li, Hao and Chen, Jiaqi and Zou, Zhengyu and Zhong, Zhihang and Zhang, Dingwen and Sun, Xiao and Han, Junwei},
  title     = {{CityGS-X}: A Scalable Architecture for Efficient and Geometrically Accurate Large-Scale Scene Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27187--27196}
}
OcRFDet: Object-Centric Radiance Fields for Multi-View 3D Object Detection in Autonomous Driving: Mingqian Ji,

Shanshan Zhang,

Jian Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ji_2025_ICCV,
  author    = {Ji, Mingqian and Zhang, Shanshan and Yang, Jian},
  title     = {{OcRFDet}: Object-Centric Radiance Fields for Multi-View {3D} Object Detection in Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24933--24942}
}
CC-OCR: A Comprehensive and Challenging OCR Benchmark for Evaluating Large Multimodal Models in Literacy: Zhibo Yang,

Jun Tang,

Zhaohai Li,

Pengfei Wang,

Jianqiang Wan,

Humen Zhong,

Xuejing Liu,

Mingkun Yang,

Peng Wang,

Shuai Bai,

Lianwen Jin,

Junyang Lin; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Zhibo and Tang, Jun and Li, Zhaohai and Wang, Pengfei and Wan, Jianqiang and Zhong, Humen and Liu, Xuejing and Yang, Mingkun and Wang, Peng and Bai, Shuai and Jin, Lianwen and Lin, Junyang},
  title     = {{CC-OCR}: A Comprehensive and Challenging {OCR} Benchmark for Evaluating Large Multimodal Models in Literacy},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21744--21754}
}
RCTDistill: Cross-Modal Knowledge Distillation Framework for Radar-Camera 3D Object Detection with Temporal Fusion: Geonho Bang,

Minjae Seong,

Jisong Kim,

Geunju Baek,

Daye Oh,

Junhyung Kim,

Junho Koh,

Jun Won Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bang_2025_ICCV,
  author    = {Bang, Geonho and Seong, Minjae and Kim, Jisong and Baek, Geunju and Oh, Daye and Kim, Junhyung and Koh, Junho and Choi, Jun Won},
  title     = {{RCTDistill}: Cross-Modal Knowledge Distillation Framework for Radar-Camera {3D} Object Detection with Temporal Fusion},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25315--25324}
}
ViSpeak: Visual Instruction Feedback in Streaming Videos: Shenghao Fu,

Qize Yang,

Yuan-Ming Li,

Yi-Xing Peng,

Kun-Yu Lin,

Xihan Wei,

Jian-Fang Hu,

Xiaohua Xie,

Wei-Shi Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2025_ICCV,
  author    = {Fu, Shenghao and Yang, Qize and Li, Yuan-Ming and Peng, Yi-Xing and Lin, Kun-Yu and Wei, Xihan and Hu, Jian-Fang and Xie, Xiaohua and Zheng, Wei-Shi},
  title     = {{ViSpeak}: Visual Instruction Feedback in Streaming Videos},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21778--21788}
}
VideoAds for Fast-Paced Video Understanding: Zheyuan Zhang,

Wanying Dou,

Linkai Peng,

Hongyi Pan,

Ulas Bagci,

Boqing Gong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Zheyuan and Dou, Wanying and Peng, Linkai and Pan, Hongyi and Bagci, Ulas and Gong, Boqing},
  title     = {{VideoAds} for Fast-Paced Video Understanding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21812--21821}
}
MGSR: 2D/3D Mutual-boosted Gaussian Splatting for High-fidelity Surface Reconstruction under Various Light Conditions: Qingyuan Zhou,

Yuehu Gong,

Weidong Yang,

Jiaze Li,

Yeqi Luo,

Baixin Xu,

Shuhao Li,

Ben Fei,

Ying He; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Qingyuan and Gong, Yuehu and Yang, Weidong and Li, Jiaze and Luo, Yeqi and Xu, Baixin and Li, Shuhao and Fei, Ben and He, Ying},
  title     = {{MGSR}: {2D/3D} Mutual-boosted {Gaussian} Splatting for High-fidelity Surface Reconstruction under Various Light Conditions},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27295--27304}
}
MIEB: Massive Image Embedding Benchmark: Chenghao Xiao,

Isaac Chung,

Imene Kerboua,

Jamie Stirling,

Xin Zhang,

Márton Kardos,

Roman Solomatin,

Noura Al Moubayed,

Kenneth Enevoldsen,

Niklas Muennighoff; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2025_ICCV,
  author    = {Xiao, Chenghao and Chung, Isaac and Kerboua, Imene and Stirling, Jamie and Zhang, Xin and Kardos, M{\'a}rton and Solomatin, Roman and Al Moubayed, Noura and Enevoldsen, Kenneth and Muennighoff, Niklas},
  title     = {{MIEB}: Massive Image Embedding Benchmark},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22187--22198}
}
SViM3D: Stable Video Material Diffusion for Single Image 3D Generation: Andreas Engelhardt,

Mark Boss,

Vikram Voleti,

Chun-Han Yao,

Hendrik P. A. Lensch,

Varun Jampani; [pdf] [supp]
[bibtex]
@InProceedings{Engelhardt_2025_ICCV,
  author    = {Engelhardt, Andreas and Boss, Mark and Voleti, Vikram and Yao, Chun-Han and Lensch, Hendrik P. A. and Jampani, Varun},
  title     = {{SViM3D}: Stable Video Material Diffusion for Single Image {3D} Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28428--28439}
}
CoMoGaussian: Continuous Motion-Aware Gaussian Splatting from Motion-Blurred Images: Jungho Lee,

Donghyeong Kim,

Dogyoon Lee,

Suhwan Cho,

Minhyeok Lee,

Wonjoon Lee,

Taeoh Kim,

Dongyoon Wee,

Sangyoun Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Jungho and Kim, Donghyeong and Lee, Dogyoon and Cho, Suhwan and Lee, Minhyeok and Lee, Wonjoon and Kim, Taeoh and Wee, Dongyoon and Lee, Sangyoun},
  title     = {{CoMoGaussian}: Continuous Motion-Aware {Gaussian} Splatting from Motion-Blurred Images},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26415--26424}
}
WIPES: Wavelet-based Visual Primitives: Wenhao Zhang,

Hao Zhu,

Delong Wu,

Di Kang,

Linchao Bao,

Xun Cao,

Zhan Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Wenhao and Zhu, Hao and Wu, Delong and Kang, Di and Bao, Linchao and Cao, Xun and Ma, Zhan},
  title     = {{WIPES}: Wavelet-based Visual Primitives},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27338--27347}
}
Unsupervised Histopathological Image Semantic Segmentation with Overlapping Patches Consistency Constraint: Wentian Cai,

Weizhao Weng,

Zihao Huang,

Yandan Chen,

Siquan Huang,

Ping Gao,

Victor C. M. Leung,

Ying Gao; [pdf]
[bibtex]
@InProceedings{Cai_2025_ICCV,
  author    = {Cai, Wentian and Weng, Weizhao and Huang, Zihao and Chen, Yandan and Huang, Siquan and Gao, Ping and Leung, Victor C. M. and Gao, Ying},
  title     = {Unsupervised Histopathological Image Semantic Segmentation with Overlapping Patches Consistency Constraint},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {23332--23341}
}
Diff2I2P: Differentiable Image-to-Point Cloud Registration with Diffusion Prior: Juncheng Mu,

Chengwei Ren,

Weixiang Zhang,

Liang Pan,

Xiao-Ping Zhang,

Yue Gao; [pdf] [supp]
[bibtex]
@InProceedings{Mu_2025_ICCV,
  author    = {Mu, Juncheng and Ren, Chengwei and Zhang, Weixiang and Pan, Liang and Zhang, Xiao-Ping and Gao, Yue},
  title     = {{Diff2I2P}: Differentiable Image-to-Point Cloud Registration with Diffusion Prior},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25777--25787}
}
LightSwitch: Multi-view Relighting with Material-guided Diffusion: Yehonathan Litman,

Fernando De la Torre,

Shubham Tulsiani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Litman_2025_ICCV,
  author    = {Litman, Yehonathan and De la Torre, Fernando and Tulsiani, Shubham},
  title     = {{LightSwitch}: Multi-view Relighting with Material-guided Diffusion},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27750--27759}
}
CanFields: Consolidating Diffeomorphic Flows for Non-Rigid 4D Interpolation from Arbitrary-Length Sequences: Miaowei Wang,

Changjian Li,

Amir Vaxman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Miaowei and Li, Changjian and Vaxman, Amir},
  title     = {{CanFields}: Consolidating Diffeomorphic Flows for Non-Rigid {4D} Interpolation from Arbitrary-Length Sequences},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28587--28598}
}
Towards Foundational Models for Single-Chip Radar: Tianshu Huang,

Akarsh Prabhakara,

Chuhan Chen,

Jay Karhade,

Deva Ramanan,

Matthew O'Toole,

Anthony Rowe; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Tianshu and Prabhakara, Akarsh and Chen, Chuhan and Karhade, Jay and Ramanan, Deva and O'Toole, Matthew and Rowe, Anthony},
  title     = {Towards Foundational Models for Single-Chip Radar},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24655--24665}
}
Diffusion Transformer meets Multi-level Wavelet Spectrum for Single Image Super-Resolution: Peng Du,

Hui Li,

Han Xu,

Paul Barom Jeon,

Dongwook Lee,

Daehyun Ji,

Ran Yang,

Feng Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Du_2025_ICCV,
  author    = {Du, Peng and Li, Hui and Xu, Han and Jeon, Paul Barom and Lee, Dongwook and Ji, Daehyun and Yang, Ran and Zhu, Feng},
  title     = {Diffusion Transformer meets Multi-level Wavelet Spectrum for Single Image Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {19700--19710}
}
Robust Unfolding Network for HDR Imaging with Modulo Cameras: Zhile Chen,

Hui Ji; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Zhile and Ji, Hui},
  title     = {Robust Unfolding Network for {HDR} Imaging with Modulo Cameras},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {25218--25228}
}
Player-Centric Multimodal Prompt Generation for Large Language Model Based Identity-Aware Basketball Video Captioning: Zeyu Xi,

Haoying Sun,

Yaofei Wu,

Junchi Yan,

Haoran Zhang,

Lifang Wu,

Liang Wang,

Changwen Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xi_2025_ICCV,
  author    = {Xi, Zeyu and Sun, Haoying and Wu, Yaofei and Yan, Junchi and Zhang, Haoran and Wu, Lifang and Wang, Liang and Chen, Changwen},
  title     = {Player-Centric Multimodal Prompt Generation for Large Language Model Based Identity-Aware Basketball Video Captioning},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24330--24339}
}
ArchiSet: Benchmarking Editable and Consistent Single-View 3D Reconstruction of Buildings with Specific Window-to-Wall Ratios: Jun Yin,

Pengyu Zeng,

Licheng Shen,

Miao Zhang,

Jing Zhong,

Yuxing Han,

Shuai Lu; [pdf]
[bibtex]
@InProceedings{Yin_2025_ICCV,
  author    = {Yin, Jun and Zeng, Pengyu and Shen, Licheng and Zhang, Miao and Zhong, Jing and Han, Yuxing and Lu, Shuai},
  title     = {{ArchiSet}: Benchmarking Editable and Consistent Single-View {3D} Reconstruction of Buildings with Specific Window-to-Wall Ratios},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26004--26014}
}
HyperGCT: A Dynamic Hyper-GNN-Learned Geometric Constraint for 3D Registration: Xiyu Zhang,

Jiayi Ma,

Jianwei Guo,

Wei Hu,

Zhaoshuai Qi,

Fei Hui,

Jiaqi Yang,

Yanning Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Xiyu and Ma, Jiayi and Guo, Jianwei and Hu, Wei and Qi, Zhaoshuai and Hui, Fei and Yang, Jiaqi and Zhang, Yanning},
  title     = {{HyperGCT}: A Dynamic {Hyper-GNN-Learned} Geometric Constraint for {3D} Registration},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24750--24759}
}
UnMix-NeRF: Spectral Unmixing Meets Neural Radiance Fields: Fabian Perez,

Sara Rojas,

Carlos Hinojosa,

Hoover Rueda-Chacón,

Bernard Ghanem; [pdf] [supp]
[bibtex]
@InProceedings{Perez_2025_ICCV,
  author    = {Perez, Fabian and Rojas, Sara and Hinojosa, Carlos and Rueda-Chac{\'o}n, Hoover and Ghanem, Bernard},
  title     = {{UnMix-NeRF}: Spectral Unmixing Meets Neural Radiance Fields},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26284--26293}
}
Multidimensional Byte Pair Encoding: Shortened Sequences for Improved Visual Data Generation: Tim Elsner,

Paula Usinger,

Julius Nehring-Wirxel,

Gregor Kobsik,

Victor Czech,

Yanjiang He,

Isaak Lim,

Leif Kobbelt; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Elsner_2025_ICCV,
  author    = {Elsner, Tim and Usinger, Paula and Nehring-Wirxel, Julius and Kobsik, Gregor and Czech, Victor and He, Yanjiang and Lim, Isaak and Kobbelt, Leif},
  title     = {Multidimensional Byte Pair Encoding: Shortened Sequences for Improved Visual Data Generation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21331--21341},
}
All in One: Visual-Description-Guided Unified Point Cloud Segmentation: Zongyan Han,

Mohamed El Amine Boudjoghra,

Jiahua Dong,

Jinhong Wang,

Rao Muhammad Anwer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_ICCV,
  author    = {Han, Zongyan and El Amine Boudjoghra, Mohamed and Dong, Jiahua and Wang, Jinhong and Anwer, Rao Muhammad},
  title     = {All in One: Visual-Description-Guided Unified Point Cloud Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24835--24845},
}
Controllable-LPMoE: Adapting to Challenging Object Segmentation via Dynamic Local Priors from Mixture-of-Experts: Yanguang Sun,

Jiawei Lian,

Jian Yang,

Lei Luo; [pdf]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Yanguang and Lian, Jiawei and Yang, Jian and Luo, Lei},
  title     = {{Controllable-LPMoE}: Adapting to Challenging Object Segmentation via Dynamic Local Priors from Mixture-of-Experts},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22327--22337},
}
Frequency-Dynamic Attention Modulation For Dense Prediction: Linwei Chen,

Lin Gu,

Ying Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Linwei and Gu, Lin and Fu, Ying},
  title     = {Frequency-Dynamic Attention Modulation For Dense Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22620--22632},
}
Semantic versus Identity: A Divide-and-Conquer Approach towards Adjustable Medical Image De-Identification: Yuan Tian,

Shuo Wang,

Rongzhao Zhang,

Zijian Chen,

Yankai Jiang,

Chunyi Li,

Xiangyang Zhu,

Fang Yan,

Qiang Hu,

XiaoSong Wang,

Guangtao Zhai; [pdf] [arXiv]
[bibtex]
@InProceedings{Tian_2025_ICCV,
  author    = {Tian, Yuan and Wang, Shuo and Zhang, Rongzhao and Chen, Zijian and Jiang, Yankai and Li, Chunyi and Zhu, Xiangyang and Yan, Fang and Hu, Qiang and Wang, XiaoSong and Zhai, Guangtao},
  title     = {Semantic versus Identity: A Divide-and-Conquer Approach towards Adjustable Medical Image De-Identification},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20613--20625},
}
Describe Anything: Detailed Localized Image and Video Captioning: Long Lian,

Yifan Ding,

Yunhao Ge,

Sifei Liu,

Hanzi Mao,

Boyi Li,

Marco Pavone,

Ming-Yu Liu,

Trevor Darrell,

Adam Yala,

Yin Cui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lian_2025_ICCV,
  author    = {Lian, Long and Ding, Yifan and Ge, Yunhao and Liu, Sifei and Mao, Hanzi and Li, Boyi and Pavone, Marco and Liu, Ming-Yu and Darrell, Trevor and Yala, Adam and Cui, Yin},
  title     = {Describe Anything: Detailed Localized Image and Video Captioning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21766--21777},
}
Point Cloud Self-supervised Learning via 3D to Multi-view Masked Learner: Zhimin Chen,

Xuewei Chen,

Xiao Guo,

Yingwei Li,

Longlong Jing,

Liang Yang,

Bing Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Zhimin and Chen, Xuewei and Guo, Xiao and Li, Yingwei and Jing, Longlong and Yang, Liang and Li, Bing},
  title     = {Point Cloud Self-supervised Learning via {3D} to Multi-view Masked Learner},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27618--27629},
}
Probabilistic Prototype Calibration of Vision-language Models for Generalized Few-shot Semantic Segmentation: Jie Liu,

Jiayi Shen,

Pan Zhou,

Jan-Jakob Sonke,

Efstratios Gavves; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Jie and Shen, Jiayi and Zhou, Pan and Sonke, Jan-Jakob and Gavves, Efstratios},
  title     = {Probabilistic Prototype Calibration of Vision-language Models for Generalized Few-shot Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21155--21165},
}
Triad: Empowering LMM-based Anomaly Detection with Expert-guided Region-of-Interest Tokenizer and Manufacturing Process: Yuanze Li,

Shihao Yuan,

Haolin Wang,

Qizhang Li,

Ming Liu,

Chen Xu,

Guangming Shi,

Wangmeng Zuo; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Yuanze and Yuan, Shihao and Wang, Haolin and Li, Qizhang and Liu, Ming and Xu, Chen and Shi, Guangming and Zuo, Wangmeng},
  title     = {Triad: Empowering {LMM}-based Anomaly Detection with Expert-guided Region-of-Interest Tokenizer and Manufacturing Process},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21917--21926},
}
When Anchors Meet Cold Diffusion: A Multi-Stage Approach to Lane Detection: Bo-Lun Huang,

Zi-Xiang Ni,

Feng-Kai Huang,

Hong-Han Shuai,

Wen-Huang Cheng; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_ICCV,
  author    = {Huang, Bo-Lun and Ni, Zi-Xiang and Huang, Feng-Kai and Shuai, Hong-Han and Cheng, Wen-Huang},
  title     = {When Anchors Meet Cold Diffusion: A Multi-Stage Approach to Lane Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27917--27926},
}
Breaking Grid Constraints: Dynamic Graph Reconstruction Network for Multi-organ Segmentation: Junhao Xiao,

Yang Wei,

Jingyu Wang,

Yongchao Wang,

Xiuli Bi,

Bin Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Xiao_2025_ICCV,
  author    = {Xiao, Junhao and Wei, Yang and Wang, Jingyu and Wang, Yongchao and Bi, Xiuli and Xiao, Bin},
  title     = {Breaking Grid Constraints: Dynamic Graph Reconstruction Network for Multi-organ Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24413--24422},
}
Bilateral Collaboration with Large Vision-Language Models for Open Vocabulary Human-Object Interaction Detection: Yupeng Hu,

Changxing Ding,

Chang Sun,

Shaoli Huang,

Xiangmin Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Yupeng and Ding, Changxing and Sun, Chang and Huang, Shaoli and Xu, Xiangmin},
  title     = {Bilateral Collaboration with Large Vision-Language Models for Open Vocabulary Human-Object Interaction Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20126--20136},
}
GaSLight: Gaussian Splats for Spatially-Varying Lighting in HDR: Christophe Bolduc,

Yannick Hold-Geoffroy,

Jean-François Lalonde; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bolduc_2025_ICCV,
  author    = {Bolduc, Christophe and Hold-Geoffroy, Yannick and Lalonde, Jean-Fran{\c{c}}ois},
  title     = {{GaSLight}: {Gaussian} Splats for Spatially-Varying Lighting in {HDR}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {29120--29130},
}
ATAS: Any-to-Any Self-Distillation for Enhanced Open-Vocabulary Dense Prediction: Juan Yeo,

Soonwoo Cha,

Jiwoo Song,

Hyunbin Jin,

Taesup Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yeo_2025_ICCV,
  author    = {Yeo, Juan and Cha, Soonwoo and Song, Jiwoo and Jin, Hyunbin and Kim, Taesup},
  title     = {{ATAS}: Any-to-Any Self-Distillation for Enhanced Open-Vocabulary Dense Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20390--20400},
}
Semantic-guided Camera Ray Regression for Visual Localization: Yesheng Zhang,

Xu Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yesheng and Zhao, Xu},
  title     = {Semantic-guided Camera Ray Regression for Visual Localization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25639--25648},
}
Multi-View Slot Attention Using Paraphrased Texts for Face Anti-Spoofing: Jeongmin Yu,

Susang Kim,

Kisu Lee,

Taekyoung Kwon,

Won-Yong Shin,

Ha Young Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Jeongmin and Kim, Susang and Lee, Kisu and Kwon, Taekyoung and Shin, Won-Yong and Kim, Ha Young},
  title     = {Multi-View Slot Attention Using Paraphrased Texts for Face Anti-Spoofing},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21117--21128},
}
GCRayDiffusion: Pose-Free Surface Reconstruction via Geometric Consistent Ray Diffusion: Li-Heng Chen,

Zi-Xin Zou,

Chang Liu,

Tianjiao Jing,

Yan-Pei Cao,

Shi-Sheng Huang,

Hongbo Fu,

Hua Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Li-Heng and Zou, Zi-Xin and Liu, Chang and Jing, Tianjiao and Cao, Yan-Pei and Huang, Shi-Sheng and Fu, Hongbo and Huang, Hua},
  title     = {{GCRayDiffusion}: Pose-Free Surface Reconstruction via Geometric Consistent Ray Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25335--25345},
}
Continual Multiple Instance Learning with Enhanced Localization for Histopathological Whole Slide Image Analysis: Byung Hyun Lee,

Wongi Jeong,

Woojae Han,

Kyoungbun Lee,

Se Young Chun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Byung Hyun and Jeong, Wongi and Han, Woojae and Lee, Kyoungbun and Chun, Se Young},
  title     = {Continual Multiple Instance Learning with Enhanced Localization for Histopathological Whole Slide Image Analysis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23232--23242},
}
Leveraging Debiased Cross-modal Attention Maps and Code-based Reasoning for Zero-shot Referring Expression Comprehension: Juntao Chen,

Wen Shen,

Zhihua Wei,

Lijun Sun,

Hongyun Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Juntao and Shen, Wen and Wei, Zhihua and Sun, Lijun and Zhang, Hongyun},
  title     = {Leveraging Debiased Cross-modal Attention Maps and Code-based Reasoning for Zero-shot Referring Expression Comprehension},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20413--20424},
}
Instant GaussianImage: A Generalizable and Self-Adaptive Image Representation via 2D Gaussian Splatting: Zhaojie Zeng,

Yuesong Wang,

Tao Guan,

Chao Yang,

Lili Ju; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2025_ICCV,
  author    = {Zeng, Zhaojie and Wang, Yuesong and Guan, Tao and Yang, Chao and Ju, Lili},
  title     = {Instant {GaussianImage}: A Generalizable and Self-Adaptive Image Representation via {2D} {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27896--27905},
}
AcZeroTS: Active Learning for Zero-shot Tissue Segmentation in Pathology Images: Jiao Tang,

Junjie Zhou,

Bo Qian,

Peng Wan,

Yingli Zuo,

Wei Shao,

Daoqiang Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Tang_2025_ICCV,
  author    = {Tang, Jiao and Zhou, Junjie and Qian, Bo and Wan, Peng and Zuo, Yingli and Shao, Wei and Zhang, Daoqiang},
  title     = {{AcZeroTS}: Active Learning for Zero-shot Tissue Segmentation in Pathology Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23508--23518},
}
Modeling Saliency Dataset Bias: Matthias Kümmerer,

Harneet Singh Khanuja,

Matthias Bethge; [pdf] [supp]
[bibtex]
@InProceedings{Kummerer_2025_ICCV,
  author    = {K{\"u}mmerer, Matthias and Khanuja, Harneet Singh and Bethge, Matthias},
  title     = {Modeling Saliency Dataset Bias},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22077--22088},
}
MCOP: Multi-UAV Collaborative Occupancy Prediction: Zefu Lin,

Wenbo Chen,

Xiaojuan Jin,

Yuran Yang,

Lue Fan,

Yixin Zhang,

Yufeng Zhang,

Zhaoxiang Zhang; [pdf]
[bibtex]
@InProceedings{Lin_2025_ICCV,
  author    = {Lin, Zefu and Chen, Wenbo and Jin, Xiaojuan and Yang, Yuran and Fan, Lue and Zhang, Yixin and Zhang, Yufeng and Zhang, Zhaoxiang},
  title     = {{MCOP}: Multi-{UAV} Collaborative Occupancy Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27242--27251},
}
Tree Skeletonization from 3D Point Clouds by Denoising Diffusion: Elias Ariel Marks,

Lucas Nunes,

Federico Magistri,

Matteo Sodano,

Rodrigo Marcuzzi,

Lars Zimmermann,

Jens Behley,

Cyrill Stachniss; [pdf] [supp]
[bibtex]
@InProceedings{Marks_2025_ICCV,
  author    = {Marks, Elias Ariel and Nunes, Lucas and Magistri, Federico and Sodano, Matteo and Marcuzzi, Rodrigo and Zimmermann, Lars and Behley, Jens and Stachniss, Cyrill},
  title     = {Tree Skeletonization from {3D} Point Clouds by Denoising Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27607--27617},
}
From Panels to Prose: Generating Literary Narratives from Comics: Ragav Sachdeva,

Andrew Zisserman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sachdeva_2025_ICCV,
  author    = {Sachdeva, Ragav and Zisserman, Andrew},
  title     = {From Panels to Prose: Generating Literary Narratives from Comics},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21864--21873},
}
VMem: Consistent Interactive Video Scene Generation with Surfel-Indexed View Memory: Runjia Li,

Philip Torr,

Andrea Vedaldi,

Tomas Jakab; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Runjia and Torr, Philip and Vedaldi, Andrea and Jakab, Tomas},
  title     = {{VMem}: Consistent Interactive Video Scene Generation with Surfel-Indexed View Memory},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25690--25699},
}
Learning Null Geodesics for Gravitational Lensing Rendering in General Relativity: Mingyuan Sun,

Zheng Fang,

Jiaxu Wang,

Kunyi Zhang,

Qiang Zhang,

Renjing Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Mingyuan and Fang, Zheng and Wang, Jiaxu and Zhang, Kunyi and Zhang, Qiang and Xu, Renjing},
  title     = {Learning Null Geodesics for Gravitational Lensing Rendering in General Relativity},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28473--28482},
}
Towards Accurate and Efficient 3D Object Detection for Autonomous Driving: A Mixture of Experts Computing System on Edge: Linshen Liu,

Boyan Su,

Junyue Jiang,

Guanlin Wu,

Cong Guo,

Ceyu Xu,

Hao Frank Yang; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Linshen and Su, Boyan and Jiang, Junyue and Wu, Guanlin and Guo, Cong and Xu, Ceyu and Yang, Hao Frank},
  title     = {Towards Accurate and Efficient {3D} Object Detection for Autonomous Driving: A Mixture of Experts Computing System on Edge},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25903--25913},
}
Uncertainty-Driven Expert Control: Enhancing the Reliability of Medical Vision-Language Models: Xiao Liang,

Di Wang,

Zhicheng Jiao,

Ronghan Li,

Pengfei Yang,

Quan Wang,

Tat-Seng Chua; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2025_ICCV,
  author    = {Liang, Xiao and Wang, Di and Jiao, Zhicheng and Li, Ronghan and Yang, Pengfei and Wang, Quan and Chua, Tat-Seng},
  title     = {Uncertainty-Driven Expert Control: Enhancing the Reliability of Medical Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21144--21154},
}
Mitigating Geometric Degradation in Fast DownSampling via FastAdapter for Point Cloud Segmentation: Shuofeng Sun,

Haibin Yan; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_ICCV,
  author    = {Sun, Shuofeng and Yan, Haibin},
  title     = {Mitigating Geometric Degradation in Fast {DownSampling} via {FastAdapter} for Point Cloud Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25983--25992},
}
Dual-S3D: Hierarchical Dual-Path Selective SSM-CNN for High-Fidelity Implicit Reconstruction: Luoxi Zhang,

Pragyan Shrestha,

Yu Zhou,

Chun Xie,

Itaru Kitahara; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Luoxi and Shrestha, Pragyan and Zhou, Yu and Xie, Chun and Kitahara, Itaru},
  title     = {{Dual-S3D}: Hierarchical Dual-Path Selective {SSM-CNN} for High-Fidelity Implicit Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25104--25113},
}
Lidar Waveforms are Worth 40x128x33 Words: Dominik Scheuble,

Hanno Holzhüter,

Steven Peters,

Mario Bijelic,

Felix Heide; [pdf] [supp]
[bibtex]
@InProceedings{Scheuble_2025_ICCV,
  author    = {Scheuble, Dominik and Holzh{\"u}ter, Hanno and Peters, Steven and Bijelic, Mario and Heide, Felix},
  title     = {Lidar Waveforms are Worth 40x128x33 Words},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28913--28924},
}
SALAD -- Semantics-Aware Logical Anomaly Detection: Matic Fučka,

Vitjan Zavrtanik,

Danijel Skočaj; [pdf] [supp]
[bibtex]
@InProceedings{Fucka_2025_ICCV,
  author    = {Fu{\v{c}}ka, Matic and Zavrtanik, Vitjan and Sko{\v{c}}aj, Danijel},
  title     = {{SALAD} -- Semantics-Aware Logical Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21843--21852},
}
ARGUS: Hallucination and Omission Evaluation in Video-LLMs: Ruchit Rawal,

Reza Shirkavand,

Heng Huang,

Gowthami Somepalli,

Tom Goldstein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rawal_2025_ICCV,
  author    = {Rawal, Ruchit and Shirkavand, Reza and Huang, Heng and Somepalli, Gowthami and Goldstein, Tom},
  title     = {{ARGUS}: Hallucination and Omission Evaluation in {Video-LLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20280--20290},
}
Group Inertial Poser: Multi-Person Pose and Global Translation from Sparse Inertial Sensors and Ultra-Wideband Ranging: Ying Xue,

Jiaxi Jiang,

Rayan Armani,

Dominik Hollidt,

Yi-Chi Liao,

Christian Holz; [pdf]
[bibtex]
@InProceedings{Xue_2025_ICCV,
  author    = {Xue, Ying and Jiang, Jiaxi and Armani, Rayan and Hollidt, Dominik and Liao, Yi-Chi and Holz, Christian},
  title     = {Group Inertial Poser: Multi-Person Pose and Global Translation from Sparse Inertial Sensors and Ultra-Wideband Ranging},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24910--24921},
}
NormalLoc: Visual Localization on Textureless 3D Models using Surface Normals: Jiro Abe,

Gaku Nakano,

Kazumine Ogura; [pdf] [supp]
[bibtex]
@InProceedings{Abe_2025_ICCV,
  author    = {Abe, Jiro and Nakano, Gaku and Ogura, Kazumine},
  title     = {{NormalLoc}: Visual Localization on Textureless {3D} Models using Surface Normals},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25421--25430},
}
Domain-aware Category-level Geometry Learning Segmentation for 3D Point Clouds: Pei He,

Lingling Li,

Licheng Jiao,

Ronghua Shang,

Fang Liu,

Shuang Wang,

Xu Liu,

Wenping Ma; [pdf] [arXiv]
[bibtex]
@InProceedings{He_2025_ICCV,
  author    = {He, Pei and Li, Lingling and Jiao, Licheng and Shang, Ronghua and Liu, Fang and Wang, Shuang and Liu, Xu and Ma, Wenping},
  title     = {Domain-aware Category-level Geometry Learning Segmentation for {3D} Point Clouds},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28324--28333},
}
Improving SAM for Camouflaged Object Detection via Dual Stream Adapters: Jiaming Liu,

Linghe Kong,

Guihai Chen; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Jiaming and Kong, Linghe and Chen, Guihai},
  title     = {Improving {SAM} for Camouflaged Object Detection via Dual Stream Adapters},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21906--21916},
}
DiSCO-3D : Discovering and Segmenting Sub-Concepts from Open-vocabulary Queries in NeRF: Doriand Petit,

Steve Bourgeois,

Vincent Gay-Bellile,

Florian Chabot,

Loïc Barthe; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Petit_2025_ICCV,
  author    = {Petit, Doriand and Bourgeois, Steve and Gay-Bellile, Vincent and Chabot, Florian and Barthe, Lo{\"\i}c},
  title     = {{DiSCO-3D} : Discovering and Segmenting Sub-Concepts from Open-vocabulary Queries in {NeRF}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20043--20052},
}
VCA: Video Curious Agent for Long Video Understanding: Zeyuan Yang,

Delin Chen,

Xueyang Yu,

Maohao Shen,

Chuang Gan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Zeyuan and Chen, Delin and Yu, Xueyang and Shen, Maohao and Gan, Chuang},
  title     = {{VCA}: Video Curious Agent for Long Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20168--20179},
}
ForeSight: Multi-View Streaming Joint Object Detection and Trajectory Forecasting: Sandro Papais,

Letian Wang,

Brian Cheong,

Steven L. Waslander; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Papais_2025_ICCV,
  author    = {Papais, Sandro and Wang, Letian and Cheong, Brian and Waslander, Steven L.},
  title     = {{ForeSight}: Multi-View Streaming Joint Object Detection and Trajectory Forecasting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25474--25484},
}
SG-LDM: Semantic-Guided LiDAR Generation via Latent-Aligned Diffusion: Zhengkang Xiang,

Zizhao Li,

Amir Khodabandeh,

Kourosh Khoshelham; [pdf] [supp]
[bibtex]
@InProceedings{Xiang_2025_ICCV,
  author    = {Xiang, Zhengkang and Li, Zizhao and Khodabandeh, Amir and Khoshelham, Kourosh},
  title     = {{SG-LDM}: Semantic-Guided {LiDAR} Generation via Latent-Aligned Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24965--24976},
}
Dynamic-DINO: Fine-Grained Mixture of Experts Tuning for Real-time Open-Vocabulary Object Detection: Yehao Lu,

Minghe Weng,

Zekang Xiao,

Rui Jiang,

Wei Su,

Guangcong Zheng,

Ping Lu,

Xi Li; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Yehao and Weng, Minghe and Xiao, Zekang and Jiang, Rui and Su, Wei and Zheng, Guangcong and Lu, Ping and Li, Xi},
  title     = {{Dynamic-DINO}: Fine-Grained Mixture of Experts Tuning for Real-time Open-Vocabulary Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20847--20856},
}
BridgeDepth: Bridging Monocular and Stereo Reasoning with Latent Alignment: Tongfan Guan,

Jiaxin Guo,

Chen Wang,

Yun-Hui Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guan_2025_ICCV,
  author    = {Guan, Tongfan and Guo, Jiaxin and Wang, Chen and Liu, Yun-Hui},
  title     = {{BridgeDepth}: Bridging Monocular and Stereo Reasoning with Latent Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27681--27691},
}
ViLLa: Video Reasoning Segmentation with Large Language Model: Rongkun Zheng,

Lu Qi,

Xi Chen,

Yi Wang,

Kun Wang,

Hengshuang Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Rongkun and Qi, Lu and Chen, Xi and Wang, Yi and Wang, Kun and Zhao, Hengshuang},
  title     = {{ViLLa}: Video Reasoning Segmentation with Large Language Model},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23667--23677},
}
RARE: Refine Any Registration of Pairwise Point Clouds via Zero-Shot Learning: Chengyu Zheng,

Jin Huang,

Honghua Chen,

Mingqiang Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Chengyu and Huang, Jin and Chen, Honghua and Wei, Mingqiang},
  title     = {{RARE}: Refine Any Registration of Pairwise Point Clouds via Zero-Shot Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26549--26558},
}
IntrinsicControlNet: Cross-distribution Image Generation with Real and Unreal: Jiayuan Lu,

Rengan Xie,

Zixuan Xie,

Zhizhen Wu,

Dianbing Xi,

Qi Ye,

Rui Wang,

Hujun Bao,

Yuchi Huo; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Jiayuan and Xie, Rengan and Xie, Zixuan and Wu, Zhizhen and Xi, Dianbing and Ye, Qi and Wang, Rui and Bao, Hujun and Huo, Yuchi},
  title     = {{IntrinsicControlNet}: Cross-distribution Image Generation with Real and Unreal},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27315--27325},
}
Flash-VStream: Efficient Real-Time Understanding for Long Video Streams: Haoji Zhang,

Yiqin Wang,

Yansong Tang,

Yong Liu,

Jiashi Feng,

Xiaojie Jin; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Haoji and Wang, Yiqin and Tang, Yansong and Liu, Yong and Feng, Jiashi and Jin, Xiaojie},
  title     = {{Flash-VStream}: Efficient Real-Time Understanding for Long Video Streams},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21059--21069},
}
GroundFlow: A Plug-in Module for Temporal Reasoning on 3D Point Cloud Sequential Grounding: Zijun Lin,

Shuting He,

Cheston Tan,

Bihan Wen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_ICCV,
  author    = {Lin, Zijun and He, Shuting and Tan, Cheston and Wen, Bihan},
  title     = {{GroundFlow}: A Plug-in Module for Temporal Reasoning on {3D} Point Cloud Sequential Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28774--28784},
}
DAA*: Deep Angular A Star for Image-based Path Planning: Zhiwei Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_ICCV,
  author    = {Xu, Zhiwei},
  title     = {{DAA*}: Deep Angular {A Star} for Image-based Path Planning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25284--25293},
}
Perspective-Invariant 3D Object Detection: Ao Liang,

Lingdong Kong,

Dongyue Lu,

Youquan Liu,

Jian Fang,

Huaici Zhao,

Wei Tsang Ooi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2025_ICCV,
  author    = {Liang, Ao and Kong, Lingdong and Lu, Dongyue and Liu, Youquan and Fang, Jian and Zhao, Huaici and Ooi, Wei Tsang},
  title     = {Perspective-Invariant {3D} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27725--27738},
}
MixA: A Mixed Attention approach with Stable Lightweight Linear Attention to enhance Efficiency of Vision Transformers at the Edge: Sabbir Ahmed,

Jingtao Li,

Weiming Zhuang,

Chen Chen,

Lingjuan Lyu; [pdf] [supp]
[bibtex]
@InProceedings{Ahmed_2025_ICCV,
  author    = {Ahmed, Sabbir and Li, Jingtao and Zhuang, Weiming and Chen, Chen and Lyu, Lingjuan},
  title     = {{MixA}: A Mixed Attention approach with Stable Lightweight Linear Attention to enhance Efficiency of Vision Transformers at the Edge},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21187--21196},
}
LIRA: Inferring Segmentation in Large Multi-modal Models with Local Interleaved Region Assistance: Zhang Li,

Biao Yang,

Qiang Liu,

Shuo Zhang,

Zhiyin Ma,

Liang Yin,

Linger Deng,

Yabo Sun,

Yuliang Liu,

Xiang Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Zhang and Yang, Biao and Liu, Qiang and Zhang, Shuo and Ma, Zhiyin and Yin, Liang and Deng, Linger and Sun, Yabo and Liu, Yuliang and Bai, Xiang},
  title     = {{LIRA}: Inferring Segmentation in Large Multi-modal Models with Local Interleaved Region Assistance},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24056--24067},
}
Adapting In-Domain Few-Shot Segmentation to New Domains without Source Domain Retraining: Qi Fan,

Kaiqi Liu,

Nian Liu,

Hisham Cholakkal,

Rao Muhammad Anwer,

Wenbin Li,

Yang Gao; [pdf]
[bibtex]
@InProceedings{Fan_2025_ICCV,
  author    = {Fan, Qi and Liu, Kaiqi and Liu, Nian and Cholakkal, Hisham and Anwer, Rao Muhammad and Li, Wenbin and Gao, Yang},
  title     = {Adapting In-Domain Few-Shot Segmentation to New Domains without Source Domain Retraining},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21429--21439},
}
HUG: Hierarchical Urban Gaussian Splatting with Block-Based Reconstruction for Large-Scale Aerial Scenes: Mai Su,

Zhongtao Wang,

Huishan Au,

Yilong Li,

Xizhe Cao,

Chengwei Pan,

Yisong Chen,

Guoping Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Su_2025_ICCV,
  author    = {Su, Mai and Wang, Zhongtao and Au, Huishan and Li, Yilong and Cao, Xizhe and Pan, Chengwei and Chen, Yisong and Wang, Guoping},
  title     = {{HUG}: Hierarchical Urban {Gaussian} Splatting with Block-Based Reconstruction for Large-Scale Aerial Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28839--28848},
}
Seam360GS: Seamless 360° Gaussian Splatting from Real-World Omnidirectional Images: Changha Shin,

Woong Oh Cho,

Seon Joo Kim; [pdf] [supp]
[bibtex]
@InProceedings{Shin_2025_ICCV,
  author    = {Shin, Changha and Cho, Woong Oh and Kim, Seon Joo},
  title     = {{Seam360GS}: Seamless {$360^\circ$} {Gaussian} Splatting from Real-World Omnidirectional Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28970--28979},
}
Talking to DINO: Bridging Self-Supervised Vision Backbones with Language for Open-Vocabulary Segmentation: Luca Barsellotti,

Lorenzo Bianchi,

Nicola Messina,

Fabio Carrara,

Marcella Cornia,

Lorenzo Baraldi,

Fabrizio Falchi,

Rita Cucchiara; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Barsellotti_2025_ICCV,
  author    = {Barsellotti, Luca and Bianchi, Lorenzo and Messina, Nicola and Carrara, Fabio and Cornia, Marcella and Baraldi, Lorenzo and Falchi, Fabrizio and Cucchiara, Rita},
  title     = {Talking to {DINO}: Bridging Self-Supervised Vision Backbones with Language for Open-Vocabulary Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22025--22035},
}
ROAR: Reducing Inversion Error in Generative Image Watermarking: Hanyi Wang,

Han Fang,

Shi-Lin Wang,

Ee-Chien Chang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Hanyi and Fang, Han and Wang, Shi-Lin and Chang, Ee-Chien},
  title     = {{ROAR}: Reducing Inversion Error in Generative Image Watermarking},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19742--19751},
}
Learning Beyond Still Frames: Scaling Vision-Language Models with Video: Yiyuan Zhang,

Handong Li,

Jing Liu,

Xiangyu Yue; [pdf]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Yiyuan and Li, Handong and Liu, Jing and Yue, Xiangyu},
  title     = {Learning Beyond Still Frames: Scaling Vision-Language Models with Video},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22425--22435},
}
MultiADS: Defect-aware Supervision for Multi-type Anomaly Detection and Segmentation in Zero-Shot Learning: Ylli Sadikaj,

Hongkuan Zhou,

Lavdim Halilaj,

Stefan Schmid,

Steffen Staab,

Claudia Plant; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sadikaj_2025_ICCV,
  author    = {Sadikaj, Ylli and Zhou, Hongkuan and Halilaj, Lavdim and Schmid, Stefan and Staab, Steffen and Plant, Claudia},
  title     = {{MultiADS}: Defect-aware Supervision for Multi-type Anomaly Detection and Segmentation in Zero-Shot Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22978--22988},
}
SEGS-SLAM: Structure-enhanced 3D Gaussian Splatting SLAM with Appearance Embedding: Tianci Wen,

Zhiang Liu,

Yongchun Fang; [pdf] [supp]
[bibtex]
@InProceedings{Wen_2025_ICCV,
  author    = {Wen, Tianci and Liu, Zhiang and Fang, Yongchun},
  title     = {{SEGS-SLAM}: Structure-enhanced {3D} {Gaussian} Splatting {SLAM} with Appearance Embedding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28103--28113},
}
Images as Noisy Labels: Unleashing the Potential of the Diffusion Model for Open-Vocabulary Semantic Segmentation: Fan Li,

Xuanbin Wang,

Xuan Wang,

Zhaoxiang Zhang,

Yuelei Xu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Fan and Wang, Xuanbin and Wang, Xuan and Zhang, Zhaoxiang and Xu, Yuelei},
  title     = {Images as Noisy Labels: Unleashing the Potential of the Diffusion Model for Open-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24255--24265},
}
Tile-wise vs. Image-wise: Random-Tile Loss and Training Paradigm for Gaussian Splatting: Xiaoyu Zhang,

Weihong Pan,

Xiaojun Xiang,

Hongjia Zhai,

Liyang Zhou,

Hanqing Jiang,

Guofeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Xiaoyu and Pan, Weihong and Xiang, Xiaojun and Zhai, Hongjia and Zhou, Liyang and Jiang, Hanqing and Zhang, Guofeng},
  title     = {Tile-wise vs. Image-wise: Random-Tile Loss and Training Paradigm for {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26923--26932},
}
Super Resolved Imaging with Adaptive Optics: Robin Swanson,

Esther Y. H. Lin,

Masen Lamb,

Suresh Sivanandam,

Kiriakos N. Kutulakos; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Swanson_2025_ICCV,
  author    = {Swanson, Robin and Lin, Esther Y. H. and Lamb, Masen and Sivanandam, Suresh and Kutulakos, Kiriakos N.},
  title     = {Super Resolved Imaging with Adaptive Optics},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {29142--29152},
}
BillBoard Splatting (BBSplat): Learnable Textured Primitives for Novel View Synthesis: David Svitov,

Pietro Morerio,

Lourdes Agapito,

Alessio Del Bue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Svitov_2025_ICCV,
  author    = {Svitov, David and Morerio, Pietro and Agapito, Lourdes and Del Bue, Alessio},
  title     = {{BillBoard} Splatting ({BBSplat}): Learnable Textured Primitives for Novel View Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25029--25039},
}
SC-Lane: Slope-aware and Consistent Road Height Estimation Framework for 3D Lane Detection: Chaesong Park,

Eunbin Seo,

Jihyeon Hwang,

Jongwoo Lim; [pdf] [supp]
[bibtex]
@InProceedings{Park_2025_ICCV,
  author    = {Park, Chaesong and Seo, Eunbin and Hwang, Jihyeon and Lim, Jongwoo},
  title     = {{SC-Lane}: Slope-aware and Consistent Road Height Estimation Framework for {3D} Lane Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28407--28416},
}
MedSegFactory: Text-Guided Generation of Medical Image-Mask Pairs: Jiawei Mao,

Yuhan Wang,

Yucheng Tang,

Daguang Xu,

Kang Wang,

Yang Yang,

Zongwei Zhou,

Yuyin Zhou; [pdf] [arXiv]
[bibtex]
@InProceedings{Mao_2025_ICCV,
  author    = {Mao, Jiawei and Wang, Yuhan and Tang, Yucheng and Xu, Daguang and Wang, Kang and Yang, Yang and Zhou, Zongwei and Zhou, Yuyin},
  title     = {{MedSegFactory}: Text-Guided Generation of Medical Image-Mask Pairs},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21525--21535},
}
ReAL-AD: Towards Human-Like Reasoning in End-to-End Autonomous Driving: Yuhang Lu,

Jiadong Tu,

Yuexin Ma,

Xinge Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Yuhang and Tu, Jiadong and Ma, Yuexin and Zhu, Xinge},
  title     = {{ReAL-AD}: Towards Human-Like Reasoning in End-to-End Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27783--27793},
}
ReME: A Data-Centric Framework for Training-Free Open-Vocabulary Segmentation: Xiwei Xuan,

Ziquan Deng,

Kwan-Liu Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xuan_2025_ICCV,
  author    = {Xuan, Xiwei and Deng, Ziquan and Ma, Kwan-Liu},
  title     = {{ReME}: A Data-Centric Framework for Training-Free Open-Vocabulary Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20954--20965},
}
A Visual Leap in CLIP Compositionality Reasoning through Generation of Counterfactual Sets: Zexi Jia,

Chuanwei Huang,

Hongyan Fei,

Yeshuang Zhu,

Zhiqiang Yuan,

Ying Deng,

Jiapei Zhang,

Jinchao Zhang,

Jie Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jia_2025_ICCV,
  author    = {Jia, Zexi and Huang, Chuanwei and Fei, Hongyan and Zhu, Yeshuang and Yuan, Zhiqiang and Deng, Ying and Zhang, Jiapei and Zhang, Jinchao and Zhou, Jie},
  title     = {A Visual Leap in {CLIP} Compositionality Reasoning through Generation of Counterfactual Sets},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23498--23507},
}
Superpowering Open-Vocabulary Object Detectors for X-ray Vision: Pablo Garcia-Fernandez,

Lorenzo Vaquero,

Mingxuan Liu,

Feng Xue,

Daniel Cores,

Nicu Sebe,

Manuel Mucientes,

Elisa Ricci; [pdf] [supp]
[bibtex]
@InProceedings{Garcia-Fernandez_2025_ICCV,
  author    = {Garcia-Fernandez, Pablo and Vaquero, Lorenzo and Liu, Mingxuan and Xue, Feng and Cores, Daniel and Sebe, Nicu and Mucientes, Manuel and Ricci, Elisa},
  title     = {Superpowering Open-Vocabulary Object Detectors for {X-ray} Vision},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20770--20779},
}
LVBench: An Extreme Long Video Understanding Benchmark: Weihan Wang,

Zehai He,

Wenyi Hong,

Yean Cheng,

Xiaohan Zhang,

Ji Qi,

Ming Ding,

Xiaotao Gu,

Shiyu Huang,

Bin Xu,

Yuxiao Dong,

Jie Tang; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Weihan and He, Zehai and Hong, Wenyi and Cheng, Yean and Zhang, Xiaohan and Qi, Ji and Ding, Ming and Gu, Xiaotao and Huang, Shiyu and Xu, Bin and Dong, Yuxiao and Tang, Jie},
  title     = {{LVBench}: An Extreme Long Video Understanding Benchmark},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22958--22967},
}
SpatialCrafter: Unleashing the Imagination of Video Diffusion Models for Scene Reconstruction from Limited Observations: Songchun Zhang,

Huiyao Xu,

Sitong Guo,

Zhongwei Xie,

Hujun Bao,

Weiwei Xu,

Changqing Zou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Songchun and Xu, Huiyao and Guo, Sitong and Xie, Zhongwei and Bao, Hujun and Xu, Weiwei and Zou, Changqing},
  title     = {{SpatialCrafter}: Unleashing the Imagination of Video Diffusion Models for Scene Reconstruction from Limited Observations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27794--27805},
}
Multi-Granular Spatio-Temporal Token Merging for Training-Free Acceleration of Video LLMs: Jeongseok Hyun,

Sukjun Hwang,

Su Ho Han,

Taeoh Kim,

Inwoong Lee,

Dongyoon Wee,

Joon-Young Lee,

Seon Joo Kim,

Minho Shim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hyun_2025_ICCV,
  author    = {Hyun, Jeongseok and Hwang, Sukjun and Han, Su Ho and Kim, Taeoh and Lee, Inwoong and Wee, Dongyoon and Lee, Joon-Young and Kim, Seon Joo and Shim, Minho},
  title     = {Multi-Granular Spatio-Temporal Token Merging for Training-Free Acceleration of Video {LLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23990--24000},
}
Inverse Image-Based Rendering for Light Field Generation from Single Images: Hyunjun Jung,

Hae-Gon Jeon; [pdf] [supp]
[bibtex]
@InProceedings{Jung_2025_ICCV,
  author    = {Jung, Hyunjun and Jeon, Hae-Gon},
  title     = {Inverse Image-Based Rendering for Light Field Generation from Single Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24739--24749},
}
Generalized and Efficient 2D Gaussian Splatting for Arbitrary-scale Super-Resolution: Du Chen,

Liyi Chen,

Zhengqiang Zhang,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Du and Chen, Liyi and Zhang, Zhengqiang and Zhang, Lei},
  title     = {Generalized and Efficient {2D} {Gaussian} Splatting for Arbitrary-scale Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26435--26445},
}
Pretend Benign: A Stealthy Adversarial Attack by Exploiting Vulnerabilities in Cooperative Perception: Hongwei Lin,

Dongyu Pan,

Qiming Xia,

Hai Wu,

Cheng Wang,

Siqi Shen,

Chenglu Wen; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2025_ICCV,
  author    = {Lin, Hongwei and Pan, Dongyu and Xia, Qiming and Wu, Hai and Wang, Cheng and Shen, Siqi and Wen, Chenglu},
  title     = {Pretend Benign: A Stealthy Adversarial Attack by Exploiting Vulnerabilities in Cooperative Perception},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {19947--19956},
}
ProbMED: A Probabilistic Framework for Medical Multimodal Binding: Yuan Gao,

Sangwook Kim,

Jianzhong You,

Chris McIntosh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_ICCV,
  author    = {Gao, Yuan and Kim, Sangwook and You, Jianzhong and McIntosh, Chris},
  title     = {{ProbMED}: A Probabilistic Framework for Medical Multimodal Binding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20157--20167},
}
QuickSplat: Fast 3D Surface Reconstruction via Learned Gaussian Initialization: Yueh-Cheng Liu,

Lukas Höllein,

Matthias Nießner,

Angela Dai; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_ICCV,
  author    = {Liu, Yueh-Cheng and H{\"o}llein, Lukas and Nie{\ss}ner, Matthias and Dai, Angela},
  title     = {{QuickSplat}: Fast {3D} Surface Reconstruction via Learned {Gaussian} Initialization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27851--27861},
}
MetaScope: Optics-Driven Neural Network for Ultra-Micro Metalens Endoscopy: Wuyang Li,

Wentao Pan,

Xiaoyuan Liu,

Zhendong Luo,

Chenxin Li,

Hengyu Liu,

Din Ping Tsai,

Mu Ku Chen,

Yixuan Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_ICCV,
  author    = {Li, Wuyang and Pan, Wentao and Liu, Xiaoyuan and Luo, Zhendong and Li, Chenxin and Liu, Hengyu and Tsai, Din Ping and Chen, Mu Ku and Yuan, Yixuan},
  title     = {{MetaScope}: Optics-Driven Neural Network for Ultra-Micro Metalens Endoscopy},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25938--25950},
}
SparseRecon: Neural Implicit Surface Reconstruction from Sparse Views with Feature and Depth Consistencies: Liang Han,

Xu Zhang,

Haichuan Song,

Kanle Shi,

Yu-Shen Liu,

Zhizhong Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_ICCV,
  author    = {Han, Liang and Zhang, Xu and Song, Haichuan and Shi, Kanle and Liu, Yu-Shen and Han, Zhizhong},
  title     = {{SparseRecon}: Neural Implicit Surface Reconstruction from Sparse Views with Feature and Depth Consistencies},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28514--28524},
}
Leveraging the Power of MLLMs for Gloss-Free Sign Language Translation: Jungeun Kim,

Hyeongwoo Jeon,

Jongseong Bae,

Ha Young Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_ICCV,
  author    = {Kim, Jungeun and Jeon, Hyeongwoo and Bae, Jongseong and Kim, Ha Young},
  title     = {Leveraging the Power of {MLLMs} for Gloss-Free Sign Language Translation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21048--21058},
}
UniMLVG: Unified Framework for Multi-view Long Video Generation with Comprehensive Control Capabilities for Autonomous Driving: Rui Chen,

Zehuan Wu,

Yichen Liu,

Yuxin Guo,

Jingcheng Ni,

Haifeng Xia,

Siyu Xia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Rui and Wu, Zehuan and Liu, Yichen and Guo, Yuxin and Ni, Jingcheng and Xia, Haifeng and Xia, Siyu},
  title     = {{UniMLVG}: Unified Framework for Multi-view Long Video Generation with Comprehensive Control Capabilities for Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25453--25463},
}
CogCM: Cognition-Inspired Contextual Modeling for Audio-Visual Speech Enhancement: Feixiang Wang,

Shuang Yang,

Shiguang Shan,

Xilin Chen; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Feixiang and Yang, Shuang and Shan, Shiguang and Chen, Xilin},
  title     = {{CogCM}: Cognition-Inspired Contextual Modeling for Audio-Visual Speech Enhancement},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21408--21418},
}
Uncertainty-Aware Diffusion-Guided Refinement of 3D Scenes: Sarosij Bose,

Arindam Dutta,

Sayak Nag,

Junge Zhang,

Jiachen Li,

Konstantinos Karydis,

Amit K. Roy-Chowdhury; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bose_2025_ICCV,
  author    = {Bose, Sarosij and Dutta, Arindam and Nag, Sayak and Zhang, Junge and Li, Jiachen and Karydis, Konstantinos and Roy-Chowdhury, Amit K.},
  title     = {Uncertainty-Aware Diffusion-Guided Refinement of {3D} Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28271--28281},
}
LLaFEA: Frame-Event Complementary Fusion for Fine-Grained Spatiotemporal Understanding in LMMs: Hanyu Zhou,

Gim Hee Lee; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_ICCV,
  author    = {Zhou, Hanyu and Lee, Gim Hee},
  title     = {{LLaFEA}: Frame-Event Complementary Fusion for Fine-Grained Spatiotemporal Understanding in {LMMs}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22294--22304},
}
FastPoint: Accelerating 3D Point Cloud Model Inference via Sample Point Distance Prediction: Donghyun Lee,

Dawoon Jeong,

Jae W. Lee,

Hongil Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_ICCV,
  author    = {Lee, Donghyun and Jeong, Dawoon and Lee, Jae W. and Yoon, Hongil},
  title     = {{FastPoint}: Accelerating {3D} Point Cloud Model Inference via Sample Point Distance Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25114--25123},
}
GSRecon: Efficient Generalizable Gaussian Splatting for Surface Reconstruction from Sparse Views: Hang Yang,

Le Hui,

Jianjun Qian,

Jin Xie,

Jian Yang; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Hang and Hui, Le and Qian, Jianjun and Xie, Jin and Yang, Jian},
  title     = {{GSRecon}: Efficient Generalizable {Gaussian} Splatting for Surface Reconstruction from Sparse Views},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {25346--25356},
}
Beyond [cls]: Exploring the True Potential of Masked Image Modeling Representations: Marcin Przewięźlikowski,

Randall Balestriero,

Wojciech Jasiński,

Marek Śmieja,

Bartosz Zieliński; [pdf] [supp]
[bibtex]
@InProceedings{Przewiezlikowski_2025_ICCV,
  author    = {Przewi{\k{e}}{\'z}likowski, Marcin and Balestriero, Randall and Jasi{\'n}ski, Wojciech and {\'S}mieja, Marek and Zieli{\'n}ski, Bartosz},
  title     = {Beyond {[cls]}: Exploring the True Potential of Masked Image Modeling Representations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23442--23452},
}
SpikePack: Enhanced Information Flow in Spiking Neural Networks with High Hardware Compatibility: Guobin Shen,

Jindong Li,

Tenglong Li,

Dongcheng Zhao,

Yi Zeng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shen_2025_ICCV,
  author    = {Shen, Guobin and Li, Jindong and Li, Tenglong and Zhao, Dongcheng and Zeng, Yi},
  title     = {{SpikePack}: Enhanced Information Flow in Spiking Neural Networks with High Hardware Compatibility},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23385--23395},
}
VFlowOpt: A Token Pruning Framework for LMMs with Visual Information Flow-Guided Optimization: Sihan Yang,

Runsen Xu,

Chenhang Cui,

Tai Wang,

Dahua Lin,

Jiangmiao Pang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_ICCV,
  author    = {Yang, Sihan and Xu, Runsen and Cui, Chenhang and Wang, Tai and Lin, Dahua and Pang, Jiangmiao},
  title     = {{VFlowOpt}: A Token Pruning Framework for {LMMs} with Visual Information Flow-Guided Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23924--23934},
}
Text-guided Visual Prompt DINO for Generic Segmentation: Yuchen Guan,

Chong Sun,

Canmiao Fu,

Zhipeng Huang,

Chun Yuan,

Chen Li; [pdf] [arXiv]
[bibtex]
@InProceedings{Guan_2025_ICCV,
  author    = {Guan, Yuchen and Sun, Chong and Fu, Canmiao and Huang, Zhipeng and Yuan, Chun and Li, Chen},
  title     = {Text-guided Visual Prompt {DINO} for Generic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21288--21298},
}
InfiniCube: Unbounded and Controllable Dynamic 3D Driving Scene Generation with World-Guided Video Models: Yifan Lu,

Xuanchi Ren,

Jiawei Yang,

Tianchang Shen,

Zhangjie Wu,

Jun Gao,

Yue Wang,

Siheng Chen,

Mike Chen,

Sanja Fidler,

Jiahui Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_ICCV,
  author    = {Lu, Yifan and Ren, Xuanchi and Yang, Jiawei and Shen, Tianchang and Wu, Zhangjie and Gao, Jun and Wang, Yue and Chen, Siheng and Chen, Mike and Fidler, Sanja and Huang, Jiahui},
  title     = {{InfiniCube}: Unbounded and Controllable Dynamic {3D} Driving Scene Generation with World-Guided Video Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27272--27283},
}
CABLD: Contrast-Agnostic Brain Landmark Detection with Consistency-Based Regularization: Soorena Salari,

Arash Harirpoush,

Hassan Rivaz,

Yiming Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Salari_2025_ICCV,
  author    = {Salari, Soorena and Harirpoush, Arash and Rivaz, Hassan and Xiao, Yiming},
  title     = {{CABLD}: Contrast-Agnostic Brain Landmark Detection with Consistency-Based Regularization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20991--21002},
}
A Differentiable Wave Optics Model for End-to-End Computational Imaging System Optimization: Chi-Jui Ho,

Yash Belhe,

Steve Rotenberg,

Ravi Ramamoorthi,

Tzu-Mao Li,

Nicholas Antipa; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ho_2025_ICCV,
  author    = {Ho, Chi-Jui and Belhe, Yash and Rotenberg, Steve and Ramamoorthi, Ravi and Li, Tzu-Mao and Antipa, Nicholas},
  title     = {A Differentiable Wave Optics Model for End-to-End Computational Imaging System Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28042--28051},
}
DoppDrive: Doppler-Driven Temporal Aggregation for Improved Radar Object Detection: Yuval Haitman,

Oded Bialer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Haitman_2025_ICCV,
  author    = {Haitman, Yuval and Bialer, Oded},
  title     = {{DoppDrive}: {Doppler}-Driven Temporal Aggregation for Improved Radar Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26085--26094},
}
YOLOE: Real-Time Seeing Anything: Ao Wang,

Lihao Liu,

Hui Chen,

Zijia Lin,

Jungong Han,

Guiguang Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Ao and Liu, Lihao and Chen, Hui and Lin, Zijia and Han, Jungong and Ding, Guiguang},
  title     = {{YOLOE}: Real-Time Seeing Anything},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24591--24602},
}
Beyond Text-Visual Attention: Exploiting Visual Cues for Effective Token Pruning in VLMs: Qizhe Zhang,

Aosong Cheng,

Ming Lu,

Renrui Zhang,

Zhiyong Zhuo,

Jiajun Cao,

Shaobo Guo,

Qi She,

Shanghang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_ICCV,
  author    = {Zhang, Qizhe and Cheng, Aosong and Lu, Ming and Zhang, Renrui and Zhuo, Zhiyong and Cao, Jiajun and Guo, Shaobo and She, Qi and Zhang, Shanghang},
  title     = {Beyond Text-Visual Attention: Exploiting Visual Cues for Effective Token Pruning in {VLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20857--20867},
}
Griffon v2: Advancing Multimodal Perception with High-Resolution Scaling and Visual-Language Co-Referring: Yufei Zhan,

Shurong Zheng,

Yousong Zhu,

Hongyin Zhao,

Fan Yang,

Ming Tang,

Jinqiao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhan_2025_ICCV,
  author    = {Zhan, Yufei and Zheng, Shurong and Zhu, Yousong and Zhao, Hongyin and Yang, Fan and Tang, Ming and Wang, Jinqiao},
  title     = {Griffon v2: Advancing Multimodal Perception with High-Resolution Scaling and Visual-Language Co-Referring},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22947--22957},
}
SDFormer: Vision-based 3D Semantic Scene Completion via SAM-assisted Dual-channel Voxel Transformer: Yujie Xue,

Huilong Pi,

Jiapeng Zhang,

Yunchuan Qin,

Zhuo Tang,

Kenli Li,

Ruihui Li; [pdf] [supp]
[bibtex]
@InProceedings{Xue_2025_ICCV,
  author    = {Xue, Yujie and Pi, Huilong and Zhang, Jiapeng and Qin, Yunchuan and Tang, Zhuo and Li, Kenli and Li, Ruihui},
  title     = {{SDFormer}: Vision-based {3D} Semantic Scene Completion via {SAM}-assisted Dual-channel Voxel Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26837--26847},
}
ForestFormer3D: A Unified Framework for End-to-End Segmentation of Forest LiDAR 3D Point Clouds: Binbin Xiang,

Maciej Wielgosz,

Stefano Puliti,

Kamil Král,

Martin Krůček,

Azim Missarov,

Rasmus Astrup; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiang_2025_ICCV,
  author    = {Xiang, Binbin and Wielgosz, Maciej and Puliti, Stefano and Kr{\'a}l, Kamil and Kr{\r{u}}{\v{c}}ek, Martin and Missarov, Azim and Astrup, Rasmus},
  title     = {{ForestFormer3D}: A Unified Framework for End-to-End Segmentation of Forest {LiDAR} {3D} Point Clouds},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24717--24727},
}
AccidentalGS: 3D Gaussian Splatting from Accidental Camera Motion: Mao Mao,

Xujie Shen,

Guyuan Chen,

Boming Zhao,

Jiarui Hu,

Hujun Bao,

Zhaopeng Cui; [pdf] [supp]
[bibtex]
@InProceedings{Mao_2025_ICCV,
  author    = {Mao, Mao and Shen, Xujie and Chen, Guyuan and Zhao, Boming and Hu, Jiarui and Bao, Hujun and Cui, Zhaopeng},
  title     = {{AccidentalGS}: {3D} {Gaussian} Splatting from Accidental Camera Motion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {27445--27455},
}
Neuroverse3D: Developing In-Context Learning Universal Model for Neuroimaging in 3D: Jiesi Hu,

Hanyang Peng,

Yanwu Yang,

Xutao Guo,

Yang Shang,

Pengcheng Shi,

Chenfei Ye,

Ting Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_ICCV,
  author    = {Hu, Jiesi and Peng, Hanyang and Yang, Yanwu and Guo, Xutao and Shang, Yang and Shi, Pengcheng and Ye, Chenfei and Ma, Ting},
  title     = {{Neuroverse3D}: Developing In-Context Learning Universal Model for Neuroimaging in {3D}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21721--21731},
}
ProSAM: Enhancing the Robustness of SAM-based Visual Reference Segmentation with Probabilistic Prompts: Xiaoqi Wang,

Clint Sebastian,

Wenbin He,

Liu Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_ICCV,
  author    = {Wang, Xiaoqi and Sebastian, Clint and He, Wenbin and Ren, Liu},
  title     = {{ProSAM}: Enhancing the Robustness of {SAM}-based Visual Reference Segmentation with Probabilistic Prompts},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {20487--20496},
}
Allowing Oscillation Quantization: Overcoming Solution Space Limitation in Low Bit-Width Quantization: Weiying Xie,

Zihan Meng,

Jitao Ma,

Wenjin Guo,

Haowei Li,

Haonan Qin,

Leyuan Fang,

Yunsong Li; [pdf]
[bibtex]
@InProceedings{Xie_2025_ICCV,
  author    = {Xie, Weiying and Meng, Zihan and Ma, Jitao and Guo, Wenjin and Li, Haowei and Qin, Haonan and Fang, Leyuan and Li, Yunsong},
  title     = {Allowing Oscillation Quantization: Overcoming Solution Space Limitation in Low Bit-Width Quantization},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {24615--24624},
}
SAMora: Enhancing SAM through Hierarchical Self-Supervised Pre-Training for Medical Images: Shuhang Chen,

Hangjie Yuan,

Pengwei Liu,

Hanxue Gu,

Tao Feng,

Dong Ni; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Shuhang and Yuan, Hangjie and Liu, Pengwei and Gu, Hanxue and Feng, Tao and Ni, Dong},
  title     = {{SAMora}: Enhancing {SAM} through Hierarchical Self-Supervised Pre-Training for Medical Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {21209--21219},
}
PathDiff: Histopathology Image Synthesis with Unpaired Text and Mask Conditions: Mahesh Bhosale,

Abdul Wasi,

Yuanhao Zhai,

Yunjie Tian,

Samuel Border,

Nan Xi,

Pinaki Sarder,

Junsong Yuan,

David Doermann,

Xuan Gong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bhosale_2025_ICCV,
  author    = {Bhosale, Mahesh and Wasi, Abdul and Zhai, Yuanhao and Tian, Yunjie and Border, Samuel and Xi, Nan and Sarder, Pinaki and Yuan, Junsong and Doermann, David and Gong, Xuan},
  title     = {{PathDiff}: Histopathology Image Synthesis with Unpaired Text and Mask Conditions},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {22415--22424},
}
GVDepth: Zero-Shot Monocular Depth Estimation for Ground Vehicles based on Probabilistic Cue Fusion: Karlo Koledić,

Luka Petrović,

Ivan Marković,

Ivan Petrović; [pdf] [supp]
[bibtex]
@InProceedings{Koledic_2025_ICCV,
  author    = {Koledi{\'c}, Karlo and Petrovi{\'c}, Luka and Markovi{\'c}, Ivan and Petrovi{\'c}, Ivan},
  title     = {{GVDepth}: Zero-Shot Monocular Depth Estimation for Ground Vehicles based on Probabilistic Cue Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {26126--26135},
}
One Trajectory, One Token: Grounded Video Tokenization via Panoptic Sub-object Trajectory: Chenhao Zheng,

Jieyu Zhang,

Mohammadreza Salehi,

Ziqi Gao,

Vishnu Iyengar,

Norimasa Kobori,

Quan Kong,

Ranjay Krishna; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_ICCV,
  author    = {Zheng, Chenhao and Zhang, Jieyu and Salehi, Mohammadreza and Gao, Ziqi and Iyengar, Vishnu and Kobori, Norimasa and Kong, Quan and Krishna, Ranjay},
  title     = {One Trajectory, One Token: Grounded Video Tokenization via Panoptic Sub-object Trajectory},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {23156--23166},
}
Kaleidoscopic Background Attack: Disrupting Pose Estimation with Multi-Fold Radial Symmetry Textures: Xinlong Ding,

Hongwei Yu,

Jiawei Li,

Feifan Li,

Yu Shang,

Bochao Zou,

Huimin Ma,

Jiansheng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ding_2025_ICCV,
  author    = {Ding, Xinlong and Yu, Hongwei and Li, Jiawei and Li, Feifan and Shang, Yu and Zou, Bochao and Ma, Huimin and Chen, Jiansheng},
  title     = {Kaleidoscopic Background Attack: Disrupting Pose Estimation with Multi-Fold Radial Symmetry Textures},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2025},
  pages     = {28483--28492},
}
Agreement aware and dissimilarity oriented GLOM: Ru Zeng,

Yan Song,

Yang Zhang,

Yanling Hu,

Hui Yu; [pdf] [supp]
[bibtex]
% GLOM paper, ICCV 2025. The acronym in the title is brace-protected so
% sentence-casing bibliography styles do not lowercase it.
% The month is the predefined macro and the page range uses an en-dash.
@InProceedings{Zeng_2025_ICCV,
  author    = {Zeng, Ru and Song, Yan and Zhang, Yang and Hu, Yanling and Yu, Hui},
  title     = {Agreement aware and dissimilarity oriented {GLOM}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {24351--24359}
}
PanoSplatt3R: Leveraging Perspective Pretraining for Generalized Unposed Wide-Baseline Panorama Reconstruction: Jiahui Ren,

Mochu Xiang,

Jiajun Zhu,

Yuchao Dai; [pdf] [supp] [arXiv]
[bibtex]
% PanoSplatt3R, ICCV 2025. The mixed-case method name is brace-protected so
% sentence-casing bibliography styles keep its capitalisation.
% The month is the predefined macro and the page range uses an en-dash.
@InProceedings{Ren_2025_ICCV,
  author    = {Ren, Jiahui and Xiang, Mochu and Zhu, Jiajun and Dai, Yuchao},
  title     = {{PanoSplatt3R}: Leveraging Perspective Pretraining for Generalized Unposed Wide-Baseline Panorama Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {28959--28969}
}
SmolDocling: An ultra-compact vision-language model for end-to-end multi-modal document conversion: Ahmed Nassar,

Matteo Omenetti,

Maksym Lysak,

Nikolaos Livathinos,

Christoph Auer,

Lucas Morin,

Rafael Teixeira de Lima,

Yusik Kim,

A. Said Gurbuz,

Michele Dolfi,

Peter W. J. Staar; [pdf] [supp] [arXiv]
[bibtex]
% SmolDocling, ICCV 2025. The mixed-case model name is brace-protected so
% sentence-casing bibliography styles keep its capitalisation.
% Author name splits are kept exactly as published by the proceedings.
% The month is the predefined macro and the page range uses an en-dash.
@InProceedings{Nassar_2025_ICCV,
  author    = {Nassar, Ahmed and Omenetti, Matteo and Lysak, Maksym and Livathinos, Nikolaos and Auer, Christoph and Morin, Lucas and de Lima, Rafael Teixeira and Kim, Yusik and Gurbuz, A. Said and Dolfi, Michele and Staar, Peter W. J.},
  title     = {{SmolDocling}: An ultra-compact vision-language model for end-to-end multi-modal document conversion},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {21972--21983}
}
Zero-Shot Composed Image Retrieval via Dual-Stream Instruction-Aware Distillation: Wenliang Zhong,

Rob Barton,

Weizhi An,

Feng Jiang,

Hehuan Ma,

Yuzhi Guo,

Abhishek Dan,

Shioulin Sam,

Karim Bouyarmane,

Junzhou Huang; [pdf] [supp]
[bibtex]
% Zero-shot composed image retrieval paper, ICCV 2025.
% The month is the predefined macro and the page range uses an en-dash.
@InProceedings{Zhong_2025_ICCV,
  author    = {Zhong, Wenliang and Barton, Rob and An, Weizhi and Jiang, Feng and Ma, Hehuan and Guo, Yuzhi and Dan, Abhishek and Sam, Shioulin and Bouyarmane, Karim and Huang, Junzhou},
  title     = {Zero-Shot Composed Image Retrieval via Dual-Stream Instruction-Aware Distillation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {22221--22231}
}
UniGS: Modeling Unitary 3D Gaussians for Novel View Synthesis from Sparse-view Images: Jiamin Wu,

Kenkun Liu,

Xiaoke Jiang,

Yuan Yao,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
% UniGS, ICCV 2025. The method name, "3D" and the proper adjective "Gaussians"
% are brace-protected so sentence-casing bibliography styles keep their capitals.
% The month is the predefined macro and the page range uses an en-dash.
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Jiamin and Liu, Kenkun and Jiang, Xiaoke and Yao, Yuan and Zhang, Lei},
  title     = {{UniGS}: Modeling Unitary {3D} {Gaussians} for Novel View Synthesis from Sparse-view Images},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {26241--26251}
}
SP2T: Sparse Proxy Attention for Dual-stream Point Transformer: Jiaxu Wan,

Hong Zhang,

Ziqi He,

Yangyan Deng,

Qishu Wang,

Ding Yuan,

Yifan Yang; [pdf] [supp]
[bibtex]
% SP2T, ICCV 2025. The method acronym is brace-protected so sentence-casing
% bibliography styles do not lowercase it.
% The month is the predefined macro and the page range uses an en-dash.
% The text after the closing brace is page navigation from the listing; it lies
% outside the entry and is ignored by BibTeX.
@InProceedings{Wan_2025_ICCV,
  author    = {Wan, Jiaxu and Zhang, Hong and He, Ziqi and Deng, Yangyan and Wang, Qishu and Yuan, Ding and Yang, Yifan},
  title     = {{SP2T}: Sparse Proxy Attention for Dual-stream Point Transformer},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2025},
  pages     = {27885--27895}
}; Back