ICCV 2025 Open Access Repository

Computer Vision Systems for Document Analysis and Recognition

Improved Information Extraction by Leveraging Multi-Hypothesis OCR at Inference Time: Arthur Hemmer,

Nicola Bartolo,

Mickaël Coustaty,

Jean-Marc Ogier; [pdf]
[bibtex]
% ICCV 2025 Workshops paper. The acronym in the title is braced so that
% sentence-casing styles keep it upper-case; the accented given name is
% written as a brace-wrapped accent so classic BibTeX treats it as one letter.
@InProceedings{Hemmer_2025_ICCV,
  author    = {Hemmer, Arthur and Bartolo, Nicola and Coustaty, Micka{\"e}l and Ogier, Jean-Marc},
  title     = {Improved Information Extraction by Leveraging Multi-Hypothesis {OCR} at Inference Time},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7635--7643},
}
DIVE-Doc: Downscaling foundational Image Visual Encoder into hierarchical architecture for DocVQA: Rayane Bencharef,

Abderrahmane Rahiche,

Mohamed Cheriet; [pdf] [supp]
[bibtex]
% ICCV 2025 Workshops paper. The model name and task acronym in the title
% are braced so that sentence-casing styles do not lowercase them.
@InProceedings{Bencharef_2025_ICCV,
  author    = {Bencharef, Rayane and Rahiche, Abderrahmane and Cheriet, Mohamed},
  title     = {{DIVE-Doc}: Downscaling foundational Image Visual Encoder into hierarchical architecture for {DocVQA}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7606--7615},
}
ZOD : Zero-shot and Out-of-Distribution Detection Dataset for Document Images: Talha Uddin Sheikh,

Sankalp Sinha,

Shino Sam,

Didier Stricker,

Muhammad Zeshan Afzal; [pdf] [supp]
[bibtex]
% ICCV 2025 Workshops paper. The dataset acronym is braced against
% sentence-casing, and the stray space before the colon in the title is removed.
@InProceedings{Sheikh_2025_ICCV,
  author    = {Sheikh, Talha Uddin and Sinha, Sankalp and Sam, Shino and Stricker, Didier and Afzal, Muhammad Zeshan},
  title     = {{ZOD}: Zero-shot and Out-of-Distribution Detection Dataset for Document Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7554--7564},
}
DocSemi: Efficient Document Layout Analysis with Guided Queries: Tahira Shehzadi,

Ifza Ifza,

Didier Stricker,

Muhammad Zeshan Afzal; [pdf]
[bibtex]
% ICCV 2025 Workshops paper. The method name in the title is braced so that
% sentence-casing styles keep its internal capital.
@InProceedings{Shehzadi_2025_ICCV,
  author    = {Shehzadi, Tahira and Ifza, Ifza and Stricker, Didier and Afzal, Muhammad Zeshan},
  title     = {{DocSemi}: Efficient Document Layout Analysis with Guided Queries},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7595--7605},
}
TAP-VL: Text Layout Aware Pretraining for Enriched Vision-Language Models: Jonathan Fhima,

Elad Ben,

Oren Nuriel,

Yair Kittenplon,

Roy Ganz,

Aviad Aberdam,

Ron Litman; [pdf] [supp]
[bibtex]
% ICCV 2025 Workshops paper. The method acronym in the title is braced so
% that sentence-casing styles keep it upper-case.
@InProceedings{Fhima_2025_ICCV,
  author    = {Fhima, Jonathan and Ben, Elad and Nuriel, Oren and Kittenplon, Yair and Ganz, Roy and Aberdam, Aviad and Litman, Ron},
  title     = {{TAP-VL}: Text Layout Aware Pretraining for Enriched Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7511--7521},
}
Text Image Generation for Low-Resource Languages with Dual Translation Learning: Chihiro Noguchi,

Shun Fukuda,

Shoichiro Mihara,

Masao Yamanaka; [pdf] [supp] [arXiv]
[bibtex]
% ICCV 2025 Workshops paper. Page range uses a double hyphen and the month
% uses the standard three-letter macro.
@InProceedings{Noguchi_2025_ICCV,
  author    = {Noguchi, Chihiro and Fukuda, Shun and Mihara, Shoichiro and Yamanaka, Masao},
  title     = {Text Image Generation for Low-Resource Languages with Dual Translation Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7500--7510},
}
ChemMiner: A Large Language Model Agent System for Chemical Literature Data Mining: Kexin Chen,

Yuyang Du,

Junyou Li,

Hanqun Cao,

Menghao Guo,

Xilin Dang,

Lanqing Li,

Jiezhong Qiu,

Guangyong Chen,

Pheng Ann Heng; [pdf] [arXiv]
[bibtex]
% ICCV 2025 Workshops paper. The system name in the title is braced so that
% sentence-casing styles keep its internal capital.
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Kexin and Du, Yuyang and Li, Junyou and Cao, Hanqun and Guo, Menghao and Dang, Xilin and Li, Lanqing and Qiu, Jiezhong and Chen, Guangyong and Heng, Pheng Ann},
  title     = {{ChemMiner}: A Large Language Model Agent System for Chemical Literature Data Mining},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7654--7662},
}
Scanned documents forensics: detecting inserted characters through noise and chromatic artifacts: Marina Gardella,

Julieta Umpierrez,

Antoine Tadros,

Seginus Mowlavi,

Natalia Bottaioli,

Diego Belzarena,

Gabriele Facciolo,

Roy He,

Jean-Michel Morel,

Rafael Grompone Von Gioi; [pdf]
[bibtex]
% ICCV 2025 Workshops paper.
% NOTE(review): the last author's surname is entered as a single braced
% compound (Grompone von Gioi) instead of being split after "Grompone";
% this follows the form the author appears to publish under - confirm.
@InProceedings{Gardella_2025_ICCV,
  author    = {Gardella, Marina and Umpierrez, Julieta and Tadros, Antoine and Mowlavi, Seginus and Bottaioli, Natalia and Belzarena, Diego and Facciolo, Gabriele and He, Roy and Morel, Jean-Michel and {Grompone von Gioi}, Rafael},
  title     = {Scanned documents forensics: detecting inserted characters through noise and chromatic artifacts},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7574--7584},
}
Describe Anything Model for Visual Question Answering on Text-rich Images: Yen-Linh Vu,

Dinh-Thang Duong,

Truong-Binh Duong,

Anh-Khoi Nguyen,

Thanh-Huy Nguyen,

Le Thien Phuc Nguyen,

Jianhua Xing,

Xingjian Li,

Tianyang Wang,

Ulas Bagci,

Min Xu; [pdf] [arXiv]
[bibtex]
% ICCV 2025 Workshops paper. "Describe Anything Model" is braced as a model
% name so that sentence-casing styles keep its capitals.
@InProceedings{Vu_2025_ICCV,
  author    = {Vu, Yen-Linh and Duong, Dinh-Thang and Duong, Truong-Binh and Nguyen, Anh-Khoi and Nguyen, Thanh-Huy and Nguyen, Le Thien Phuc and Xing, Jianhua and Li, Xingjian and Wang, Tianyang and Bagci, Ulas and Xu, Min},
  title     = {{Describe Anything Model} for Visual Question Answering on Text-rich Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7543--7553},
}
CTC Transcription Alignment of the Bullinger Letters: Automatic Improvement of Annotation Quality: Marco Peer,

Anna Scius-Bertrand,

Andreas Fischer; [pdf] [supp]
[bibtex]
% ICCV 2025 Workshops paper. The acronym and the proper noun in the title
% are braced so that sentence-casing styles do not lowercase them.
@InProceedings{Peer_2025_ICCV,
  author    = {Peer, Marco and Scius-Bertrand, Anna and Fischer, Andreas},
  title     = {{CTC} Transcription Alignment of the {Bullinger} Letters: Automatic Improvement of Annotation Quality},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7616--7625},
}
Quo Vadis Handwritten Text Generation for Handwritten Text Recognition?: Vittorio Pippi,

Konstantina Nikolaidou,

Silvia Cascianelli,

George Retsinas,

Giorgos Sfikas,

Rita Cucchiara,

Marcus Liwicki; [pdf] [arXiv]
[bibtex]
% ICCV 2025 Workshops paper. Page range uses a double hyphen and the month
% uses the standard three-letter macro.
@InProceedings{Pippi_2025_ICCV,
  author    = {Pippi, Vittorio and Nikolaidou, Konstantina and Cascianelli, Silvia and Retsinas, George and Sfikas, Giorgos and Cucchiara, Rita and Liwicki, Marcus},
  title     = {Quo Vadis Handwritten Text Generation for Handwritten Text Recognition?},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7532--7542},
}
CoSMo: A Multimodal Transformer for Page Stream Segmentation in Comic Books: Marc Serra Ortega,

Emanuele Vivoli,

Artemis Llabres,

Dimosthenis Karatzas; [pdf] [arXiv]
[bibtex]
% ICCV 2025 Workshops paper. The method name in the title is braced so that
% sentence-casing styles keep its mixed capitalisation.
% NOTE(review): the first author may use a compound surname (Serra Ortega);
% the split below is kept as exported - confirm against the paper.
@InProceedings{Ortega_2025_ICCV,
  author    = {Ortega, Marc Serra and Vivoli, Emanuele and Llabres, Artemis and Karatzas, Dimosthenis},
  title     = {{CoSMo}: A Multimodal Transformer for Page Stream Segmentation in Comic Books},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7565--7573},
}
Deep Learning-Based Intrusion Detection Systems for Phishing Email Detection: A Short Survey: Axel De Nardin,

Silvia Zottin,

Claudio Piciarelli,

Gian Luca Foresti; [pdf]
[bibtex]
% ICCV 2025 Workshops paper. Page range uses a double hyphen and the month
% uses the standard three-letter macro.
@InProceedings{De_Nardin_2025_ICCV,
  author    = {De Nardin, Axel and Zottin, Silvia and Piciarelli, Claudio and Foresti, Gian Luca},
  title     = {Deep Learning-Based Intrusion Detection Systems for Phishing Email Detection: A Short Survey},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7626--7634},
}
Structure-aware Contrastive Learning for Diagram Understanding of Multimodal Models: Hiroshi Sasaki; [pdf] [arXiv]
[bibtex]
% ICCV 2025 Workshops paper. Page range uses a double hyphen and the month
% uses the standard three-letter macro.
@InProceedings{Sasaki_2025_ICCV,
  author    = {Sasaki, Hiroshi},
  title     = {Structure-aware Contrastive Learning for Diagram Understanding of Multimodal Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7522--7531},
}
A Survey on Reading Order, Table of Contents, and Structure Extraction in Document Analysis: Simone Giovannini,

Simone Marinai; [pdf]
[bibtex]
% ICCV 2025 Workshops paper. Page range uses a double hyphen and the month
% uses the standard three-letter macro.
@InProceedings{Giovannini_2025_ICCV,
  author    = {Giovannini, Simone and Marinai, Simone},
  title     = {A Survey on Reading Order, Table of Contents, and Structure Extraction in Document Analysis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7644--7653},
}
PRISM: Pruning for Rank-adaptive Interpretable Segmentation Model with Application to Historical Document Multiband Images: Kilian Declercq,

Abderrahmane Rahiche,

Mohamed Cheriet; [pdf] [supp]
[bibtex]
% ICCV 2025 Workshops paper. The model acronym in the title is braced so
% that sentence-casing styles keep it upper-case.
@InProceedings{Declercq_2025_ICCV,
  author    = {Declercq, Kilian and Rahiche, Abderrahmane and Cheriet, Mohamed},
  title     = {{PRISM}: Pruning for Rank-adaptive Interpretable Segmentation Model with Application to Historical Document Multiband Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7585--7594},
}