Computer Vision Systems for Document Analysis and Recognition


Deep Learning-Based Intrusion Detection Systems for Phishing Email Detection: A Short Survey
Axel De Nardin,
Silvia Zottin,
Claudio Piciarelli,
Gian Luca Foresti
[pdf]
[bibtex]
@InProceedings{De_Nardin_2025_ICCV,
  author    = {De Nardin, Axel and Zottin, Silvia and Piciarelli, Claudio and Foresti, Gian Luca},
  title     = {Deep Learning-Based Intrusion Detection Systems for Phishing Email Detection: A Short Survey},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7567--7575},
}

DocSemi: Efficient Document Layout Analysis with Guided Queries
Tahira Shehzadi,
Ifza Ifza,
Didier Stricker,
Muhammad Zeshan Afzal
[pdf]
[bibtex]
@InProceedings{Shehzadi_2025_ICCV,
  author    = {Shehzadi, Tahira and Ifza, Ifza and Stricker, Didier and Afzal, Muhammad Zeshan},
  title     = {{DocSemi}: Efficient Document Layout Analysis with Guided Queries},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7536--7546},
}

ChemMiner: A Large Language Model Agent System for Chemical Literature Data Mining
Kexin Chen,
Yuyang Du,
Junyou Li,
Hanqun Cao,
Menghao Guo,
Xilin Dang,
Lanqing Li,
Jiezhong Qiu,
Guangyong Chen,
Pheng Ann Heng
[pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Kexin and Du, Yuyang and Li, Junyou and Cao, Hanqun and Guo, Menghao and Dang, Xilin and Li, Lanqing and Qiu, Jiezhong and Chen, Guangyong and Heng, Pheng Ann},
  title     = {{ChemMiner}: A Large Language Model Agent System for Chemical Literature Data Mining},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7595--7603},
}

Describe Anything Model for Visual Question Answering on Text-rich Images
Yen-Linh Vu,
Dinh-Thang Duong,
Truong-Binh Duong,
Anh-Khoi Nguyen,
Thanh-Huy Nguyen,
Le Thien Phuc Nguyen,
Jianhua Xing,
Xingjian Li,
Tianyang Wang,
Ulas Bagci,
Min Xu
[pdf] [arXiv]
[bibtex]
@InProceedings{Vu_2025_ICCV,
  author    = {Vu, Yen-Linh and Duong, Dinh-Thang and Duong, Truong-Binh and Nguyen, Anh-Khoi and Nguyen, Thanh-Huy and Nguyen, Le Thien Phuc and Xing, Jianhua and Li, Xingjian and Wang, Tianyang and Bagci, Ulas and Xu, Min},
  title     = {Describe Anything Model for Visual Question Answering on Text-rich Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7484--7494},
}

PRISM: Pruning for Rank-adaptive Interpretable Segmentation Model with Application to Historical Document Multiband Images
Kilian Declercq,
Abderrahmane Rahiche,
Mohamed Cheriet
[pdf] [supp]
[bibtex]
@InProceedings{Declercq_2025_ICCV,
  author    = {Declercq, Kilian and Rahiche, Abderrahmane and Cheriet, Mohamed},
  title     = {{PRISM}: Pruning for Rank-adaptive Interpretable Segmentation Model with Application to Historical Document Multiband Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7526--7535},
}

Text Image Generation for Low-Resource Languages with Dual Translation Learning
Chihiro Noguchi,
Shun Fukuda,
Shoichiro Mihara,
Masao Yamanaka
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Noguchi_2025_ICCV,
  author    = {Noguchi, Chihiro and Fukuda, Shun and Mihara, Shoichiro and Yamanaka, Masao},
  title     = {Text Image Generation for Low-Resource Languages with Dual Translation Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7441--7451},
}

Improved Information Extraction by Leveraging Multi-Hypothesis OCR at Inference Time
Arthur Hemmer,
Nicola Bartolo,
Mickaël Coustaty,
Jean-Marc Ogier
[pdf]
[bibtex]
@InProceedings{Hemmer_2025_ICCV,
  author    = {Hemmer, Arthur and Bartolo, Nicola and Coustaty, Micka{\"e}l and Ogier, Jean-Marc},
  title     = {Improved Information Extraction by Leveraging Multi-Hypothesis {OCR} at Inference Time},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7576--7584},
}

CoSMo: A Multimodal Transformer for Page Stream Segmentation in Comic Books
Marc Serra Ortega,
Emanuele Vivoli,
Artemis Llabres,
Dimosthenis Karatzas
[pdf] [arXiv]
[bibtex]
@InProceedings{Ortega_2025_ICCV,
  author    = {Ortega, Marc Serra and Vivoli, Emanuele and Llabres, Artemis and Karatzas, Dimosthenis},
  title     = {{CoSMo}: A Multimodal Transformer for Page Stream Segmentation in Comic Books},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7506--7514},
}

DIVE-Doc: Downscaling foundational Image Visual Encoder into hierarchical architecture for DocVQA
Rayane Bencharef,
Abderrahmane Rahiche,
Mohamed Cheriet
[pdf] [supp]
[bibtex]
@InProceedings{Bencharef_2025_ICCV,
  author    = {Bencharef, Rayane and Rahiche, Abderrahmane and Cheriet, Mohamed},
  title     = {{DIVE-Doc}: Downscaling foundational Image Visual Encoder into hierarchical architecture for {DocVQA}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7547--7556},
}

ZOD : Zero-shot and Out-of-Distribution Detection Dataset for Document Images
Talha Uddin Sheikh,
Sankalp Sinha,
Shino Sam,
Didier Stricker,
Muhammad Zeshan Afzal
[pdf] [supp]
[bibtex]
@InProceedings{Sheikh_2025_ICCV,
  author    = {Sheikh, Talha Uddin and Sinha, Sankalp and Sam, Shino and Stricker, Didier and Afzal, Muhammad Zeshan},
  title     = {{ZOD} : Zero-shot and Out-of-Distribution Detection Dataset for Document Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7495--7505},
}

Structure-aware Contrastive Learning for Diagram Understanding of Multimodal Models
Hiroshi Sasaki
[pdf] [arXiv]
[bibtex]
@InProceedings{Sasaki_2025_ICCV,
  author    = {Sasaki, Hiroshi},
  title     = {Structure-aware Contrastive Learning for Diagram Understanding of Multimodal Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7463--7472},
}

CTC Transcription Alignment of the Bullinger Letters: Automatic Improvement of Annotation Quality
Marco Peer,
Anna Scius-Bertrand,
Andreas Fischer
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peer_2025_ICCV,
  author    = {Peer, Marco and Scius-Bertrand, Anna and Fischer, Andreas},
  title     = {{CTC} Transcription Alignment of the {Bullinger} Letters: Automatic Improvement of Annotation Quality},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7557--7566},
}

Quo Vadis Handwritten Text Generation for Handwritten Text Recognition?
Vittorio Pippi,
Konstantina Nikolaidou,
Silvia Cascianelli,
George Retsinas,
Giorgos Sfikas,
Rita Cucchiara,
Marcus Liwicki
[pdf] [arXiv]
[bibtex]
@InProceedings{Pippi_2025_ICCV,
  author    = {Pippi, Vittorio and Nikolaidou, Konstantina and Cascianelli, Silvia and Retsinas, George and Sfikas, Giorgos and Cucchiara, Rita and Liwicki, Marcus},
  title     = {Quo Vadis Handwritten Text Generation for Handwritten Text Recognition?},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7473--7483},
}

TAP-VL: Text Layout Aware Pretraining for Enriched Vision-Language Models
Jonathan Fhima,
Elad Ben,
Oren Nuriel,
Yair Kittenplon,
Roy Ganz,
Aviad Aberdam,
Ron Litman
[pdf] [supp]
[bibtex]
@InProceedings{Fhima_2025_ICCV,
  author    = {Fhima, Jonathan and Ben, Elad and Nuriel, Oren and Kittenplon, Yair and Ganz, Roy and Aberdam, Aviad and Litman, Ron},
  title     = {{TAP-VL}: Text Layout Aware Pretraining for Enriched Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7452--7462},
}

A Survey on Reading Order, Table of Contents, and Structure Extraction in Document Analysis
Simone Giovannini,
Simone Marinai
[pdf]
[bibtex]
@InProceedings{Giovannini_2025_ICCV,
  author    = {Giovannini, Simone and Marinai, Simone},
  title     = {A Survey on Reading Order, Table of Contents, and Structure Extraction in Document Analysis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7585--7594},
}

Scanned documents forensics: detecting inserted characters through noise and chromatic artifacts
Marina Gardella,
Julieta Umpierrez,
Antoine Tadros,
Seginus Mowlavi,
Natalia Bottaioli,
Diego Belzarena,
Gabriele Facciolo,
Roy He,
Jean-Michel Morel,
Rafael Grompone von Gioi
[pdf]
[bibtex]
@InProceedings{Gardella_2025_ICCV,
  author    = {Gardella, Marina and Umpierrez, Julieta and Tadros, Antoine and Mowlavi, Seginus and Bottaioli, Natalia and Belzarena, Diego and Facciolo, Gabriele and He, Roy and Morel, Jean-Michel and {Grompone von Gioi}, Rafael},
  title     = {Scanned documents forensics: detecting inserted characters through noise and chromatic artifacts},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7515--7525},
}