Computer Vision Systems for Document Analysis and Recognition
Deep Learning-Based Intrusion Detection Systems for Phishing Email Detection: A Short Survey
[pdf]
[bibtex]@InProceedings{De_Nardin_2025_ICCV,
  author    = {De Nardin, Axel and Zottin, Silvia and Piciarelli, Claudio and Foresti, Gian Luca},
  title     = {Deep Learning-Based Intrusion Detection Systems for Phishing Email Detection: A Short Survey},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7567--7575},
}
DocSemi: Efficient Document Layout Analysis with Guided Queries
[pdf]
[bibtex]@InProceedings{Shehzadi_2025_ICCV,
  author    = {Shehzadi, Tahira and Ifza, Ifza and Stricker, Didier and Afzal, Muhammad Zeshan},
  title     = {{DocSemi}: Efficient Document Layout Analysis with Guided Queries},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7536--7546},
}
ChemMiner: A Large Language Model Agent System for Chemical Literature Data Mining
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Kexin and Du, Yuyang and Li, Junyou and Cao, Hanqun and Guo, Menghao and Dang, Xilin and Li, Lanqing and Qiu, Jiezhong and Chen, Guangyong and Heng, Pheng Ann},
  title     = {{ChemMiner}: A Large Language Model Agent System for Chemical Literature Data Mining},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7595--7603},
}
Describe Anything Model for Visual Question Answering on Text-rich Images
[pdf]
[arXiv]
[bibtex]@InProceedings{Vu_2025_ICCV,
  author    = {Vu, Yen-Linh and Duong, Dinh-Thang and Duong, Truong-Binh and Nguyen, Anh-Khoi and Nguyen, Thanh-Huy and Nguyen, Le Thien Phuc and Xing, Jianhua and Li, Xingjian and Wang, Tianyang and Bagci, Ulas and Xu, Min},
  title     = {{Describe Anything Model} for Visual Question Answering on Text-rich Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7484--7494},
}
PRISM: Pruning for Rank-adaptive Interpretable Segmentation Model with Application to Historical Document Multiband Images
[pdf]
[supp]
[bibtex]@InProceedings{Declercq_2025_ICCV,
  author    = {Declercq, Kilian and Rahiche, Abderrahmane and Cheriet, Mohamed},
  title     = {{PRISM}: Pruning for Rank-adaptive Interpretable Segmentation Model with Application to Historical Document Multiband Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7526--7535},
}
Text Image Generation for Low-Resource Languages with Dual Translation Learning
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Noguchi_2025_ICCV,
  author    = {Noguchi, Chihiro and Fukuda, Shun and Mihara, Shoichiro and Yamanaka, Masao},
  title     = {Text Image Generation for Low-Resource Languages with Dual Translation Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7441--7451},
}
Improved Information Extraction by Leveraging Multi-Hypothesis OCR at Inference Time
[pdf]
[bibtex]@InProceedings{Hemmer_2025_ICCV,
  author    = {Hemmer, Arthur and Bartolo, Nicola and Coustaty, Micka{\"e}l and Ogier, Jean-Marc},
  title     = {Improved Information Extraction by Leveraging Multi-Hypothesis {OCR} at Inference Time},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7576--7584},
}
CoSMo: A Multimodal Transformer for Page Stream Segmentation in Comic Books
[pdf]
[arXiv]
[bibtex]@InProceedings{Ortega_2025_ICCV,
  author    = {Ortega, Marc Serra and Vivoli, Emanuele and Llabres, Artemis and Karatzas, Dimosthenis},
  title     = {{CoSMo}: A Multimodal Transformer for Page Stream Segmentation in Comic Books},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7506--7514},
}
DIVE-Doc: Downscaling foundational Image Visual Encoder into hierarchical architecture for DocVQA
[pdf]
[supp]
[bibtex]@InProceedings{Bencharef_2025_ICCV,
  author    = {Bencharef, Rayane and Rahiche, Abderrahmane and Cheriet, Mohamed},
  title     = {{DIVE-Doc}: Downscaling foundational Image Visual Encoder into hierarchical architecture for {DocVQA}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7547--7556},
}
ZOD : Zero-shot and Out-of-Distribution Detection Dataset for Document Images
[pdf]
[supp]
[bibtex]@InProceedings{Sheikh_2025_ICCV,
  author    = {Sheikh, Talha Uddin and Sinha, Sankalp and Sam, Shino and Stricker, Didier and Afzal, Muhammad Zeshan},
  title     = {{ZOD} : Zero-shot and Out-of-Distribution Detection Dataset for Document Images},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7495--7505},
}
Structure-aware Contrastive Learning for Diagram Understanding of Multimodal Models
[pdf]
[arXiv]
[bibtex]@InProceedings{Sasaki_2025_ICCV,
  author    = {Sasaki, Hiroshi},
  title     = {Structure-aware Contrastive Learning for Diagram Understanding of Multimodal Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7463--7472},
}
CTC Transcription Alignment of the Bullinger Letters: Automatic Improvement of Annotation Quality
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peer_2025_ICCV,
  author    = {Peer, Marco and Scius-Bertrand, Anna and Fischer, Andreas},
  title     = {{CTC} Transcription Alignment of the {Bullinger} Letters: Automatic Improvement of Annotation Quality},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7557--7566},
}
Quo Vadis Handwritten Text Generation for Handwritten Text Recognition?
[pdf]
[arXiv]
[bibtex]@InProceedings{Pippi_2025_ICCV,
  author    = {Pippi, Vittorio and Nikolaidou, Konstantina and Cascianelli, Silvia and Retsinas, George and Sfikas, Giorgos and Cucchiara, Rita and Liwicki, Marcus},
  title     = {Quo Vadis Handwritten Text Generation for Handwritten Text Recognition?},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7473--7483},
}
TAP-VL: Text Layout Aware Pretraining for Enriched Vision-Language Models
[pdf]
[supp]
[bibtex]@InProceedings{Fhima_2025_ICCV,
  author    = {Fhima, Jonathan and Ben, Elad and Nuriel, Oren and Kittenplon, Yair and Ganz, Roy and Aberdam, Aviad and Litman, Ron},
  title     = {{TAP-VL}: Text Layout Aware Pretraining for Enriched Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7452--7462},
}
A Survey on Reading Order, Table of Contents, and Structure Extraction in Document Analysis
[pdf]
[bibtex]@InProceedings{Giovannini_2025_ICCV,
  author    = {Giovannini, Simone and Marinai, Simone},
  title     = {A Survey on Reading Order, Table of Contents, and Structure Extraction in Document Analysis},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7585--7594},
}
Scanned documents forensics: detecting inserted characters through noise and chromatic artifacts
[pdf]
[bibtex]@InProceedings{Gardella_2025_ICCV,
  author    = {Gardella, Marina and Umpierrez, Julieta and Tadros, Antoine and Mowlavi, Seginus and Bottaioli, Natalia and Belzarena, Diego and Facciolo, Gabriele and He, Roy and Morel, Jean-Michel and {Grompone von Gioi}, Rafael},
  title     = {Scanned documents forensics: detecting inserted characters through noise and chromatic artifacts},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {7515--7525},
}