AI4RWC: The 2nd International Workshop on Vision Intelligence for Real-world Challenges


Event-Level Detection of Surgical Instrument Handovers in Videos with Interpretable Vision Model
Katerina Katsarou,
George Zountsas,
Karam Tomotaki-Dawoud,
Alex Ehrenhoefer,
Paul Chojecki,
David Przewozny,
Detlef Runde,
Igor Maximilian Sauer,
Amira Mouakher,
Sebastian Bosse
[pdf] [supp]
[bibtex]
@InProceedings{Katsarou_2026_CVPR,
  author    = {Katsarou, Katerina and Zountsas, George and Tomotaki-Dawoud, Karam and Ehrenhoefer, Alex and Chojecki, Paul and Przewozny, David and Runde, Detlef and Sauer, Igor Maximilian and Mouakher, Amira and Bosse, Sebastian},
  title     = {Event-Level Detection of Surgical Instrument Handovers in Videos with Interpretable Vision Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {9184--9193}
}

SinSEMI: A One-Shot Image Generation Model and Data-Efficient Evaluation Framework for Semiconductor Inspection Equipment
ChunLiang Wu,
Xiaochun Li
[pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, ChunLiang and Li, Xiaochun},
  title     = {{SinSEMI}: A One-Shot Image Generation Model and Data-Efficient Evaluation Framework for Semiconductor Inspection Equipment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {9383--9391}
}

How CT Window Configurations Affect the Grading Performance of 3D Radiology diagnostics
Bo Peng,
Chao Xu,
Boyu Chen,
Daqian Shi
[pdf]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Bo and Xu, Chao and Chen, Boyu and Shi, Daqian},
  title     = {How {CT} Window Configurations Affect the Grading Performance of {3D} Radiology diagnostics},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {9270--9279}
}

Improving Efficiency and Reliability of Computer Vision Models in Real-World Deployment with MX Quantization
Jinghao Wen,
Ruixuan Wang,
Shaohuang Wang,
Xun Jiao
[pdf]
[bibtex]
@InProceedings{Wen_2026_CVPR,
  author    = {Wen, Jinghao and Wang, Ruixuan and Wang, Shaohuang and Jiao, Xun},
  title     = {Improving Efficiency and Reliability of Computer Vision Models in Real-World Deployment with {MX} Quantization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {9373--9382}
}

SynthPID: P&ID digitization from Topology-Preserving Synthetic Data
Suraj Prasad,
Pinak Mahapatra
[pdf] [arXiv]
[bibtex]
@InProceedings{Prasad_2026_CVPR,
  author    = {Prasad, Suraj and Mahapatra, Pinak},
  title     = {{SynthPID}: {P\&ID} digitization from Topology-Preserving Synthetic Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {9280--9286}
}

Boxes2Pixels: Learning Defect Segmentation from Noisy SAM Masks
Camile Lendering,
Erkut Akdag,
Egor Bondarau
[pdf]
[bibtex]
@InProceedings{Lendering_2026_CVPR,
  author    = {Lendering, Camile and Akdag, Erkut and Bondarau, Egor},
  title     = {{Boxes2Pixels}: Learning Defect Segmentation from Noisy {SAM} Masks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {9203--9212}
}

Towards AI Alignment for Medical Imaging: A Unified Framework for Cross-Modal Fairness, Uncertainty Quantification, and Robustness
Arijit Patra
[pdf]
[bibtex]
@InProceedings{Patra_2026_CVPR,
  author    = {Patra, Arijit},
  title     = {Towards {AI} Alignment for Medical Imaging: A Unified Framework for Cross-Modal Fairness, Uncertainty Quantification, and Robustness},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {9261--9269}
}

Learning Adaptive Geometry for Robust Real-World Vision-Language Understanding
Sarthak Srivastava,
Kathy Wu
[pdf]
[bibtex]
@InProceedings{Srivastava_2026_CVPR,
  author    = {Srivastava, Sarthak and Wu, Kathy},
  title     = {Learning Adaptive Geometry for Robust Real-World Vision-Language Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {9357--9361}
}

Cross-Source Supervision for Bone Infection Segmentation in Dual-Modality PET-CT
Zonglin Yang,
Xiaolei Diao,
Jishizhan Chen,
Man Xiaozhuang,
Wei Kong,
Gen Wen,
Pengfei Cheng,
Daqian Shi
[pdf] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Zonglin and Diao, Xiaolei and Chen, Jishizhan and Xiaozhuang, Man and Kong, Wei and Wen, Gen and Cheng, Pengfei and Shi, Daqian},
  title     = {Cross-Source Supervision for Bone Infection Segmentation in Dual-Modality {PET-CT}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {9411--9420}
}

Log-Gated Dual-Path Conditioning Framework for Multimodal Geological Core Image Generation
Daqian Shi,
Cedric M John
[pdf]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Daqian and John, Cedric M},
  title     = {Log-Gated Dual-Path Conditioning Framework for Multimodal Geological Core Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {9315--9324}
}

Zero-Shot Polyp Detection in Open-World Endoscopy via A Cascaded Detector-Verifier Framework
Shengkai Xu,
Junqiao Wang,
Kunyu Wu,
Yimeng Wan,
Yuqi Ouyang
[pdf]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Shengkai and Wang, Junqiao and Wu, Kunyu and Wan, Yimeng and Ouyang, Yuqi},
  title     = {Zero-Shot Polyp Detection in Open-World Endoscopy via A Cascaded Detector-Verifier Framework},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {9402--9410}
}

Z-IRIS: Zero-Shot Road Presence Detection in Aerial Tiles via Segment Proposals and Vision-Language Scoring
Janos Horvath
[pdf]
[bibtex]
@InProceedings{Horvath_2026_CVPR,
  author    = {Horvath, Janos},
  title     = {{Z-IRIS}: Zero-Shot Road Presence Detection in Aerial Tiles via Segment Proposals and Vision-Language Scoring},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {9155--9164}
}

Stop Position Ranking: Safety Benchmarking of Automated Drive Systems for Picking up Passengers
Gaëtan Margueritte,
Pacharapon Arpanantikul,
Anirudh Kondapally,
Benedict Magnus Runte,
Kentaro Yamada,
Hitomi Yanaka
[pdf]
[bibtex]
@InProceedings{Margueritte_2026_CVPR,
  author    = {Margueritte, Ga{\"e}tan and Arpanantikul, Pacharapon and Kondapally, Anirudh and Runte, Benedict Magnus and Yamada, Kentaro and Yanaka, Hitomi},
  title     = {Stop Position Ranking: Safety Benchmarking of Automated Drive Systems for Picking up Passengers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {9233--9240}
}

MOTOR-Bench: A Real-world Dataset and Multi-agent Framework for Zero-shot Human Mental State Understanding
Xiaoyu Yuan,
Niklas Heikkala,
Tiina Törmänen,
Hanna Järvenoja,
Guoying Zhao,
Haoyu Chen
[pdf]
[bibtex]
@InProceedings{Yuan_2026_CVPR,
  author    = {Yuan, Xiaoyu and Heikkala, Niklas and T{\"o}rm{\"a}nen, Tiina and J{\"a}rvenoja, Hanna and Zhao, Guoying and Chen, Haoyu},
  title     = {{MOTOR-Bench}: A Real-world Dataset and Multi-agent Framework for Zero-shot Human Mental State Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {9431--9439}
}

VisChainBench: Benchmarking Multi-Image, Multi-Turn Visual Reasoning Beyond Language Priors for Real-World AI Challenges
Wenbo Lyu,
Yingjun Du,
Jinglin Zhao,
Xiantong Zhen,
Ling Shao
[pdf]
[bibtex]
@InProceedings{Lyu_2026_CVPR,
  author    = {Lyu, Wenbo and Du, Yingjun and Zhao, Jinglin and Zhen, Xiantong and Shao, Ling},
  title     = {{VisChainBench}: Benchmarking Multi-Image, Multi-Turn Visual Reasoning Beyond Language Priors for Real-World {AI} Challenges},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {9223--9232}
}

GazeVLM: A Vision-Language Model for Multi-Task Gaze Understanding
Athul M Mathew,
Haithem Hermassi,
Thariq Kadavil,
Arshad Ali Khan
[pdf]
[bibtex]
@InProceedings{Mathew_2026_CVPR,
  author    = {Mathew, Athul M and Hermassi, Haithem and Kadavil, Thariq and Khan, Arshad Ali},
  title     = {{GazeVLM}: A Vision-Language Model for Multi-Task Gaze Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {9241--9250}
}

Refining Image-to-3D Foundation Models via Geometric Supervision for Industrial Plant Reconstruction
SangEun Lee,
Wonseok Chae,
Hoyoung Yoo,
Geunyong Kim,
NackWoo Kim,
Hyeonjin Kim
[pdf]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, SangEun and Chae, Wonseok and Yoo, Hoyoung and Kim, Geunyong and Kim, NackWoo and Kim, Hyeonjin},
  title     = {Refining {Image-to-3D} Foundation Models via Geometric Supervision for Industrial Plant Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {9194--9202}
}

Opinion Mining and Dynamic Topic Modeling in Streaming Public Discourse with LLMs
Rite Bo,
Deming Guo,
Hongda Zhang,
Zhiqi Zhang,
Lida Shi,
Hao Xu,
Daqian Shi
[pdf]
[bibtex]
@InProceedings{Bo_2026_CVPR,
  author    = {Bo, Rite and Guo, Deming and Zhang, Hongda and Zhang, Zhiqi and Shi, Lida and Xu, Hao and Shi, Daqian},
  title     = {Opinion Mining and Dynamic Topic Modeling in Streaming Public Discourse with {LLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {9080--9089}
}

DRCoD: Toward Robust Continual Learning of Diffusion Models for Tire Manufacturing Prototyping
Jisu Shin,
Sol Lee,
Sungrae Hong,
A young Kim,
Youngbin You,
Jeongheon Park,
Jungsoo Oh,
Mun Yi
[pdf] [supp]
[bibtex]
@InProceedings{Shin_2026_CVPR,
  author    = {Shin, Jisu and Lee, Sol and Hong, Sungrae and Kim, A young and You, Youngbin and Park, Jeongheon and Oh, Jungsoo and Yi, Mun},
  title     = {{DRCoD}: Toward Robust Continual Learning of Diffusion Models for Tire Manufacturing Prototyping},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {9335--9344}
}

From Pixels to Semantics: A Multi-Stage AI Framework for Structural Damage Detection in Satellite Imagery
Bijay Shakya,
Catherine Hoier,
Khandaker Mamun Ahmed
[pdf] [arXiv]
[bibtex]
@InProceedings{Shakya_2026_CVPR,
  author    = {Shakya, Bijay and Hoier, Catherine and Ahmed, Khandaker Mamun},
  title     = {From Pixels to Semantics: A Multi-Stage {AI} Framework for Structural Damage Detection in Satellite Imagery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {9305--9314}
}

HyperRealm: Hyperbolic Vision Language Models for Real-World Hierarchical Multimodal Understanding
Sarthak Srivastava,
Kathy Wu
[pdf]
[bibtex]
@InProceedings{Srivastava_2026_CVPR_b,
  author    = {Srivastava, Sarthak and Wu, Kathy},
  title     = {{HyperRealm}: Hyperbolic Vision Language Models for Real-World Hierarchical Multimodal Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {9345--9356}
}

Omni-NegCLIP: Enhancing CLIP with Front-Layer Contrastive Fine-Tuning for Comprehensive Negation Understanding
Jingqi Xu
[pdf] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR_b,
  author    = {Xu, Jingqi},
  title     = {{Omni-NegCLIP}: Enhancing {CLIP} with Front-Layer Contrastive Fine-Tuning for Comprehensive Negation Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {9392--9401}
}

RAG4Outcome: A Retrieval-Augmented Multimodal Framework for Prognostic Prediction in Chronic Osteomyelitis
Daqian Shi,
Pei Han,
Jishizhan Chen,
Yang Wang,
Xiaolei Diao,
Xianyou Zheng,
Pengfei Cheng
[pdf] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR_b,
  author    = {Shi, Daqian and Han, Pei and Chen, Jishizhan and Wang, Yang and Diao, Xiaolei and Zheng, Xianyou and Cheng, Pengfei},
  title     = {{RAG4Outcome}: A Retrieval-Augmented Multimodal Framework for Prognostic Prediction in Chronic Osteomyelitis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {9325--9334}
}

Ultrasound-led stratification of carpal tunnel syndrome reveals structure-function mismatch
Jishizhan Chen,
Daqian Shi,
Jiaqi Su,
Xiaohai Huang,
Yun Qian
[pdf]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Jishizhan and Shi, Daqian and Su, Jiaqi and Huang, Xiaohai and Qian, Yun},
  title     = {Ultrasound-led stratification of carpal tunnel syndrome reveals structure-function mismatch},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {9107--9112}
}

Crowdsourcing of Real-world Image Annotation via Visual Properties
Xiaolei Diao,
Fausto Giunchiglia
[pdf] [arXiv]
[bibtex]
@InProceedings{Diao_2026_CVPR,
  author    = {Diao, Xiaolei and Giunchiglia, Fausto},
  title     = {Crowdsourcing of Real-world Image Annotation via Visual Properties},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {9125--9134}
}

Layout-Aware Representation Learning for Open-Set ID Fraud Discovery
Jinxing Li,
Nicholas Ren,
Cathy Chang,
Hongkai Pan,
Daniel George
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jinxing and Ren, Nicholas and Chang, Cathy and Pan, Hongkai and George, Daniel},
  title     = {Layout-Aware Representation Learning for Open-Set {ID} Fraud Discovery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {9213--9222}
}

AOI-SSL: Self-Supervised Framework for Efficient Segmentation of Wire-bonded Semiconductors In Optical Inspection
Joaquín Figueira,
Rob van Gastel,
Giacomo D'Amicantonio,
Zhuoran Liu,
Ioan Gabriel Bucur,
Faysal Boughorbel,
Egor Bondarev
[pdf] [supp]
[bibtex]
@InProceedings{Figueira_2026_CVPR,
  author    = {Figueira, Joaqu{\'\i}n and van Gastel, Rob and D'Amicantonio, Giacomo and Liu, Zhuoran and Bucur, Ioan Gabriel and Boughorbel, Faysal and Bondarev, Egor},
  title     = {{AOI-SSL}: Self-Supervised Framework for Efficient Segmentation of Wire-bonded Semiconductors In Optical Inspection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {9135--9144}
}

Zero-Shot Chinese Character Recognition via Global-Local Dual-Branch Alignment and Hierarchical Inference
Wei Cao,
Hao Xu,
Xiaolei Diao
[pdf] [arXiv]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Wei and Xu, Hao and Diao, Xiaolei},
  title     = {Zero-Shot {Chinese} Character Recognition via Global-Local Dual-Branch Alignment and Hierarchical Inference},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {9090--9098}
}

Look, Reason, Defuse: Bridging Perception and Domain Knowledge for Real-World Unexploded Ordnance Identification
Gheorghe Marian Craioveanu,
Grigore Stamatescu,
Olga Saukh
[pdf] [supp]
[bibtex]
@InProceedings{Craioveanu_2026_CVPR,
  author    = {Craioveanu, Gheorghe Marian and Stamatescu, Grigore and Saukh, Olga},
  title     = {Look, Reason, Defuse: Bridging Perception and Domain Knowledge for Real-World Unexploded Ordnance Identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {9113--9124}
}

WildFireVQA: A Large-Scale Radiometric Thermal VQA Benchmark for Aerial Wildfire Monitoring
Mobin Habibpour,
Niloufar Alipour Talemi,
John Spodnik,
Camren J Khoury,
Fatemeh Afghah
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Habibpour_2026_CVPR,
  author    = {Habibpour, Mobin and Talemi, Niloufar Alipour and Spodnik, John and Khoury, Camren J and Afghah, Fatemeh},
  title     = {{WildFireVQA}: A Large-Scale Radiometric Thermal {VQA} Benchmark for Aerial Wildfire Monitoring},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {9145--9154}
}

Rethinking Data Augmentation for Multi-View Underwater Species Dataset: A Data-Centric Analysis of Feature Space Study
Mushfika Sharmin Rahman,
Greg Hamerly
[pdf]
[bibtex]
@InProceedings{Rahman_2026_CVPR,
  author    = {Rahman, Mushfika Sharmin and Hamerly, Greg},
  title     = {Rethinking Data Augmentation for Multi-View Underwater Species Dataset: A Data-Centric Analysis of Feature Space Study},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {9287--9293}
}

PathoFusion: Scalable Multimodal Foundations for Integrating Clinical and Preclinical Imaging Analyses in Real-World Pharmaceutical discovery workflows
Arijit Patra,
Phil Scordis
[pdf]
[bibtex]
@InProceedings{Patra_2026_CVPR_b,
  author    = {Patra, Arijit and Scordis, Phil},
  title     = {{PathoFusion}: Scalable Multimodal Foundations for Integrating Clinical and Preclinical Imaging Analyses in Real-World Pharmaceutical discovery workflows},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {9251--9260}
}

R3PM-Net: Real-time, Robust, Real-world Point Matching Network
Yasaman Kashefbahrami,
Erkut Akdag,
Panagiotis Meletis,
Evgeniya Balmashnova,
Dip Goswami,
Egor Bondarau
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kashefbahrami_2026_CVPR,
  author    = {Kashefbahrami, Yasaman and Akdag, Erkut and Meletis, Panagiotis and Balmashnova, Evgeniya and Goswami, Dip and Bondarau, Egor},
  title     = {{R3PM-Net}: Real-time, Robust, Real-world Point Matching Network},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {9174--9183}
}

FruitEnsemble: MLLM-Guided Arbitration for Heterogeneous ensemble in Fine-Grained Fruit Recognition
Enhui Yu,
Junhui Li,
Ruitong Lu,
Jialu Li,
Youshan Zhang
[pdf] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Enhui and Li, Junhui and Lu, Ruitong and Li, Jialu and Zhang, Youshan},
  title     = {{FruitEnsemble}: {MLLM-Guided} Arbitration for Heterogeneous ensemble in Fine-Grained Fruit Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {9421--9430}
}

Can LLM-Generated Text Empower Surgical Vision-Language Pre-training?
Chengan Che,
Chao Wang,
Jiayuan Huang,
Xinyue Chen,
Luis C. Garcia-Peraza-Herrera
[pdf] [arXiv]
[bibtex]
@InProceedings{Che_2026_CVPR,
  author    = {Che, Chengan and Wang, Chao and Huang, Jiayuan and Chen, Xinyue and Garcia-Peraza-Herrera, Luis C.},
  title     = {Can {LLM-Generated} Text Empower Surgical Vision-Language Pre-training?},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {9099--9106}
}

Hyperbolic Twins: Efficient Finetuning Vision Language Model for Few-Shot Learning via Hyperbolic Geometry
Teng Jiek See
[pdf]
[bibtex]
@InProceedings{See_2026_CVPR,
  author    = {See, Teng Jiek},
  title     = {Hyperbolic Twins: Efficient Finetuning Vision Language Model for Few-Shot Learning via Hyperbolic Geometry},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {9294--9304}
}

STONE: Stable Optimization in Noisy Environments for Robust Vision-Language Models
Sarthak Srivastava,
Kathy Wu
[pdf]
[bibtex]
@InProceedings{Srivastava_2026_CVPR_c,
  author    = {Srivastava, Sarthak and Wu, Kathy},
  title     = {{STONE}: Stable Optimization in Noisy Environments for Robust Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {9362--9372}
}

VisText-Mosquito: A Unified Multimodal Dataset for Visual Detection, Segmentation, and Textual Explanation on Mosquito Breeding Sites
Md. Adnanul Islam,
Md. Faiyaz Abdullah Sayeedi,
Md. Asaduzzaman Shuvo,
Shahanur Rahman Bappy,
Muhammad Ziaur Rahman,
Md Asiful Islam,
Swakkhar Shatabda
[pdf] [arXiv]
[bibtex]
@InProceedings{Islam_2026_CVPR,
  author    = {Islam, Md. Adnanul and Sayeedi, Md. Faiyaz Abdullah and Shuvo, Md. Asaduzzaman and Bappy, Shahanur Rahman and Rahman, Muhammad Ziaur and Islam, Md Asiful and Shatabda, Swakkhar},
  title     = {{VisText-Mosquito}: A Unified Multimodal Dataset for Visual Detection, Segmentation, and Textual Explanation on Mosquito Breeding Sites},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {9165--9173}
}

HierSum: A Global and Local Attention Mechanism for Video Summarization
Apoorva Beedu,
Irfan Essa
[pdf] [arXiv]
[bibtex]
@InProceedings{Beedu_2026_CVPR,
  author    = {Beedu, Apoorva and Essa, Irfan},
  title     = {{HierSum}: A Global and Local Attention Mechanism for Video Summarization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {9069--9079}
}