CVPR 2026 Open Access Repository

AI4RWC: The 2nd International Workshop on Vision Intelligence for Real-world Challenges

Event-Level Detection of Surgical Instrument Handovers in Videos with Interpretable Vision Model: Katerina Katsarou,

George Zountsas,

Karam Tomotaki-Dawoud,

Alex Ehrenhoefer,

Paul Chojecki,

David Przewozny,

Detlef Runde,

Igor Maximilian Sauer,

Amira Mouakher,

Sebastian Bosse; [pdf] [supp]
[bibtex]
@InProceedings{Katsarou_2026_CVPR,
    author    = {Katsarou, Katerina and Zountsas, George and Tomotaki-Dawoud, Karam and Ehrenhoefer, Alex and Chojecki, Paul and Przewozny, David and Runde, Detlef and Sauer, Igor Maximilian and Mouakher, Amira and Bosse, Sebastian},
    title     = {Event-Level Detection of Surgical Instrument Handovers in Videos with Interpretable Vision Model},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {9184--9193},
}
SinSEMI: A One-Shot Image Generation Model and Data-Efficient Evaluation Framework for Semiconductor Inspection Equipment: ChunLiang Wu,

Xiaochun Li; [pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
    author    = {Wu, ChunLiang and Li, Xiaochun},
    title     = {{SinSEMI}: A One-Shot Image Generation Model and Data-Efficient Evaluation Framework for Semiconductor Inspection Equipment},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {9383--9391},
}
How CT Window Configurations Affect the Grading Performance of 3D Radiology diagnostics: Bo Peng,

Chao Xu,

Boyu Chen,

Daqian Shi; [pdf]
[bibtex]
@InProceedings{Peng_2026_CVPR,
    author    = {Peng, Bo and Xu, Chao and Chen, Boyu and Shi, Daqian},
    title     = {How {CT} Window Configurations Affect the Grading Performance of {3D} Radiology diagnostics},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {9270--9279},
}
Improving Efficiency and Reliability of Computer Vision Models in Real-World Deployment with MX Quantization: Jinghao Wen,

Ruixuan Wang,

Shaohuang Wang,

Xun Jiao; [pdf]
[bibtex]
@InProceedings{Wen_2026_CVPR,
    author    = {Wen, Jinghao and Wang, Ruixuan and Wang, Shaohuang and Jiao, Xun},
    title     = {Improving Efficiency and Reliability of Computer Vision Models in Real-World Deployment with {MX} Quantization},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {9373--9382},
}
SynthPID: P&ID digitization from Topology-Preserving Synthetic Data: Suraj Prasad,

Pinak Mahapatra; [pdf] [arXiv]
[bibtex]
@InProceedings{Prasad_2026_CVPR,
    author    = {Prasad, Suraj and Mahapatra, Pinak},
    title     = {{SynthPID}: {P\&ID} digitization from Topology-Preserving Synthetic Data},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {9280--9286},
}
Boxes2Pixels: Learning Defect Segmentation from Noisy SAM Masks: Camile Lendering,

Erkut Akdag,

Egor Bondarau; [pdf]
[bibtex]
@InProceedings{Lendering_2026_CVPR,
    author    = {Lendering, Camile and Akdag, Erkut and Bondarau, Egor},
    title     = {{Boxes2Pixels}: Learning Defect Segmentation from Noisy {SAM} Masks},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {9203--9212},
}
Towards AI Alignment for Medical Imaging: A Unified Framework for Cross-Modal Fairness, Uncertainty Quantification, and Robustness: Arijit Patra; [pdf]
[bibtex]
@InProceedings{Patra_2026_CVPR,
    author    = {Patra, Arijit},
    title     = {Towards {AI} Alignment for Medical Imaging: A Unified Framework for Cross-Modal Fairness, Uncertainty Quantification, and Robustness},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {9261--9269},
}
Learning Adaptive Geometry for Robust Real-World Vision-Language Understanding: Sarthak Srivastava,

Kathy Wu; [pdf]
[bibtex]
@InProceedings{Srivastava_2026_CVPR,
    author    = {Srivastava, Sarthak and Wu, Kathy},
    title     = {Learning Adaptive Geometry for Robust Real-World Vision-Language Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {9357--9361},
}
Cross-Source Supervision for Bone Infection Segmentation in Dual-Modality PET-CT: Zonglin Yang,

Xiaolei Diao,

Jishizhan Chen,

Man Xiaozhuang,

Wei Kong,

Gen Wen,

Pengfei Cheng,

Daqian Shi; [pdf] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Zonglin and Diao, Xiaolei and Chen, Jishizhan and Xiaozhuang, Man and Kong, Wei and Wen, Gen and Cheng, Pengfei and Shi, Daqian},
    title     = {Cross-Source Supervision for Bone Infection Segmentation in Dual-Modality {PET-CT}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {9411--9420},
}
Log-Gated Dual-Path Conditioning Framework for Multimodal Geological Core Image Generation: Daqian Shi,

Cedric M John; [pdf]
[bibtex]
@InProceedings{Shi_2026_CVPR,
    author    = {Shi, Daqian and John, Cedric M},
    title     = {Log-Gated Dual-Path Conditioning Framework for Multimodal Geological Core Image Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {9315--9324},
}
Zero-Shot Polyp Detection in Open-World Endoscopy via A Cascaded Detector-Verifier Framework: Shengkai Xu,

Junqiao Wang,

Kunyu Wu,

Yimeng Wan,

Yuqi Ouyang; [pdf]
[bibtex]
@InProceedings{Xu_2026_CVPR,
    author    = {Xu, Shengkai and Wang, Junqiao and Wu, Kunyu and Wan, Yimeng and Ouyang, Yuqi},
    title     = {Zero-Shot Polyp Detection in Open-World Endoscopy via A Cascaded Detector-Verifier Framework},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {9402--9410},
}
Z-IRIS: Zero-Shot Road Presence Detection in Aerial Tiles via Segment Proposals and Vision-Language Scoring: Janos Horvath; [pdf]
[bibtex]
@InProceedings{Horvath_2026_CVPR,
    author    = {Horvath, Janos},
    title     = {{Z-IRIS}: Zero-Shot Road Presence Detection in Aerial Tiles via Segment Proposals and Vision-Language Scoring},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {9155--9164},
}
Stop Position Ranking: Safety Benchmarking of Automated Drive Systems for Picking up Passengers: Gaëtan Margueritte,

Pacharapon Arpanantikul,

Anirudh Kondapally,

Benedict Magnus Runte,

Kentaro Yamada,

Hitomi Yanaka; [pdf]
[bibtex]
@InProceedings{Margueritte_2026_CVPR,
    author    = {Margueritte, Ga{\"e}tan and Arpanantikul, Pacharapon and Kondapally, Anirudh and Runte, Benedict Magnus and Yamada, Kentaro and Yanaka, Hitomi},
    title     = {Stop Position Ranking: Safety Benchmarking of Automated Drive Systems for Picking up Passengers},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {9233--9240},
}
MOTOR-Bench: A Real-world Dataset and Multi-agent Framework for Zero-shot Human Mental State Understanding: Xiaoyu Yuan,

Niklas Heikkala,

Tiina Törmänen,

Hanna Järvenoja,

Guoying Zhao,

Haoyu Chen; [pdf]
[bibtex]
@InProceedings{Yuan_2026_CVPR,
    author    = {Yuan, Xiaoyu and Heikkala, Niklas and T{\"o}rm{\"a}nen, Tiina and J{\"a}rvenoja, Hanna and Zhao, Guoying and Chen, Haoyu},
    title     = {{MOTOR-Bench}: A Real-world Dataset and Multi-agent Framework for Zero-shot Human Mental State Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {9431--9439},
}
VisChainBench: Benchmarking Multi-Image, Multi-Turn Visual Reasoning Beyond Language Priors for Real-World AI Challenges: Wenbo Lyu,

Yingjun Du,

Jinglin Zhao,

Xiantong Zhen,

Ling Shao; [pdf]
[bibtex]
@InProceedings{Lyu_2026_CVPR,
    author    = {Lyu, Wenbo and Du, Yingjun and Zhao, Jinglin and Zhen, Xiantong and Shao, Ling},
    title     = {{VisChainBench}: Benchmarking Multi-Image, Multi-Turn Visual Reasoning Beyond Language Priors for Real-World {AI} Challenges},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {9223--9232},
}
GazeVLM: A Vision-Language Model for Multi-Task Gaze Understanding: Athul M Mathew,

Haithem Hermassi,

Thariq Kadavil,

Arshad Ali Khan; [pdf]
[bibtex]
@InProceedings{Mathew_2026_CVPR,
    author    = {Mathew, Athul M and Hermassi, Haithem and Kadavil, Thariq and Khan, Arshad Ali},
    title     = {{GazeVLM}: A Vision-Language Model for Multi-Task Gaze Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {9241--9250},
}
Refining Image-to-3D Foundation Models via Geometric Supervision for Industrial Plant Reconstruction: SangEun Lee,

Wonseok Chae,

Hoyoung Yoo,

Geunyong Kim,

NackWoo Kim,

Hyeonjin Kim; [pdf]
[bibtex]
@InProceedings{Lee_2026_CVPR,
    author    = {Lee, SangEun and Chae, Wonseok and Yoo, Hoyoung and Kim, Geunyong and Kim, NackWoo and Kim, Hyeonjin},
    title     = {Refining Image-to-{3D} Foundation Models via Geometric Supervision for Industrial Plant Reconstruction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {9194--9202},
}
Opinion Mining and Dynamic Topic Modeling in Streaming Public Discourse with LLMs: Rite Bo,

Deming Guo,

Hongda Zhang,

Zhiqi Zhang,

Lida Shi,

Hao Xu,

Daqian Shi; [pdf]
[bibtex]
@InProceedings{Bo_2026_CVPR,
    author    = {Bo, Rite and Guo, Deming and Zhang, Hongda and Zhang, Zhiqi and Shi, Lida and Xu, Hao and Shi, Daqian},
    title     = {Opinion Mining and Dynamic Topic Modeling in Streaming Public Discourse with {LLMs}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {9080--9089},
}
DRCoD: Toward Robust Continual Learning of Diffusion Models for Tire Manufacturing Prototyping: Jisu Shin,

Sol Lee,

Sungrae Hong,

A young Kim,

Youngbin You,

Jeongheon Park,

Jungsoo Oh,

Mun Yi; [pdf] [supp]
[bibtex]
@InProceedings{Shin_2026_CVPR,
    author    = {Shin, Jisu and Lee, Sol and Hong, Sungrae and Kim, A young and You, Youngbin and Park, Jeongheon and Oh, Jungsoo and Yi, Mun},
    title     = {{DRCoD}: Toward Robust Continual Learning of Diffusion Models for Tire Manufacturing Prototyping},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {9335--9344},
}
From Pixels to Semantics: A Multi-Stage AI Framework for Structural Damage Detection in Satellite Imagery: Bijay Shakya,

Catherine Hoier,

Khandaker Mamun Ahmed; [pdf] [arXiv]
[bibtex]
@InProceedings{Shakya_2026_CVPR,
    author    = {Shakya, Bijay and Hoier, Catherine and Ahmed, Khandaker Mamun},
    title     = {From Pixels to Semantics: A Multi-Stage {AI} Framework for Structural Damage Detection in Satellite Imagery},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {9305--9314},
}
HyperRealm: Hyperbolic Vision Language Models for Real-World Hierarchical Multimodal Understanding: Sarthak Srivastava,

Kathy Wu; [pdf]
[bibtex]
% Key carries a title keyword because the plain Srivastava_2026_CVPR key is already taken by an earlier entry; BibTeX rejects repeated keys.
@InProceedings{Srivastava_2026_CVPR_HyperRealm,
    author    = {Srivastava, Sarthak and Wu, Kathy},
    title     = {{HyperRealm}: Hyperbolic Vision Language Models for Real-World Hierarchical Multimodal Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {9345--9356},
}
Omni-NegCLIP: Enhancing CLIP with Front-Layer Contrastive Fine-Tuning for Comprehensive Negation Understanding: Jingqi Xu; [pdf] [arXiv]
[bibtex]
% Key carries a title keyword because the plain Xu_2026_CVPR key is already taken by an earlier entry; BibTeX rejects repeated keys.
@InProceedings{Xu_2026_CVPR_OmniNegCLIP,
    author    = {Xu, Jingqi},
    title     = {{Omni-NegCLIP}: Enhancing {CLIP} with Front-Layer Contrastive Fine-Tuning for Comprehensive Negation Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {9392--9401},
}
RAG4Outcome: A Retrieval-Augmented Multimodal Framework for Prognostic Prediction in Chronic Osteomyelitis: Daqian Shi,

Pei Han,

Jishizhan Chen,

Yang Wang,

Xiaolei Diao,

Xianyou Zheng,

Pengfei Cheng; [pdf] [arXiv]
[bibtex]
% Key carries a title keyword because the plain Shi_2026_CVPR key is already taken by an earlier entry; BibTeX rejects repeated keys.
@InProceedings{Shi_2026_CVPR_RAG4Outcome,
    author    = {Shi, Daqian and Han, Pei and Chen, Jishizhan and Wang, Yang and Diao, Xiaolei and Zheng, Xianyou and Cheng, Pengfei},
    title     = {{RAG4Outcome}: A Retrieval-Augmented Multimodal Framework for Prognostic Prediction in Chronic Osteomyelitis},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {9325--9334},
}
Ultrasound-led stratification of carpal tunnel syndrome reveals structure-function mismatch: Jishizhan Chen,

Daqian Shi,

Jiaqi Su,

Xiaohai Huang,

Yun Qian; [pdf]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Jishizhan and Shi, Daqian and Su, Jiaqi and Huang, Xiaohai and Qian, Yun},
    title     = {Ultrasound-led stratification of carpal tunnel syndrome reveals structure-function mismatch},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {9107--9112},
}
Crowdsourcing of Real-world Image Annotation via Visual Properties: Xiaolei Diao,

Fausto Giunchiglia; [pdf] [arXiv]
[bibtex]
@InProceedings{Diao_2026_CVPR,
    author    = {Diao, Xiaolei and Giunchiglia, Fausto},
    title     = {Crowdsourcing of Real-world Image Annotation via Visual Properties},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {9125--9134},
}
Layout-Aware Representation Learning for Open-Set ID Fraud Discovery: Jinxing Li,

Nicholas Ren,

Cathy Chang,

Hongkai Pan,

Daniel George; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Jinxing and Ren, Nicholas and Chang, Cathy and Pan, Hongkai and George, Daniel},
    title     = {Layout-Aware Representation Learning for Open-Set {ID} Fraud Discovery},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {9213--9222},
}
AOI-SSL: Self-Supervised Framework for Efficient Segmentation of Wire-bonded Semiconductors In Optical Inspection: Joaquín Figueira,

Rob van Gastel,

Giacomo D'Amicantonio,

Zhuoran Liu,

Ioan Gabriel Bucur,

Faysal Boughorbel,

Egor Bondarev; [pdf] [supp]
[bibtex]
@InProceedings{Figueira_2026_CVPR,
    author    = {Figueira, Joaqu{\'\i}n and van Gastel, Rob and D'Amicantonio, Giacomo and Liu, Zhuoran and Bucur, Ioan Gabriel and Boughorbel, Faysal and Bondarev, Egor},
    title     = {{AOI-SSL}: Self-Supervised Framework for Efficient Segmentation of Wire-bonded Semiconductors In Optical Inspection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {9135--9144},
}
Zero-Shot Chinese Character Recognition via Global-Local Dual-Branch Alignment and Hierarchical Inference: Wei Cao,

Hao Xu,

Xiaolei Diao; [pdf] [arXiv]
[bibtex]
@InProceedings{Cao_2026_CVPR,
    author    = {Cao, Wei and Xu, Hao and Diao, Xiaolei},
    title     = {Zero-Shot {Chinese} Character Recognition via Global-Local Dual-Branch Alignment and Hierarchical Inference},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {9090--9098},
}
Look, Reason, Defuse: Bridging Perception and Domain Knowledge for Real-World Unexploded Ordnance Identification: Gheorghe Marian Craioveanu,

Grigore Stamatescu,

Olga Saukh; [pdf] [supp]
[bibtex]
@InProceedings{Craioveanu_2026_CVPR,
    author    = {Craioveanu, Gheorghe Marian and Stamatescu, Grigore and Saukh, Olga},
    title     = {Look, Reason, Defuse: Bridging Perception and Domain Knowledge for Real-World Unexploded Ordnance Identification},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {9113--9124},
}
WildFireVQA: A Large-Scale Radiometric Thermal VQA Benchmark for Aerial Wildfire Monitoring: Mobin Habibpour,

Niloufar Alipour Talemi,

John Spodnik,

Camren J Khoury,

Fatemeh Afghah; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Habibpour_2026_CVPR,
    author    = {Habibpour, Mobin and Talemi, Niloufar Alipour and Spodnik, John and Khoury, Camren J and Afghah, Fatemeh},
    title     = {{WildFireVQA}: A Large-Scale Radiometric Thermal {VQA} Benchmark for Aerial Wildfire Monitoring},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {9145--9154},
}
Rethinking Data Augmentation for Multi-View Underwater Species Dataset: A Data-Centric Analysis of Feature Space Study: Mushfika Sharmin Rahman,

Greg Hamerly; [pdf]
[bibtex]
@InProceedings{Rahman_2026_CVPR,
    author    = {Rahman, Mushfika Sharmin and Hamerly, Greg},
    title     = {Rethinking Data Augmentation for Multi-View Underwater Species Dataset: A Data-Centric Analysis of Feature Space Study},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {9287--9293},
}
PathoFusion: Scalable Multimodal Foundations for Integrating Clinical and Preclinical Imaging Analyses in Real-World Pharmaceutical discovery workflows: Arijit Patra,

Phil Scordis; [pdf]
[bibtex]
% Key carries a title keyword because the plain Patra_2026_CVPR key is already taken by an earlier entry; BibTeX rejects repeated keys.
@InProceedings{Patra_2026_CVPR_PathoFusion,
    author    = {Patra, Arijit and Scordis, Phil},
    title     = {{PathoFusion}: Scalable Multimodal Foundations for Integrating Clinical and Preclinical Imaging Analyses in Real-World Pharmaceutical discovery workflows},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {9251--9260},
}
R3PM-Net: Real-time, Robust, Real-world Point Matching Network: Yasaman Kashefbahrami,

Erkut Akdag,

Panagiotis Meletis,

Evgeniya Balmashnova,

Dip Goswami,

Egor Bondarau; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kashefbahrami_2026_CVPR,
    author    = {Kashefbahrami, Yasaman and Akdag, Erkut and Meletis, Panagiotis and Balmashnova, Evgeniya and Goswami, Dip and Bondarau, Egor},
    title     = {{R3PM-Net}: Real-time, Robust, Real-world Point Matching Network},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {9174--9183},
}
FruitEnsemble: MLLM-Guided Arbitration for Heterogeneous ensemble in Fine-Grained Fruit Recognition: Enhui Yu,

Junhui Li,

Ruitong Lu,

Jialu Li,

Youshan Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
    author    = {Yu, Enhui and Li, Junhui and Lu, Ruitong and Li, Jialu and Zhang, Youshan},
    title     = {{FruitEnsemble}: {MLLM}-Guided Arbitration for Heterogeneous ensemble in Fine-Grained Fruit Recognition},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {9421--9430},
}
Can LLM-Generated Text Empower Surgical Vision-Language Pre-training?: Chengan Che,

Chao Wang,

Jiayuan Huang,

Xinyue Chen,

Luis C. Garcia-Peraza-Herrera; [pdf] [arXiv]
[bibtex]
@InProceedings{Che_2026_CVPR,
    author    = {Che, Chengan and Wang, Chao and Huang, Jiayuan and Chen, Xinyue and Garcia-Peraza-Herrera, Luis C.},
    title     = {Can {LLM}-Generated Text Empower Surgical Vision-Language Pre-training?},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {9099--9106},
}
Hyperbolic Twins: Efficient Finetuning Vision Language Model for Few-Shot Learning via Hyperbolic Geometry: Teng Jiek See; [pdf]
[bibtex]
@InProceedings{See_2026_CVPR,
    author    = {See, Teng Jiek},
    title     = {Hyperbolic Twins: Efficient Finetuning Vision Language Model for Few-Shot Learning via Hyperbolic Geometry},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {9294--9304},
}
STONE: Stable Optimization in Noisy Environments for Robust Vision-Language Models: Sarthak Srivastava,

Kathy Wu; [pdf]
[bibtex]
% Key carries a title keyword because the plain Srivastava_2026_CVPR key is already taken by an earlier entry; BibTeX rejects repeated keys.
@InProceedings{Srivastava_2026_CVPR_STONE,
    author    = {Srivastava, Sarthak and Wu, Kathy},
    title     = {{STONE}: Stable Optimization in Noisy Environments for Robust Vision-Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {9362--9372},
}
VisText-Mosquito: A Unified Multimodal Dataset for Visual Detection, Segmentation, and Textual Explanation on Mosquito Breeding Sites: Md. Adnanul Islam,

Md. Faiyaz Abdullah Sayeedi,

Md. Asaduzzaman Shuvo,

Shahanur Rahman Bappy,

Muhammad Ziaur Rahman,

Md Asiful Islam,

Swakkhar Shatabda; [pdf] [arXiv]
[bibtex]
@InProceedings{Islam_2026_CVPR,
    author    = {Islam, Md. Adnanul and Sayeedi, Md. Faiyaz Abdullah and Shuvo, Md. Asaduzzaman and Bappy, Shahanur Rahman and Rahman, Muhammad Ziaur and Islam, Md Asiful and Shatabda, Swakkhar},
    title     = {{VisText-Mosquito}: A Unified Multimodal Dataset for Visual Detection, Segmentation, and Textual Explanation on Mosquito Breeding Sites},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {9165--9173},
}
HierSum: A Global and Local Attention Mechanism for Video Summarization: Apoorva Beedu,

Irfan Essa; [pdf] [arXiv]
[bibtex]
@InProceedings{Beedu_2026_CVPR,
    author    = {Beedu, Apoorva and Essa, Irfan},
    title     = {{HierSum}: A Global and Local Attention Mechanism for Video Summarization},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month     = jun,
    year      = {2026},
    pages     = {9069--9079},
}