ICCV 2023 Open Access Repository

ICCV Workshop on New Ideas in Vision Transformers

Explaining Through Transformer Input Sampling: Alexandre Englebert,

Sédrick Stassin,

Géraldin Nanfack,

Sidi Ahmed Mahmoudi,

Xavier Siebert,

Olivier Cornu,

Christophe De Vleeschouwer; [pdf] [supp]
[bibtex]
@InProceedings{Englebert_2023_ICCV,
  author    = {Englebert, Alexandre and Stassin, S{\'e}drick and Nanfack, G{\'e}raldin and Mahmoudi, Sidi Ahmed and Siebert, Xavier and Cornu, Olivier and De Vleeschouwer, Christophe},
  title     = {Explaining Through Transformer Input Sampling},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {806--815},
}
Actor-Agnostic Multi-Label Action Recognition with Multi-Modal Query: Anindya Mondal,

Sauradip Nag,

Joaquin M Prada,

Xiatian Zhu,

Anjan Dutta; [pdf] [arXiv]
[bibtex]
@InProceedings{Mondal_2023_ICCV,
  author    = {Mondal, Anindya and Nag, Sauradip and Prada, Joaquin M and Zhu, Xiatian and Dutta, Anjan},
  title     = {Actor-Agnostic Multi-Label Action Recognition with Multi-Modal Query},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {784--794},
}
All-pairs Consistency Learning for Weakly Supervised Semantic Segmentation: Weixuan Sun,

Yanhao Zhang,

Zhen Qin,

Zheyuan Liu,

Lin Cheng,

Fanyi Wang,

Yiran Zhong,

Nick Barnes; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2023_ICCV,
  author    = {Sun, Weixuan and Zhang, Yanhao and Qin, Zhen and Liu, Zheyuan and Cheng, Lin and Wang, Fanyi and Zhong, Yiran and Barnes, Nick},
  title     = {All-pairs Consistency Learning for Weakly Supervised Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {826--837},
}
Dual-Contrastive Dual-Consistency Dual-Transformer: A Semi-Supervised Approach to Medical Image Segmentation: Ziyang Wang,

Congying Ma; [pdf]
[bibtex]
@InProceedings{Wang_2023_ICCV,
  author    = {Wang, Ziyang and Ma, Congying},
  title     = {Dual-Contrastive Dual-Consistency Dual-Transformer: A Semi-Supervised Approach to Medical Image Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {870--879},
}
A Hybrid Visual Transformer for Efficient Deep Human Activity Recognition: Youcef Djenouri,

Ahmed Nabil Belbachir; [pdf]
[bibtex]
@InProceedings{Djenouri_2023_ICCV,
  author    = {Djenouri, Youcef and Belbachir, Ahmed Nabil},
  title     = {A Hybrid Visual Transformer for Efficient Deep Human Activity Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {721--730},
}
Which Tokens to Use? Investigating Token Reduction in Vision Transformers: Joakim Bruslund Haurum,

Sergio Escalera,

Graham W. Taylor,

Thomas B. Moeslund; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Haurum_2023_ICCV,
  author    = {Haurum, Joakim Bruslund and Escalera, Sergio and Taylor, Graham W. and Moeslund, Thomas B.},
  title     = {Which Tokens to Use? {Investigating} Token Reduction in Vision Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {773--783},
}
Hierarchical Spatiotemporal Transformers for Video Object Segmentation: Jun-Sang Yoo,

Hongjae Lee,

Seung-Won Jung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yoo_2023_ICCV,
  author    = {Yoo, Jun-Sang and Lee, Hongjae and Jung, Seung-Won},
  title     = {Hierarchical Spatiotemporal Transformers for Video Object Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {795--805},
}
IDTransformer: Transformer for Intrinsic Image Decomposition: Partha Das,

Maxime Gevers,

Sezer Karaoglu,

Theo Gevers; [pdf]
[bibtex]
@InProceedings{Das_2023_ICCV,
  author    = {Das, Partha and Gevers, Maxime and Karaoglu, Sezer and Gevers, Theo},
  title     = {{IDTransformer}: Transformer for Intrinsic Image Decomposition},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {816--825},
}
MSViT: Dynamic Mixed-Scale Tokenization for Vision Transformers: Jakob Drachmann Havtorn,

Amélie Royer,

Tijmen Blankevoort,

Babak Ehteshami Bejnordi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Havtorn_2023_ICCV,
  author    = {Havtorn, Jakob Drachmann and Royer, Am{\'e}lie and Blankevoort, Tijmen and Bejnordi, Babak Ehteshami},
  title     = {{MSViT}: Dynamic Mixed-Scale Tokenization for Vision Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {838--848},
}
Template-Guided Illumination Correction for Document Images with Imperfect Geometric Reconstruction: Felix Hertlein,

Alexander Naumann; [pdf]
[bibtex]
@InProceedings{Hertlein_2023_ICCV,
  author    = {Hertlein, Felix and Naumann, Alexander},
  title     = {Template-Guided Illumination Correction for Document Images with Imperfect Geometric Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {904--913},
}
Spatio-Temporal Convolution-Attention Video Network: Ali Diba,

Vivek Sharma,

Mohammad M. Arzani,

Luc Van Gool; [pdf]
[bibtex]
@InProceedings{Diba_2023_ICCV,
  author    = {Diba, Ali and Sharma, Vivek and Arzani, Mohammad M. and Van Gool, Luc},
  title     = {Spatio-Temporal Convolution-Attention Video Network},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {859--869},
}
TSOSVNet: Teacher-Student Collaborative Knowledge Distillation for Online Signature Verification: Chandra Sekhar V,

Avinash Gautam,

Viswanath P,

Sreeja SR,

Rama Krishna Sai G; [pdf]
[bibtex]
@InProceedings{V_2023_ICCV,
  author    = {V, Chandra Sekhar and Gautam, Avinash and P, Viswanath and Sreeja, SR and G, Rama Krishna Sai},
  title     = {{TSOSVNet}: Teacher-Student Collaborative Knowledge Distillation for Online Signature Verification},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {742--751},
}
SeMask: Semantically Masked Transformers for Semantic Segmentation: Jitesh Jain,

Anukriti Singh,

Nikita Orlov,

Zilong Huang,

Jiachen Li,

Steven Walton,

Humphrey Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jain_2023_ICCV,
  author    = {Jain, Jitesh and Singh, Anukriti and Orlov, Nikita and Huang, Zilong and Li, Jiachen and Walton, Steven and Shi, Humphrey},
  title     = {{SeMask}: Semantically Masked Transformers for Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {752--761},
}
TransInpaint: Transformer-Based Image Inpainting with Context Adaptation: Pourya Shamsolmoali,

Masoumeh Zareapoor,

Eric Granger; [pdf]
[bibtex]
@InProceedings{Shamsolmoali_2023_ICCV,
  author    = {Shamsolmoali, Pourya and Zareapoor, Masoumeh and Granger, Eric},
  title     = {{TransInpaint}: Transformer-Based Image Inpainting with Context Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {849--858},
}
Interactive Image Segmentation with Cross-Modality Vision Transformers: Kun Li,

George Vosselman,

Michael Ying Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2023_ICCV,
  author    = {Li, Kun and Vosselman, George and Yang, Michael Ying},
  title     = {Interactive Image Segmentation with Cross-Modality Vision Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {762--772},
}
MOSAIC: Multi-Object Segmented Arbitrary Stylization Using CLIP: Prajwal Ganugula,

Y S S S Santosh Kumar,

N K Sagar Reddy,

Prabhath Chellingi,

Avinash Thakur,

Neeraj Kasera,

C Shyam Anand; [pdf] [arXiv]
[bibtex]
@InProceedings{Ganugula_2023_ICCV,
  author    = {Ganugula, Prajwal and Kumar, Y S S S Santosh and Reddy, N K Sagar and Chellingi, Prabhath and Thakur, Avinash and Kasera, Neeraj and Anand, C Shyam},
  title     = {{MOSAIC}: Multi-Object Segmented Arbitrary Stylization Using {CLIP}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {892--903},
}
On Moving Object Segmentation from Monocular Video with Transformers: Christian Homeyer,

Christoph Schnörr; [pdf] [supp]
[bibtex]
@InProceedings{Homeyer_2023_ICCV,
  author    = {Homeyer, Christian and Schn{\"o}rr, Christoph},
  title     = {On Moving Object Segmentation from Monocular Video with Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {880--891},
}
SCSC: Spatial Cross-Scale Convolution Module to Strengthen Both CNNs and Transformers: Xijun Wang,

Xiaojie Chu,

Chunrui Han,

Xiangyu Zhang; [pdf] [arXiv]
[bibtex]
@comment{The key below carries a _b suffix because the bare key Wang_2023_ICCV is already used by the Wang and Ma entry earlier in this listing, and BibTeX discards a repeated key.}
@InProceedings{Wang_2023_ICCV_b,
  author    = {Wang, Xijun and Chu, Xiaojie and Han, Chunrui and Zhang, Xiangyu},
  title     = {{SCSC}: Spatial Cross-Scale Convolution Module to Strengthen Both {CNNs} and Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2023},
  pages     = {731--741},
}