Large Vision - Language Model Learning and Applications


Exploring Cross-Attention Maps in Multi-modal Diffusion Transformers for Training-Free Semantic Segmentation
Rento Yamaguchi,
Keiji Yanai
[pdf]
[bibtex]
@InProceedings{Yamaguchi_2024_ACCV,
  author    = {Yamaguchi, Rento and Yanai, Keiji},
  title     = {Exploring Cross-Attention Maps in Multi-modal Diffusion Transformers for Training-Free Semantic Segmentation},
  booktitle = {Proceedings of the Asian Conference on Computer Vision (ACCV) Workshops},
  month     = dec,
  year      = {2024},
  pages     = {260--274},
}

Questioning, Answering, and Captioning for Zero-Shot Detailed Image Caption
Duc-Tuan Luu,
Viet-Tuan Le,
Duc Minh Vo
[pdf]
[bibtex]
@InProceedings{Luu_2024_ACCV,
  author    = {Luu, Duc-Tuan and Le, Viet-Tuan and Vo, Duc Minh},
  title     = {Questioning, Answering, and Captioning for Zero-Shot Detailed Image Caption},
  booktitle = {Proceedings of the Asian Conference on Computer Vision (ACCV) Workshops},
  month     = dec,
  year      = {2024},
  pages     = {242--259},
}

DermAI: A Chatbot Assistant for Skin lesion Diagnosis Using Vision and Large Language Models
Viet-Tham Huynh,
Trong-Thuan Nguyen,
Thao Thi-Phuong Dao,
Tam V. Nguyen,
Minh-Triet Tran
[pdf]
[bibtex]
@InProceedings{Huynh_2024_ACCV,
  author    = {Huynh, Viet-Tham and Nguyen, Trong-Thuan and Dao, Thao Thi-Phuong and Nguyen, Tam V. and Tran, Minh-Triet},
  title     = {{DermAI}: A Chatbot Assistant for Skin lesion Diagnosis Using Vision and Large Language Models},
  booktitle = {Proceedings of the Asian Conference on Computer Vision (ACCV) Workshops},
  month     = dec,
  year      = {2024},
  pages     = {287--301},
}

An Approach to Complex Visual Data Interpretation with Vision-Language Models
Thanh-Son Nguyen,
Viet-Tham Huynh,
Van-Loc Nguyen,
Minh-Triet Tran
[pdf]
[bibtex]
@InProceedings{Nguyen_2024_ACCV,
  author    = {Nguyen, Thanh-Son and Huynh, Viet-Tham and Nguyen, Van-Loc and Tran, Minh-Triet},
  title     = {An Approach to Complex Visual Data Interpretation with Vision-Language Models},
  booktitle = {Proceedings of the Asian Conference on Computer Vision (ACCV) Workshops},
  month     = dec,
  year      = {2024},
  pages     = {334--350},
}

Mitigating Backdoor Attacks using Activation-Guided Model Editing
Felix Hsieh,
Huy H. Nguyen,
AprilPyone MaungMaung,
Dmitrii Usynin,
Isao Echizen
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hsieh_2024_ACCV,
  author    = {Hsieh, Felix and Nguyen, Huy H. and MaungMaung, AprilPyone and Usynin, Dmitrii and Echizen, Isao},
  title     = {Mitigating Backdoor Attacks using Activation-Guided Model Editing},
  booktitle = {Proceedings of the Asian Conference on Computer Vision (ACCV) Workshops},
  month     = dec,
  year      = {2024},
  pages     = {302--318},
}

Exploring Visual Multiple-Choice Question Answering with Pre-trained Vision-Language Models
Gia-Nghia Tran,
Duc-Tuan Luu,
Dang-Van Thin
[pdf]
[bibtex]
@InProceedings{Tran_2024_ACCV,
  author    = {Tran, Gia-Nghia and Luu, Duc-Tuan and Thin, Dang-Van},
  title     = {Exploring Visual Multiple-Choice Question Answering with Pre-trained Vision-Language Models},
  booktitle = {Proceedings of the Asian Conference on Computer Vision (ACCV) Workshops},
  month     = dec,
  year      = {2024},
  pages     = {319--333},
}

Enhancing Visual Question Answering with Pre-trained Vision-Language Models: An Ensemble Approach at the LAVA Challenge 2024
Trong-Hieu Nguyen-Mau,
Nhu-Binh Nguyen Truc,
Nhu-Vinh Hoang,
Minh-Triet Tran,
Hai-Dang Nguyen
[pdf]
[bibtex]
@InProceedings{Nguyen-Mau_2024_ACCV,
  author    = {Nguyen-Mau, Trong-Hieu and Truc, Nhu-Binh Nguyen and Hoang, Nhu-Vinh and Tran, Minh-Triet and Nguyen, Hai-Dang},
  title     = {Enhancing Visual Question Answering with Pre-trained Vision-Language Models: An Ensemble Approach at the {LAVA} {Challenge} 2024},
  booktitle = {Proceedings of the Asian Conference on Computer Vision (ACCV) Workshops},
  month     = dec,
  year      = {2024},
  pages     = {275--286},
}