ACCV 2024 Open Access Repository

Large Vision - Language Model Learning and Applications

Exploring Cross-Attention Maps in Multi-modal Diffusion Transformers for Training-Free Semantic Segmentation: Rento Yamaguchi,

Keiji Yanai; [pdf]
[bibtex]
@InProceedings{Yamaguchi_2024_ACCV,
  author    = {Yamaguchi, Rento and Yanai, Keiji},
  title     = {Exploring Cross-Attention Maps in Multi-modal Diffusion Transformers for Training-Free Semantic Segmentation},
  booktitle = {Proceedings of the Asian Conference on Computer Vision (ACCV) Workshops},
  month     = dec,
  year      = {2024},
  pages     = {260--274},
}
Questioning, Answering, and Captioning for Zero-Shot Detailed Image Caption: Duc-Tuan Luu,

Viet-Tuan Le,

Duc Minh Vo; [pdf]
[bibtex]
@InProceedings{Luu_2024_ACCV,
  author    = {Luu, Duc-Tuan and Le, Viet-Tuan and Vo, Duc Minh},
  title     = {Questioning, Answering, and Captioning for Zero-Shot Detailed Image Caption},
  booktitle = {Proceedings of the Asian Conference on Computer Vision (ACCV) Workshops},
  month     = dec,
  year      = {2024},
  pages     = {242--259},
}
DermAI: A Chatbot Assistant for Skin lesion Diagnosis Using Vision and Large Language Models: Viet-Tham Huynh,

Trong-Thuan Nguyen,

Thao Thi-Phuong Dao,

Tam V. Nguyen,

Minh-Triet Tran; [pdf]
[bibtex]
@InProceedings{Huynh_2024_ACCV,
  author    = {Huynh, Viet-Tham and Nguyen, Trong-Thuan and Dao, Thao Thi-Phuong and Nguyen, Tam V. and Tran, Minh-Triet},
  title     = {{DermAI}: A Chatbot Assistant for Skin lesion Diagnosis Using Vision and Large Language Models},
  booktitle = {Proceedings of the Asian Conference on Computer Vision (ACCV) Workshops},
  month     = dec,
  year      = {2024},
  pages     = {287--301},
}
An Approach to Complex Visual Data Interpretation with Vision-Language Models: Thanh-Son Nguyen,

Viet-Tham Huynh,

Van-Loc Nguyen,

Minh-Triet Tran; [pdf]
[bibtex]
@InProceedings{Nguyen_2024_ACCV,
  author    = {Nguyen, Thanh-Son and Huynh, Viet-Tham and Nguyen, Van-Loc and Tran, Minh-Triet},
  title     = {An Approach to Complex Visual Data Interpretation with Vision-Language Models},
  booktitle = {Proceedings of the Asian Conference on Computer Vision (ACCV) Workshops},
  month     = dec,
  year      = {2024},
  pages     = {334--350},
}
Mitigating Backdoor Attacks using Activation-Guided Model Editing: Felix Hsieh,

Huy H. Nguyen,

AprilPyone MaungMaung,

Dmitrii Usynin,

Isao Echizen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hsieh_2024_ACCV,
  author    = {Hsieh, Felix and Nguyen, Huy H. and MaungMaung, AprilPyone and Usynin, Dmitrii and Echizen, Isao},
  title     = {Mitigating Backdoor Attacks using Activation-Guided Model Editing},
  booktitle = {Proceedings of the Asian Conference on Computer Vision (ACCV) Workshops},
  month     = dec,
  year      = {2024},
  pages     = {302--318},
}
Exploring Visual Multiple-Choice Question Answering with Pre-trained Vision-Language Models: Gia-Nghia Tran,

Duc-Tuan Luu,

Dang-Van Thin; [pdf]
[bibtex]
@InProceedings{Tran_2024_ACCV,
  author    = {Tran, Gia-Nghia and Luu, Duc-Tuan and Thin, Dang-Van},
  title     = {Exploring Visual Multiple-Choice Question Answering with Pre-trained Vision-Language Models},
  booktitle = {Proceedings of the Asian Conference on Computer Vision (ACCV) Workshops},
  month     = dec,
  year      = {2024},
  pages     = {319--333},
}
Enhancing Visual Question Answering with Pre-trained Vision-Language Models: An Ensemble Approach at the LAVA Challenge 2024: Trong-Hieu Nguyen-Mau,

Nhu-Binh Nguyen Truc,

Nhu-Vinh Hoang,

Minh-Triet Tran,

Hai-Dang Nguyen; [pdf]
[bibtex]
@InProceedings{Nguyen-Mau_2024_ACCV,
  author    = {Nguyen-Mau, Trong-Hieu and Truc, Nhu-Binh Nguyen and Hoang, Nhu-Vinh and Tran, Minh-Triet and Nguyen, Hai-Dang},
  title     = {Enhancing Visual Question Answering with Pre-trained Vision-Language Models: An Ensemble Approach at the {LAVA} Challenge 2024},
  booktitle = {Proceedings of the Asian Conference on Computer Vision (ACCV) Workshops},
  month     = dec,
  year      = {2024},
  pages     = {275--286},
}