Large Vision - Language Model Learning and Applications
Exploring Cross-Attention Maps in Multi-modal Diffusion Transformers for Training-Free Semantic Segmentation-
[pdf]
[bibtex]@InProceedings{Yamaguchi_2024_ACCV,
  author    = {Yamaguchi, Rento and Yanai, Keiji},
  title     = {Exploring Cross-Attention Maps in Multi-modal Diffusion Transformers for Training-Free Semantic Segmentation},
  booktitle = {Proceedings of the Asian Conference on Computer Vision ({ACCV}) Workshops},
  month     = dec,
  year      = {2024},
  pages     = {260--274}
}
Questioning, Answering, and Captioning for Zero-Shot Detailed Image Caption-
[pdf]
[bibtex]@InProceedings{Luu_2024_ACCV,
  author    = {Luu, Duc-Tuan and Le, Viet-Tuan and Vo, Duc Minh},
  title     = {Questioning, Answering, and Captioning for Zero-Shot Detailed Image Caption},
  booktitle = {Proceedings of the Asian Conference on Computer Vision ({ACCV}) Workshops},
  month     = dec,
  year      = {2024},
  pages     = {242--259}
}
DermAI: A Chatbot Assistant for Skin lesion Diagnosis Using Vision and Large Language Models-
[pdf]
[bibtex]@InProceedings{Huynh_2024_ACCV,
  author    = {Huynh, Viet-Tham and Nguyen, Trong-Thuan and Dao, Thao Thi-Phuong and Nguyen, Tam V. and Tran, Minh-Triet},
  title     = {{DermAI}: A Chatbot Assistant for Skin lesion Diagnosis Using Vision and Large Language Models},
  booktitle = {Proceedings of the Asian Conference on Computer Vision ({ACCV}) Workshops},
  month     = dec,
  year      = {2024},
  pages     = {287--301}
}
An Approach to Complex Visual Data Interpretation with Vision-Language Models-
[pdf]
[bibtex]@InProceedings{Nguyen_2024_ACCV,
  author    = {Nguyen, Thanh-Son and Huynh, Viet-Tham and Nguyen, Van-Loc and Tran, Minh-Triet},
  title     = {An Approach to Complex Visual Data Interpretation with Vision-Language Models},
  booktitle = {Proceedings of the Asian Conference on Computer Vision ({ACCV}) Workshops},
  month     = dec,
  year      = {2024},
  pages     = {334--350}
}
Mitigating Backdoor Attacks using Activation-Guided Model Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hsieh_2024_ACCV,
  author    = {Hsieh, Felix and Nguyen, Huy H. and MaungMaung, AprilPyone and Usynin, Dmitrii and Echizen, Isao},
  title     = {Mitigating Backdoor Attacks using Activation-Guided Model Editing},
  booktitle = {Proceedings of the Asian Conference on Computer Vision ({ACCV}) Workshops},
  month     = dec,
  year      = {2024},
  pages     = {302--318}
}
Exploring Visual Multiple-Choice Question Answering with Pre-trained Vision-Language Models-
[pdf]
[bibtex]@InProceedings{Tran_2024_ACCV,
  author        = {Tran, Gia-Nghia and Luu, Duc-Tuan and Thin, Dang-Van},
  title         = {Exploring Visual Multiple-Choice Question Answering with Pre-trained Vision-Language Models},
  booktitle     = {Proceedings of the Asian Conference on Computer Vision ({ACCV}) Workshops},
  month         = dec,
  year          = {2024},
  pages         = {319--333},
  internal-note = {NOTE(review): third author rebuilt from a malformed name list that contained a stray name separator; confirm spelling and name order against the published paper}
}
Enhancing Visual Question Answering with Pre-trained Vision-Language Models: An Ensemble Approach at the LAVA Challenge 2024-
[pdf]
[bibtex]@InProceedings{Nguyen-Mau_2024_ACCV,
  author    = {Nguyen-Mau, Trong-Hieu and Truc, Nhu-Binh Nguyen and Hoang, Nhu-Vinh and Tran, Minh-Triet and Nguyen, Hai-Dang},
  title     = {Enhancing Visual Question Answering with Pre-trained Vision-Language Models: An Ensemble Approach at the {LAVA Challenge} 2024},
  booktitle = {Proceedings of the Asian Conference on Computer Vision ({ACCV}) Workshops},
  month     = dec,
  year      = {2024},
  pages     = {275--286}
}