{"id":"https://openalex.org/W4405426201","doi":"https://doi.org/10.48550/arxiv.2408.11424","title":"EMO-LLaMA: Enhancing Facial Emotion Understanding with Instruction\n Tuning","display_name":"EMO-LLaMA: Enhancing Facial Emotion Understanding with Instruction\n Tuning","publication_year":2024,"publication_date":"2024-08-21","ids":{"openalex":"https://openalex.org/W4405426201","doi":"https://doi.org/10.48550/arxiv.2408.11424"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2408.11424","pdf_url":"http://arxiv.org/pdf/2408.11424","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2408.11424","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5059997187","display_name":"Bohao Xing","orcid":"https://orcid.org/0009-0005-5924-4178"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xing, Bohao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103099781","display_name":"Zitong Yu","orcid":"https://orcid.org/0000-0003-0422-6616"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Zitong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058509635","display_name":"Xin Liu","orcid":"https://orcid.org/0000-0002-2242-6139"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Xin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101316448","display_name":"Kaishen Yuan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuan, Kaishen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005658216","display_name":"Qilang Ye","orcid":"https://orcid.org/0009-0004-6907-702X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ye, Qilang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102747855","display_name":"Weicheng Xie","orcid":"https://orcid.org/0000-0001-8946-7472"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xie, Weicheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033035049","display_name":"Huanjing Yue","orcid":"https://orcid.org/0000-0003-2517-9783"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yue, Huanjing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023593015","display_name":"Jingyu Yang","orcid":"https://orcid.org/0009-0000-0203-0236"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Jingyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5018012580","display_name":"Heikki K\u00e4lvi\u00e4inen","orcid":"https://orcid.org/0000-0002-0790-6847"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"K\u00e4lvi\u00e4inen, Heikki","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":{"min":0,"max":84},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.891,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.891,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.7755,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.5309151},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.49457762},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.40983468},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.35983217}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2408.11424","pdf_url":"http://arxiv.org/pdf/2408.11424","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2408.11424","pdf_url":"http://arxiv.org/pdf/2408.11424","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4396701345","https://openalex.org/W4396696052","https://openalex.org/W4391913857","https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2376932109","https://openalex.org/W2358668433","https://openalex.org/W2001405890"],"abstract_inverted_index":{"Facial":[0],"expression":[1],"recognition":[2],"(FER)":[3],"is":[4],"an":[5],"important":[6],"research":[7],"topic":[8],"in":[9,27,51,70,126],"emotional":[10,192],"artificial":[11],"intelligence.":[12],"In":[13,117],"recent":[14],"decades,":[15],"researchers":[16],"have":[17,64],"made":[18],"remarkable":[19],"progress.":[20],"However,":[21,83],"current":[22,80,112],"FER":[23,81,89,102,137,213],"paradigms":[24],"face":[25],"challenges":[26],"generalization,":[28],"lack":[29],"semantic":[30],"information":[31],"aligned":[32],"with":[33,139],"natural":[34],"language,":[35],"and":[36,42,55,74,111,176,211,218],"struggle":[37],"to":[38,88,109,122,159,172,186],"process":[39],"both":[40,174,209],"images":[41],"videos":[43],"within":[44],"a":[45,104,144,154,167,183],"unified":[46],"framework,":[47],"making":[48],"their":[49],"application":[50],"multimodal":[52],"emotion":[53],"understanding":[54,127],"human-computer":[56],"interaction":[57],"difficult.":[58],"Multimodal":[59],"Large":[60],"Language":[61],"Models":[62],"(MLLMs)":[63],"recently":[65],"achieved":[66],"success,":[67],"offering":[68],"advantages":[69],"addressing":[71],"these":[72],"issues":[73],"potentially":[75],"overcoming":[76],"the":[77,191],"limitations":[78],"of":[79,97],"paradigms.":[82],"directly":[84],"applying":[85],"pre-trained":[86],"MLLMs":[87,100],"still":[90],"faces":[91],"several":[92],"challenges.":[93],"Our":[94],"zero-shot":[95],"evaluations":[96],"existing":[98],"open-source":[99],"on":[101],"indicate":[103],"significant":[105],"performance":[106],"gap":[107],"compared":[108],"GPT-4V":[110],"supervised":[113],"state-of-the-art":[114],"(SOTA)":[115],"methods.":[116],"this":[118],"paper,":[119],"we":[120,165,181],"aim":[121],"enhance":[123,160],"MLLMs'":[124],"capabilities":[125],"facial":[128,151,156,162,178],"expressions.":[129],"We":[130,141],"first":[131],"generate":[132],"instruction":[133,216],"data":[134],"for":[135],"five":[136],"datasets":[138],"Gemini.":[140],"then":[142],"propose":[143],"novel":[145],"MLLM,":[146],"named":[147],"EMO-LLaMA,":[148],"which":[149],"incorporates":[150],"priors":[152],"from":[153],"pretrained":[155],"analysis":[157],"network":[158],"human":[161,196],"information.":[163,179],"Specifically,":[164],"design":[166],"Face":[168],"Info":[169],"Mining":[170],"module":[171],"extract":[173],"global":[175],"local":[177],"Additionally,":[180],"utilize":[182],"handcrafted":[184],"prompt":[185],"introduce":[187],"age-gender-race":[188],"attributes,":[189],"considering":[190],"differences":[193],"across":[194,208],"different":[195],"groups.":[197],"Extensive":[198],"experiments":[199],"show":[200],"that":[201],"EMO-LLaMA":[202],"achieves":[203],"SOTA-comparable":[204],"or":[205],"competitive":[206],"results":[207],"static":[210],"dynamic":[212],"datasets.":[214],"The":[215],"dataset":[217],"code":[219],"are":[220],"available":[221],"at":[222],"https://github.com/xxtars/EMO-LLaMA.":[223]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4405426201","counts_by_year":[],"updated_date":"2024-12-17T09:45:40.029843","created_date":"2024-12-16"}