{"id":"https://openalex.org/W4389218156","doi":"https://doi.org/10.48550/arxiv.2311.17911","title":"OPERA: Alleviating Hallucination in Multi-Modal Large Language Models via Over-Trust Penalty and Retrospection-Allocation","display_name":"OPERA: Alleviating Hallucination in Multi-Modal Large Language Models via Over-Trust Penalty and Retrospection-Allocation","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4389218156","doi":"https://doi.org/10.48550/arxiv.2311.17911"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2311.17911","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2311.17911","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058616951","display_name":"Qidong Huang","orcid":"https://orcid.org/0000-0003-2702-8516"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Qidong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100373003","display_name":"Xiaoyi Dong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dong, Xiaoyi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100406335","display_name":"Pan Zhang","orcid":"https://orcid.org/0000-0001-8496-2730"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Pan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100372342","display_name":"Bin Wang","orcid":"https://orcid.org/0000-0002-2694-1023"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Bin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101615091","display_name":"Conghui He","orcid":"https://orcid.org/0000-0001-8697-695X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"He, Conghui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100365355","display_name":"Jiaqi Wang","orcid":"https://orcid.org/0000-0002-1708-3573"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Jiaqi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010087030","display_name":"Dahua Lin","orcid":"https://orcid.org/0000-0002-8865-7896"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin, Dahua","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067689180","display_name":"Weiming Zhang","orcid":"https://orcid.org/0000-0001-5576-6108"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Weiming","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5064573190","display_name":"Nenghai Yu","orcid":"https://orcid.org/0000-0003-4417-9316"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Nenghai","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.80578,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":69,"max":80},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T14347","display_name":"Big Data and Cloud Computing Technologies","score":0.8931,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T14347","display_name":"Big Data and Cloud Computing Technologies","score":0.8931,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Deep Learning Applications in Healthcare","score":0.864,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Graph Neural Network Models and Applications","score":0.7689,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/language-understanding","display_name":"Language Understanding","score":0.550045},{"id":"https://openalex.org/keywords/language-modeling","display_name":"Language Modeling","score":0.548189},{"id":"https://openalex.org/keywords/attention-mechanism","display_name":"Attention Mechanism","score":0.535591},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine Translation","score":0.520792},{"id":"https://openalex.org/keywords/topic-modeling","display_name":"Topic Modeling","score":0.520302}],"concepts":[{"id":"https://openalex.org/C530479602","wikidata":"https://www.wikidata.org/wiki/Q1344","display_name":"Opera","level":2,"score":0.7721052},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.6868287},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.48995247},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4105931},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.39763755},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.20472854},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.2029978},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.19083077},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.07682991},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2311.17911","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2311.17911","pdf_url":"http://arxiv.org/pdf/2311.17911","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2311.17911","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2311.17911","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"score":0.71,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, justice, and strong institutions"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W635637022","https://openalex.org/W4391375266","https://openalex.org/W3203355539","https://openalex.org/W3011881386","https://openalex.org/W2748952813","https://openalex.org/W2386740010","https://openalex.org/W2380369880","https://openalex.org/W2377396514","https://openalex.org/W2353500159","https://openalex.org/W2348070968"],"abstract_inverted_index":{"Hallucination,":[0],"posed":[1],"as":[2,67],"a":[3,52,63,68,116,150,168],"pervasive":[4],"challenge":[5],"of":[6,134,175],"multi-modal":[7],"large":[8],"language":[9],"models":[10],"(MLLMs),":[11],"has":[12],"significantly":[13],"impeded":[14],"their":[15],"real-world":[16],"usage":[17],"that":[18,171],"demands":[19],"precise":[20],"judgment.":[21],"Existing":[22],"methods":[23],"mitigate":[24,162],"this":[25,47],"issue":[26,76],"with":[27,30,36,86,142,167],"either":[28],"training":[29],"specific":[31],"designed":[32],"data":[33],"or":[34,81],"inferencing":[35],"external":[37],"knowledge":[38,98],"from":[39],"other":[40],"sources,":[41],"incurring":[42],"inevitable":[43],"additional":[44,78],"costs.":[45],"In":[46],"paper,":[48],"we":[49],"present":[50],"OPERA,":[51],"novel":[53],"MLLM":[54],"decoding":[55,160],"method":[56],"grounded":[57],"in":[58,102,131,178],"an":[59,87],"Over-trust":[60],"Penalty":[61],"and":[62,137,183,201,206],"Retrospection-Allocation":[64],"strategy,":[65],"serving":[66],"nearly":[69],"free":[70],"lunch":[71],"to":[72,96,109,161],"alleviate":[73],"the":[74,97,103,123,132,139,146,154,158,163,173,179,185],"hallucination":[75],"without":[77],"data,":[79],"knowledge,":[80],"training.":[82],"Our":[83,208],"approach":[84],"begins":[85],"interesting":[88],"observation":[89],"that,":[90],"most":[91],"hallucinations":[92],"are":[93],"closely":[94],"tied":[95],"aggregation":[99],"patterns":[100],"manifested":[101],"self-attention":[104],"matrix,":[105],"i.e.,":[106],"MLLMs":[107,200],"tend":[108],"generate":[110],"new":[111],"tokens":[112,136,177],"by":[113],"focusing":[114],"on":[115,145,153,198],"few":[117],"summary":[118,176],"tokens,":[119,182],"but":[120],"not":[121],"all":[122],"previous":[124],"tokens.":[125],"Such":[126],"partial":[127],"over-trust":[128,164],"inclination":[129],"results":[130],"neglecting":[133],"image":[135,140],"describes":[138],"content":[141],"hallucination.":[143],"Based":[144],"observation,":[147],"OPERA":[148,193],"introduces":[149],"penalty":[151],"term":[152],"model":[155],"logits":[156],"during":[157],"beam-search":[159],"issue,":[165],"along":[166],"rollback":[169],"strategy":[170],"retrospects":[172],"presence":[174],"previously":[180],"generated":[181],"re-allocate":[184],"token":[186],"selection":[187],"if":[188],"necessary.":[189],"With":[190],"extensive":[191],"experiments,":[192],"shows":[194],"significant":[195],"hallucination-mitigating":[196],"performance":[197],"different":[199],"metrics,":[202],"proving":[203],"its":[204],"effectiveness":[205],"generality.":[207],"code":[209],"is":[210],"available":[211],"at:":[212],"https://github.com/shikiw/OPERA.":[213]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4389218156","counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2024-11-21T21:26:41.793685","created_date":"2023-12-01"}