{"id":"https://openalex.org/W4406443877","doi":"https://doi.org/10.48550/arxiv.2407.17379","title":"MMRA: A Benchmark for Evaluating Multi-Granularity and Multi-Image\n Relational Association Capabilities in Large Visual Language Models","display_name":"MMRA: A Benchmark for Evaluating Multi-Granularity and Multi-Image\n Relational Association Capabilities in Large Visual Language Models","publication_year":2024,"publication_date":"2024-07-24","ids":{"openalex":"https://openalex.org/W4406443877","doi":"https://doi.org/10.48550/arxiv.2407.17379"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.17379","pdf_url":"http://arxiv.org/pdf/2407.17379","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2407.17379","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100682226","display_name":"Siwei Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Siwei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100863175","display_name":"Kang Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Kang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101669635","display_name":"Yu Bai","orcid":"https://orcid.org/0000-0002-4100-8223"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bai, Yu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064871355","display_name":"Yiming Liang","orcid":"https://orcid.org/0000-0001-9193-4789"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liang, Yiming","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023694591","display_name":"Yizhi Li","orcid":"https://orcid.org/0000-0002-4864-2593"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Yizhi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039135087","display_name":"Haoning Wu","orcid":"https://orcid.org/0009-0001-8717-338X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Haoning","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100405312","display_name":"Jianhua Liu","orcid":"https://orcid.org/0000-0001-5698-3446"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, J. H.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023339675","display_name":"Ruibo Liu","orcid":"https://orcid.org/0000-0002-5163-966X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Ruibo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101308310","display_name":"Xingwei Qu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qu, Xingwei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101456600","display_name":"Xuxin Cheng","orcid":"https://orcid.org/0000-0003-2995-9349"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cheng, Xuxin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100326077","display_name":"Ge Zhang","orcid":"https://orcid.org/0000-0003-0704-0657"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Ge","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083410012","display_name":"Wenhao Huang","orcid":"https://orcid.org/0000-0002-0036-6278"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Wenhao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5024599321","display_name":"Chenghua Lin","orcid":"https://orcid.org/0000-0003-3454-2468"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin, Chenghua","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":77},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9203,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9203,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.8028345},{"id":"https://openalex.org/keywords/granularity","display_name":"Granularity","score":0.7839143},{"id":"https://openalex.org/keywords/association","display_name":"Association (psychology)","score":0.6956852}],"concepts":[{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.8028345},{"id":"https://openalex.org/C177774035","wikidata":"https://www.wikidata.org/wiki/Q1246948","display_name":"Granularity","level":2,"score":0.7839143},{"id":"https://openalex.org/C142853389","wikidata":"https://www.wikidata.org/wiki/Q744778","display_name":"Association (psychology)","level":2,"score":0.6956852},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.66967964},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.61053234},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.51270735},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.43172786},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.26037663},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.11498749},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.090919495},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.06511995},{"id":"https://openalex.org/C542102704","wikidata":"https://www.wikidata.org/wiki/Q183257","display_name":"Psychotherapist","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.17379","pdf_url":"http://arxiv.org/pdf/2407.17379","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.17379","pdf_url":"http://arxiv.org/pdf/2407.17379","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W936373746","https://openalex.org/W4382701072","https://openalex.org/W4256502920","https://openalex.org/W4226090359","https://openalex.org/W2975817033","https://openalex.org/W2931688134","https://openalex.org/W2378857091","https://openalex.org/W2377919138","https://openalex.org/W2059697060","https://openalex.org/W103652678"],"abstract_inverted_index":{"Given":[0],"the":[1,16,22,49,57,74,126,135,207,218,227,238,250],"remarkable":[2],"success":[3],"that":[4,109,133,173,184,237],"large":[5],"visual":[6],"language":[7,212],"models":[8],"(LVLMs)":[9],"have":[10,176],"achieved":[11],"in":[12,68,128],"image":[13,121,193,224,247],"perception":[14,153],"tasks,":[15,171],"endeavor":[17],"to":[18,94,125,162,191,198,222],"make":[19],"LVLMs":[20,140,160,166,174,186,221,242],"perceive":[21,192,223],"world":[23],"like":[24],"humans":[25],"is":[26],"drawing":[27],"increasing":[28],"attention.":[29],"Current":[30],"multi-modal":[31],"benchmarks":[32],"primarily":[33],"focus":[34],"on":[35,134,169,205],"facts":[36],"or":[37,65],"specific":[38],"topic-related":[39],"knowledge":[40],"contained":[41],"within":[42,226],"individual":[43],"images.":[44,70],"However,":[45],"they":[46],"often":[47],"overlook":[48],"associative":[50],"relations":[51,127],"between":[52],"multiple":[53,202],"images,":[54],"which":[55],"require":[56],"identification":[58],"and":[59,79,96,122,144],"analysis":[60],"of":[61,210,220,229,240],"similarities":[62],"among":[63,107],"entities":[64],"content":[66],"present":[67],"different":[69],"Therefore,":[71],"we":[72,101,216],"propose":[73],"multi-image":[75,139,152,231],"relation":[76,105],"association":[77,232],"task":[78],"a":[80,156,188],"meticulously":[81],"curated":[82],"Multi-granularity":[83],"Multi-image":[84],"Relational":[85],"Association":[86],"(MMRA)":[87],"benchmark,":[88,137],"comprising":[89],"1,024":[90],"samples.":[91],"In":[92],"order":[93],"systematically":[95],"comprehensively":[97],"evaluate":[98],"current":[99,138,241],"LVLMs,":[100],"establish":[102],"an":[103],"associational":[104],"system":[106],"images":[108,203],"contain":[110],"11":[111],"subtasks":[112],"(e.g,":[113],"UsageSimilarity,":[114],"SubEvent)":[115],"at":[116],"two":[117],"granularity":[118],"levels":[119],"(i.e.,":[120],"entity)":[123],"according":[124],"ConceptNet.":[129],"Our":[130,234],"experiments":[131,235],"reveal":[132],"MMRA":[136],"exhibit":[141],"distinct":[142],"advantages":[143],"disadvantages":[145],"across":[146,201],"various":[147],"subtasks.":[148],"Notably,":[149],"fine-grained,":[150],"entity-level":[151],"tasks":[154],"pose":[155],"greater":[157],"challenge":[158],"for":[159],"compared":[161],"image-level":[163],"tasks.":[164],"Moreover,":[165,215],"perform":[167],"poorly":[168],"spatial-related":[170],"indicating":[172],"still":[175],"limited":[177],"spatial":[178],"awareness.":[179],"Additionally,":[180],"our":[181,230],"findings":[182],"indicate":[183],"while":[185],"demonstrate":[187],"strong":[189],"capability":[190],"details,":[194],"enhancing":[195],"their":[196,211],"ability":[197,219],"associate":[199],"information":[200],"hinges":[204],"improving":[206],"reasoning":[208],"capabilities":[209],"model":[213,246],"component.":[214],"explored":[217],"sequences":[225,248],"context":[228],"task.":[233],"show":[236],"majority":[239],"do":[243],"not":[244],"adequately":[245],"during":[249],"pre-training":[251],"process.":[252]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4406443877","counts_by_year":[],"updated_date":"2025-04-19T02:54:39.858793","created_date":"2025-01-16"}