{"id":"https://openalex.org/W4400022733","doi":"https://doi.org/10.48550/arxiv.2406.15736","title":"Evaluating Large Vision-and-Language Models on Children's Mathematical\n Olympiads","display_name":"Evaluating Large Vision-and-Language Models on Children's Mathematical\n Olympiads","publication_year":2024,"publication_date":"2024-06-22","ids":{"openalex":"https://openalex.org/W4400022733","doi":"https://doi.org/10.48550/arxiv.2406.15736"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2406.15736","pdf_url":"http://arxiv.org/pdf/2406.15736","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2406.15736","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5024613828","display_name":"Anoop Cherian","orcid":"https://orcid.org/0000-0002-5566-0351"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cherian, Anoop","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063443405","display_name":"Kuan\u2013Chuan Peng","orcid":"https://orcid.org/0000-0002-2682-9912"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Peng, Kuan-Chuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056968977","display_name":"Suhas Lohit","orcid":"https://orcid.org/0000-0002-0392-3818"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lohit, Suhas","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111238855","display_name":"J. Matthiesen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Matthiesen, Joanna","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069299612","display_name":"Kevin Smith","orcid":"https://orcid.org/0000-0002-6163-191X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Smith, Kevin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5071093940","display_name":"Joshua B. Tenenbaum","orcid":"https://orcid.org/0000-0002-1925-2035"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tenenbaum, Joshua B.","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":77},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T12884","display_name":"Educational Assessment and Pedagogy","score":0.9202,"subfield":{"id":"https://openalex.org/subfields/3304","display_name":"Education"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12884","display_name":"Educational Assessment and Pedagogy","score":0.9202,"subfield":{"id":"https://openalex.org/subfields/3304","display_name":"Education"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10130","display_name":"Mathematics Education and Teaching Techniques","score":0.9005,"subfield":{"id":"https://openalex.org/subfields/3304","display_name":"Education"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/olympiad","display_name":"Olympiad","score":0.8506652}],"concepts":[{"id":"https://openalex.org/C130383907","wikidata":"https://www.wikidata.org/wiki/Q221956","display_name":"Olympiad","level":2,"score":0.8506652},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5531324},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45187375},{"id":"https://openalex.org/C188147891","wikidata":"https://www.wikidata.org/wiki/Q147638","display_name":"Cognitive science","level":1,"score":0.33974588},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.3341692},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.2962525},{"id":"https://openalex.org/C145420912","wikidata":"https://www.wikidata.org/wiki/Q853077","display_name":"Mathematics education","level":1,"score":0.22413486},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.13147491}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2406.15736","pdf_url":"http://arxiv.org/pdf/2406.15736","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2406.15736","pdf_url":"http://arxiv.org/pdf/2406.15736","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W4390472257","https://openalex.org/W4251082295","https://openalex.org/W4229763038","https://openalex.org/W4221051194","https://openalex.org/W3114105351","https://openalex.org/W3024628487","https://openalex.org/W2939603677","https://openalex.org/W2748952813","https://openalex.org/W2587626243"],"abstract_inverted_index":{"Recent":[0],"years":[1,165],"have":[2],"seen":[3],"a":[4,122,156,183,251],"significant":[5,229],"progress":[6],"in":[7,39,77,204],"the":[8,48,78,115,150,212,232,257],"general-purpose":[9],"problem":[10,57],"solving":[11,58,205],"abilities":[12,38,103,137],"of":[13,26,55,65,161,190,235,240,254],"large":[14,50],"vision":[15,70],"and":[16,71,100,147,238,243,264],"language":[17],"models":[18,34,52,237],"(LVLMs),":[19],"such":[20],"as":[21,59],"ChatGPT,":[22],"Gemini,":[23],"etc.;":[24],"some":[25],"these":[27],"breakthroughs":[28],"even":[29],"seem":[30],"to":[31,35,144,186,214,247],"enable":[32],"AI":[33,51,66,236],"outperform":[36],"human":[37],"varied":[40],"tasks":[41],"that":[42,132,140,189,195,225,239,260],"demand":[43],"higher-order":[44],"cognitive":[45],"skills.":[46,148,266],"Are":[47],"current":[49,79],"indeed":[53],"capable":[54],"generalized":[56],"humans":[60],"do?":[61],"A":[62],"systematic":[63],"analysis":[64,223],"capabilities":[67,234,245],"for":[68,207,219],"joint":[69],"text":[72],"reasoning,":[73],"however,":[74],"is":[75,121,227],"missing":[76],"scientific":[80],"literature.":[81],"In":[82],"this":[83,91],"paper,":[84],"we":[85,111,154,170],"make":[86],"an":[87],"effort":[88],"towards":[89],"filling":[90],"gap,":[92],"by":[93],"evaluating":[94],"state-of-the-art":[95],"LVLMs":[96,172,197],"on":[97,174,179,250],"their":[98,145,177,244],"mathematical":[99,136,175],"algorithmic":[101],"reasoning":[102,202,233,255],"using":[104,138],"visuo-linguistic":[105],"problems":[106,113,163,206,217],"from":[107,114,129,152,164],"children's":[108,134,262],"Olympiads.":[109],"Specifically,":[110],"consider":[112],"Mathematical":[116],"Kangaroo":[117],"(MK)":[118],"Olympiad,":[119],"which":[120],"popular":[123],"international":[124],"competition":[125],"targeted":[126],"at":[127],"children":[128],"grades":[130],"1-12,":[131],"tests":[133],"deeper":[135],"puzzles":[139,151,181],"are":[141],"appropriately":[142],"gauged":[143],"age":[146],"Using":[149],"MK,":[153],"created":[155],"dataset,":[157,169],"dubbed":[158],"SMART-840,":[159],"consisting":[160],"840":[162],"2020-2024.":[166],"With":[167],"our":[168,180],"analyze":[171],"power":[173],"reasoning;":[176],"responses":[178],"offer":[182],"direct":[184],"way":[185],"compare":[187],"against":[188],"children.":[191,221],"Our":[192],"results":[193],"show":[194],"modern":[196],"do":[198],"demonstrate":[199],"increasingly":[200],"powerful":[201],"skills":[203],"higher":[208],"grades,":[209],"but":[210],"lack":[211],"foundations":[213],"correctly":[215],"answer":[216],"designed":[218],"younger":[220],"Further":[222],"shows":[224],"there":[226],"no":[228],"correlation":[230],"between":[231],"young":[241],"children,":[242],"appear":[246],"be":[248],"based":[249],"different":[252],"type":[253],"than":[256],"cumulative":[258],"knowledge":[259],"underlies":[261],"mathematics":[263],"logic":[265]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4400022733","counts_by_year":[],"updated_date":"2025-04-23T18:03:40.019153","created_date":"2024-06-26"}