{"id":"https://openalex.org/W4403322025","doi":"https://doi.org/10.48550/arxiv.2410.02381","title":"MetaMetrics: Calibrating Metrics For Generation Tasks Using Human\n Preferences","display_name":"MetaMetrics: Calibrating Metrics For Generation Tasks Using Human\n Preferences","publication_year":2024,"publication_date":"2024-10-03","ids":{"openalex":"https://openalex.org/W4403322025","doi":"https://doi.org/10.48550/arxiv.2410.02381"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.02381","pdf_url":"http://arxiv.org/pdf/2410.02381","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2410.02381","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5085516032","display_name":"Genta Indra Winata","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Winata, Genta Indra","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5093876039","display_name":"David Anugraha","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Anugraha, David","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5093192290","display_name":"Lucky Susanto","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Susanto, Lucky","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081586873","display_name":"Garry Kuwanto","orcid":"https://orcid.org/0000-0002-1836-0886"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kuwanto, Garry","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5027174211","display_name":"Derry Wijaya","orcid":"https://orcid.org/0000-0002-0848-4703"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wijaya, Derry Tanti","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":78},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10672","display_name":"Design Education and Practice","score":0.3116,"subfield":{"id":"https://openalex.org/subfields/2210","display_name":"Mechanical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10672","display_name":"Design Education and Practice","score":0.3116,"subfield":{"id":"https://openalex.org/subfields/2210","display_name":"Mechanical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.608393},{"id":"https://openalex.org/C165838908","wikidata":"https://www.wikidata.org/wiki/Q736777","display_name":"Calibration","level":2,"score":0.41528922},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.34051841},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.20904458},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1564843}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.02381","pdf_url":"http://arxiv.org/pdf/2410.02381","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.02381","pdf_url":"http://arxiv.org/pdf/2410.02381","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4402327032","https://openalex.org/W4396701345","https://openalex.org/W4396696052","https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2382290278","https://openalex.org/W2376932109","https://openalex.org/W2358668433","https://openalex.org/W2001405890"],"abstract_inverted_index":{"Understanding":[0],"the":[1,28,65,91,149],"quality":[2],"of":[3,31,60,68,93,151,160],"a":[4,74,86,144],"performance":[5],"evaluation":[6,150],"metric":[7,26,104],"is":[8,51,132],"crucial":[9],"for":[10,147],"ensuring":[11,154],"that":[12,155],"model":[13],"outputs":[14],"align":[15],"with":[16,100,128],"human":[17,61,101,129,161],"preferences.":[18,102],"However,":[19],"it":[20,50],"remains":[21],"unclear":[22],"how":[23],"well":[24],"each":[25,69],"captures":[27],"diverse":[29,164],"aspects":[30,59],"these":[32],"preferences,":[33],"as":[34],"metrics":[35,56,95,156],"often":[36],"excel":[37],"in":[38,85,109],"one":[39],"particular":[40],"area":[41],"but":[42],"not":[43],"across":[44,82,119,163],"all":[45],"dimensions.":[46],"To":[47],"address":[48],"this,":[49],"essential":[52],"to":[53,57,64,78,96],"systematically":[54],"calibrate":[55],"specific":[58],"preference,":[62],"catering":[63],"unique":[66],"characteristics":[67],"aspect.":[70],"We":[71],"introduce":[72],"MetaMetrics,":[73],"calibrated":[75],"meta-metric":[76],"designed":[77],"evaluate":[79],"generation":[80,152],"tasks":[81],"different":[83],"modalities":[84],"supervised":[87],"manner.":[88],"MetaMetrics":[89,125,143],"optimizes":[90],"combination":[92],"existing":[94],"enhance":[97],"their":[98],"alignment":[99],"Our":[103],"demonstrates":[105],"flexibility":[106],"and":[107,112,122,131,135],"effectiveness":[108],"both":[110],"language":[111],"vision":[113],"downstream":[114],"tasks,":[115,153],"showing":[116],"significant":[117],"benefits":[118],"various":[120],"multilingual":[121],"multi-domain":[123],"scenarios.":[124],"aligns":[126],"closely":[127],"preferences":[130],"highly":[133],"extendable":[134],"easily":[136],"integrable":[137],"into":[138],"any":[139],"application.":[140],"This":[141],"makes":[142],"powerful":[145],"tool":[146],"improving":[148],"are":[157],"more":[158],"representative":[159],"judgment":[162],"contexts.":[165]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4403322025","counts_by_year":[],"updated_date":"2025-04-03T23:43:11.513676","created_date":"2024-10-12"}