{"id":"https://openalex.org/W4400141865","doi":"https://doi.org/10.48550/arxiv.2406.19188","title":"Averaging log-likelihoods in direct alignment","display_name":"Averaging log-likelihoods in direct alignment","publication_year":2024,"publication_date":"2024-06-27","ids":{"openalex":"https://openalex.org/W4400141865","doi":"https://doi.org/10.48550/arxiv.2406.19188"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2406.19188","pdf_url":"http://arxiv.org/pdf/2406.19188","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2406.19188","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5032092481","display_name":"Nathan Grinsztajn","orcid":"https://orcid.org/0000-0001-6817-5972"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Grinsztajn, Nathan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045192384","display_name":"Yannis Flet-Berliac","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Flet-Berliac, Yannis","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043355670","display_name":"Mohammad Gheshlaghi Azar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Azar, Mohammad Gheshlaghi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024394972","display_name":"Florian Strub","orcid":"https://orcid.org/0000-0001-7271-5345"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Strub, Florian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056710085","display_name":"Bill X. Wu","orcid":"https://orcid.org/0000-0001-5759-0935"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Bill","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110171801","display_name":"Eugene Choi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Choi, Eugene","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5093992817","display_name":"Chris Cremer","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cremer, Chris","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104338915","display_name":"Arash Ahmadian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ahmadian, Arash","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073570605","display_name":"Yash Chandak","orcid":"https://orcid.org/0009-0009-2080-566X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chandak, Yash","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065100569","display_name":"Olivier Pietquin","orcid":"https://orcid.org/0000-0002-5386-465X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pietquin, Olivier","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5110482875","display_name":"Matthieu Geist","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Geist, Matthieu","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":84},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.994,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.994,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9907,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10885","display_name":"Gene expression and cancer classification","score":0.9734,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4474471},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.34675407},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.29425246}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2406.19188","pdf_url":"http://arxiv.org/pdf/2406.19188","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2406.19188","pdf_url":"http://arxiv.org/pdf/2406.19188","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4396701345","https://openalex.org/W4396696052","https://openalex.org/W4395014643","https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2382290278","https://openalex.org/W2376932109","https://openalex.org/W2358668433","https://openalex.org/W2001405890"],"abstract_inverted_index":{"To":[0,98],"better":[1],"align":[2],"Large":[3],"Language":[4],"Models":[5],"(LLMs)":[6],"with":[7,122],"human":[8],"judgment,":[9],"Reinforcement":[10],"Learning":[11],"from":[12,40],"Human":[13],"Feedback":[14],"(RLHF)":[15],"learns":[16],"a":[17,36,41,46,104,115,153],"reward":[18,48],"model":[19,38],"and":[20,73,160],"then":[21],"optimizes":[22],"it":[23],"using":[24],"regularized":[25],"RL.":[26],"Recently,":[27],"direct":[28,109],"alignment":[29,110],"methods":[30,51],"were":[31],"introduced":[32],"to":[33,64,119],"learn":[34],"such":[35,150],"fine-tuned":[37],"directly":[39],"preference":[42],"dataset":[43],"without":[44],"computing":[45],"proxy":[47],"function.":[49],"These":[50],"are":[52,94],"built":[53],"upon":[54],"contrastive":[55],"losses":[56],"involving":[57],"the":[58,65,74,80,83,123,127,131,139,142,147,156],"log-likelihood":[59,75,140],"of":[60,149,158],"(dis)preferred":[61],"completions":[62,69],"according":[63],"trained":[66],"model.":[67],"However,":[68],"have":[70],"various":[71],"lengths,":[72],"is":[76,90],"not":[77],"length-invariant.":[78,111],"On":[79],"other":[81],"side,":[82],"cross-entropy":[84],"loss":[85],"used":[86],"in":[87],"supervised":[88],"training":[89],"length-invariant,":[91],"as":[92],"batches":[93],"typically":[95],"averaged":[96],"token-wise.":[97],"reconcile":[99],"these":[100],"approaches,":[101],"we":[102,113],"introduce":[103,114],"principled":[105],"approach":[106],"for":[107,130],"making":[108],"Formally,":[112],"new":[116],"averaging":[117,138],"operator,":[118],"be":[120],"composed":[121],"optimality":[124],"operator":[125],"giving":[126],"best":[128],"policy":[129],"underlying":[132],"RL":[133],"problem.":[134],"It":[135],"translates":[136],"into":[137],"within":[141],"loss.":[143],"We":[144],"empirically":[145],"study":[146],"effect":[148],"averaging,":[151],"observing":[152],"trade-off":[154],"between":[155],"length":[157],"generations":[159],"their":[161],"scores.":[162]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4400141865","counts_by_year":[],"updated_date":"2025-01-05T05:33:55.731072","created_date":"2024-06-29"}