{"id":"https://openalex.org/W4378510476","doi":"https://doi.org/10.48550/arxiv.2305.15363","title":"Inverse Preference Learning: Preference-based RL without a Reward Function","display_name":"Inverse Preference Learning: Preference-based RL without a Reward Function","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4378510476","doi":"https://doi.org/10.48550/arxiv.2305.15363"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2305.15363","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2305.15363","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5014139284","display_name":"Joey Hejna","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hejna, Joey","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5080725225","display_name":"Dorsa Sadigh","orcid":"https://orcid.org/0000-0002-7802-9183"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sadigh, Dorsa","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.712026,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":92,"max":93},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.9678,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.9678,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11072","display_name":"Peroxisome Proliferator-Activated Receptors","score":0.9523,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.944,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/preference-learning","display_name":"Preference learning","score":0.6245384},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.53830993},{"id":"https://openalex.org/keywords/hyperparameter","display_name":"Hyperparameter","score":0.4853517}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.722821},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6670875},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6285651},{"id":"https://openalex.org/C181204326","wikidata":"https://www.wikidata.org/wiki/Q7239820","display_name":"Preference learning","level":3,"score":0.6245384},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5582548},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.53830993},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.49382934},{"id":"https://openalex.org/C8642999","wikidata":"https://www.wikidata.org/wiki/Q4171168","display_name":"Hyperparameter","level":2,"score":0.4853517},{"id":"https://openalex.org/C2781249084","wikidata":"https://www.wikidata.org/wiki/Q908656","display_name":"Preference","level":2,"score":0.4765535},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10781273},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2305.15363","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2305.15363","pdf_url":"http://arxiv.org/pdf/2305.15363","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2305.15363","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2305.15363","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4285602503","https://openalex.org/W4281847915","https://openalex.org/W4281387587","https://openalex.org/W4205377104","https://openalex.org/W3192882013","https://openalex.org/W2937325523","https://openalex.org/W2911102221","https://openalex.org/W2768698792","https://openalex.org/W257970033","https://openalex.org/W1994181006"],"abstract_inverted_index":{"Reward":[0],"functions":[1,25,158],"are":[2],"difficult":[3],"to":[4,9,49,148],"design":[5],"and":[6,52,58,76,131,140,155,164],"often":[7],"hard":[8],"align":[10],"with":[11,41],"human":[12,27],"intent.":[13],"Preference-based":[14],"Reinforcement":[15],"Learning":[16,81],"(RL)":[17],"algorithms":[18],"address":[19],"these":[20],"problems":[21],"by":[22,55],"learning":[23,86],"reward":[24,39,61,107,124,157],"from":[26,87],"feedback.":[28],"However,":[29],"the":[30,100,106,119],"majority":[31],"of":[32,67,137],"preference-based":[33],"RL":[34,43],"methods":[35],"na\\\"ively":[36],"combine":[37],"supervised":[38],"models":[40],"off-the-shelf":[42],"algorithms.":[44],"Contemporary":[45],"approaches":[46,151],"have":[47],"sought":[48],"improve":[50],"performance":[51,146],"query":[53],"complexity":[54],"using":[56,68],"larger":[57],"more":[59,132,149],"complex":[60,70,150],"architectures":[62],"such":[63],"as":[64],"transformers.":[65],"Instead":[66],"highly":[69],"architectures,":[71],"we":[72,116],"develop":[73],"a":[74,97,122,135],"new":[75],"parameter-efficient":[77],"algorithm,":[78],"Inverse":[79],"Preference":[80],"(IPL),":[82],"specifically":[83],"designed":[84],"for":[85,96,121],"offline":[88],"preference":[89],"data.":[90],"Our":[91,126,168],"key":[92],"insight":[93],"is":[94,129,170],"that":[95,152],"fixed":[98],"policy,":[99],"$Q$-function":[101],"encodes":[102],"all":[103],"information":[104],"about":[105],"function,":[108],"effectively":[109],"making":[110],"them":[111],"interchangeable.":[112],"Using":[113],"this":[114],"insight,":[115],"completely":[117],"eliminate":[118],"need":[120],"learned":[123,165],"function.":[125],"resulting":[127],"algorithm":[128],"simpler":[130],"parameter-efficient.":[133],"Across":[134],"suite":[136],"continuous":[138],"control":[139],"robotics":[141],"benchmarks,":[142],"IPL":[143],"attains":[144],"competitive":[145],"compared":[147],"leverage":[153],"transformer-based":[154],"non-Markovian":[156],"while":[159],"having":[160],"fewer":[161],"algorithmic":[162],"hyperparameters":[163],"network":[166],"parameters.":[167],"code":[169],"publicly":[171],"released.":[172]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4378510476","counts_by_year":[{"year":2024,"cited_by_count":6}],"updated_date":"2025-01-22T17:56:55.350255","created_date":"2023-05-27"}