{"id":"https://openalex.org/W4310922000","doi":"https://doi.org/10.48550/arxiv.2212.03363","title":"Few-Shot Preference Learning for Human-in-the-Loop RL","display_name":"Few-Shot Preference Learning for Human-in-the-Loop RL","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4310922000","doi":"https://doi.org/10.48550/arxiv.2212.03363"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2212.03363","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2212.03363","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5014139284","display_name":"Joey Hejna","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hejna, Joey","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5080725225","display_name":"Dorsa Sadigh","orcid":"https://orcid.org/0000-0002-7802-9183"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sadigh, Dorsa","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.865382,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":88,"max":90},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9986,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9986,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9948,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9874,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/preference-learning","display_name":"Preference learning","score":0.69315845},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.55528665},{"id":"https://openalex.org/keywords/preference-elicitation","display_name":"Preference Elicitation","score":0.5182738},{"id":"https://openalex.org/keywords/human-in-the-loop","display_name":"Human-in-the-loop","score":0.4445339}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7747668},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.74701554},{"id":"https://openalex.org/C181204326","wikidata":"https://www.wikidata.org/wiki/Q7239820","display_name":"Preference learning","level":3,"score":0.69315845},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.68844056},{"id":"https://openalex.org/C2781249084","wikidata":"https://www.wikidata.org/wiki/Q908656","display_name":"Preference","level":2,"score":0.6795908},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.67135733},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.55528665},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.54571295},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.5387223},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5301776},{"id":"https://openalex.org/C2777868144","wikidata":"https://www.wikidata.org/wiki/Q7239817","display_name":"Preference elicitation","level":3,"score":0.5182738},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.51822424},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.49326915},{"id":"https://openalex.org/C2780626000","wikidata":"https://www.wikidata.org/wiki/Q5936775","display_name":"Human-in-the-loop","level":2,"score":0.4445339},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.34351507},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.07178658},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C175444787","wikidata":"https://www.wikidata.org/wiki/Q39072","display_name":"Microeconomics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2212.03363","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2212.03363","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2212.03363","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"display_name":"Peace, justice, and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.46}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4286900255","https://openalex.org/W4200207182","https://openalex.org/W3210700034","https://openalex.org/W2974568947","https://openalex.org/W2947277820","https://openalex.org/W2914800632","https://openalex.org/W2157910771","https://openalex.org/W2126528747","https://openalex.org/W1562775108","https://openalex.org/W1488237461"],"abstract_inverted_index":{"While":[0],"reinforcement":[1],"learning":[2,47,114],"(RL)":[3],"has":[4,20],"become":[5],"a":[6,163,192],"more":[7,134],"popular":[8],"approach":[9],"for":[10,17,64,93,113,158],"robotics,":[11],"designing":[12],"sufficiently":[13],"informative":[14,84],"reward":[15,48,75,115],"functions":[16,49,76],"complex":[18],"tasks":[19,160],"proven":[21],"to":[22,29,41,67,77,98,106,176,204],"be":[23,219],"extremely":[24],"difficult":[25],"due":[26],"their":[27],"inability":[28],"capture":[30],"human":[31,51,66,210],"intent":[32],"and":[33,154,184,216],"policy":[34],"exploitation.":[35],"Preference":[36],"based":[37],"RL":[38,131],"algorithms":[39],"seek":[40],"overcome":[42],"these":[43],"challenges":[44],"by":[45,128,141,182],"directly":[46],"from":[50,208],"feedback.":[52],"Unfortunately,":[53],"prior":[54,151],"work":[55],"either":[56],"requires":[57],"an":[58,119],"unreasonable":[59],"number":[60],"of":[61,74,81,110,125,137,144,165,172,188,213],"queries":[62],"implausible":[63],"any":[65],"answer":[68],"or":[69],"overly":[70],"restricts":[71],"the":[72,79,82,108,123,133,142,170,186],"class":[73],"guarantee":[78],"elicitation":[80],"most":[83,99],"queries,":[85],"resulting":[86],"in":[87,180,200],"models":[88,149],"that":[89,101],"are":[90],"insufficiently":[91],"expressive":[92],"realistic":[94],"robotics":[95],"tasks.":[96],"Contrary":[97],"works":[100],"focus":[102],"on":[103,150,191],"query":[104],"selection":[105],"\\emph{minimize}":[107],"amount":[109,171],"data":[111,127,153],"required":[112],"functions,":[116],"we":[117,146,168],"take":[118],"opposite":[120],"approach:":[121],"\\emph{expanding}":[122],"pool":[124],"available":[126],"viewing":[129],"human-in-the-loop":[130],"through":[132],"flexible":[135],"lens":[136],"multi-task":[138],"learning.":[139],"Motivated":[140],"success":[143],"meta-learning,":[145],"pre-train":[147],"preference":[148],"task":[152],"quickly":[155],"adapt":[156],"them":[157],"new":[159],"using":[161],"only":[162],"handful":[164],"queries.":[166],"Empirically,":[167],"reduce":[169],"online":[173],"feedback":[174],"needed":[175],"train":[177,205],"manipulation":[178],"policies":[179,207],"Meta-World":[181],"20$\\times$,":[183],"demonstrate":[185],"effectiveness":[187],"our":[189,214],"method":[190],"real":[193],"Franka":[194],"Panda":[195],"Robot.":[196],"Moreover,":[197],"this":[198],"reduction":[199],"query-complexity":[201],"allows":[202],"us":[203],"robot":[206],"actual":[209],"users.":[211],"Videos":[212],"results":[215],"code":[217],"can":[218],"found":[220],"at":[221],"https://sites.google.com/view/few-shot-preference-rl/home.":[222]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4310922000","counts_by_year":[{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":3}],"updated_date":"2025-01-06T06:15:45.560920","created_date":"2022-12-21"}