{"id":"https://openalex.org/W4381797986","doi":"https://doi.org/10.48550/arxiv.2306.13085","title":"Harnessing Mixed Offline Reinforcement Learning Datasets via Trajectory Weighting","display_name":"Harnessing Mixed Offline Reinforcement Learning Datasets via Trajectory Weighting","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4381797986","doi":"https://doi.org/10.48550/arxiv.2306.13085"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2306.13085","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"journal-article","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2306.13085","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5017776857","display_name":"Zhang-Wei Hong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hong, Zhang-Wei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111774389","display_name":"Pulkit Agrawal","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Agrawal, Pulkit","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080042640","display_name":"R\u00e9mi Tachet des Combes","orcid":"https://orcid.org/0000-0002-0763-8723"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Combes, R\u00e9mi Tachet des","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5089214987","display_name":"Romain Laroche","orcid":"https://orcid.org/0000-0001-7180-2746"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Laroche, Romain","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":67},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9924,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9924,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8268743},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.77701175},{"id":"https://openalex.org/C183115368","wikidata":"https://www.wikidata.org/wiki/Q856577","display_name":"Weighting","level":2,"score":0.7184607},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.69363546},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.61702305},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.5340992},{"id":"https://openalex.org/C196083921","wikidata":"https://www.wikidata.org/wiki/Q7915758","display_name":"Variance (accounting)","level":2,"score":0.523028},{"id":"https://openalex.org/C52740198","wikidata":"https://www.wikidata.org/wiki/Q1539564","display_name":"Importance sampling","level":3,"score":0.49131432},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48237172},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4639034},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.44568175},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.15695682},{"id":"https://openalex.org/C19499675","wikidata":"https://www.wikidata.org/wiki/Q232207","display_name":"Monte Carlo method","level":2,"score":0.13039404},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09699565},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C121955636","wikidata":"https://www.wikidata.org/wiki/Q4116214","display_name":"Accounting","level":1,"score":0.0},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.0},{"id":"https://openalex.org/C1276947","wikidata":"https://www.wikidata.org/wiki/Q333","display_name":"Astronomy","level":1,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0},{"id":"https://openalex.org/C126838900","wikidata":"https://www.wikidata.org/wiki/Q77604","display_name":"Radiology","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2306.13085","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2306.13085","pdf_url":"http://arxiv.org/pdf/2306.13085","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2306.13085","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2306.13085","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/17","display_name":"Partnerships for the goals","score":0.43}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4386541577","https://openalex.org/W4312814274","https://openalex.org/W4285370786","https://openalex.org/W3207760230","https://openalex.org/W2536018345","https://openalex.org/W2358353312","https://openalex.org/W2296488620","https://openalex.org/W17155033","https://openalex.org/W1590307681","https://openalex.org/W1496222301"],"abstract_inverted_index":{"Most":[0],"offline":[1,84,143],"reinforcement":[2],"learning":[3],"(RL)":[4],"algorithms":[5,86,193],"return":[6,63],"a":[7,11,131,184],"target":[8,47],"policy":[9,22,48,58,129,158,189],"maximizing":[10],"trade-off":[12],"between":[13],"(1)":[14],"the":[15,20,25,29,33,36,43,46,53,56,61,66,100,116,150,156,161,165,168,171,202,213],"expected":[16],"performance":[17,44,54,153],"gain":[18],"over":[19,155],"behavior":[21,57,128,157],"that":[23,42,70,149,176],"collected":[24],"dataset,":[26],"and":[27,79,93,180],"(2)":[28],"risk":[30],"stemming":[31],"from":[32],"out-of-distribution-ness":[34],"of":[35,45,55,65,75,164,167,186],"induced":[37],"state-action":[38],"occupancy.":[39],"It":[40],"follows":[41],"is":[49,224],"strongly":[50],"related":[51],"to":[52,95,99,122],"and,":[59],"thus,":[60],"trajectory":[62],"distribution":[64],"dataset.":[67,172,203],"We":[68,146,173],"show":[69,107,175],"in":[71,109,170,219],"mixed":[72],"datasets":[73],"consisting":[74],"mostly":[76],"low-return":[77,91],"trajectories":[78,92,98,169],"minor":[80],"high-return":[81],"trajectories,":[82],"state-of-the-art":[83],"RL":[85,144],"are":[87],"overly":[88],"restrained":[89],"by":[90],"fail":[94],"exploit":[96,201],"high-performing":[97],"fullest.":[101],"To":[102],"overcome":[103],"this":[104,187],"issue,":[105],"we":[106,205],"that,":[108,208],"deterministic":[110],"MDPs":[111],"with":[112,141,160,195],"stochastic":[113,220],"initial":[114],"states,":[115],"dataset":[117,126],"sampling":[118,136,198],"can":[119],"be":[120,139,217],"re-weighted":[121,135],"induce":[123],"an":[124],"artificial":[125],"whose":[127],"has":[130],"higher":[132],"return.":[133],"This":[134],"strategy":[137,199],"may":[138,215],"combined":[140,194],"any":[142],"algorithm.":[145],"further":[147],"analyze":[148],"opportunity":[151],"for":[152],"improvement":[154],"correlates":[159],"positive-sided":[162],"variance":[163],"returns":[166],"empirically":[174,206],"while":[177],"CQL,":[178],"IQL,":[179],"TD3+BC":[181],"achieve":[182],"only":[183],"part":[185],"potential":[188],"improvement,":[190],"these":[191],"same":[192],"our":[196],"reweighted":[197],"fully":[200],"Furthermore,":[204],"demonstrate":[207],"despite":[209],"its":[210],"theoretical":[211],"limitation,":[212],"approach":[214],"still":[216],"efficient":[218],"environments.":[221],"The":[222],"code":[223],"available":[225],"at":[226],"https://github.com/Improbable-AI/harness-offline-rl.":[227]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4381797986","counts_by_year":[],"updated_date":"2025-01-21T06:41:32.945542","created_date":"2023-06-24"}