{"id":"https://openalex.org/W4402705358","doi":"https://doi.org/10.48550/arxiv.2408.15593","title":"Skills Regularized Task Decomposition for Multi-task Offline\n Reinforcement Learning","display_name":"Skills Regularized Task Decomposition for Multi-task Offline\n Reinforcement Learning","publication_year":2024,"publication_date":"2024-08-28","ids":{"openalex":"https://openalex.org/W4402705358","doi":"https://doi.org/10.48550/arxiv.2408.15593"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2408.15593","pdf_url":"http://arxiv.org/pdf/2408.15593","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2408.15593","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047784615","display_name":"Minjong Yoo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yoo, Minjong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036306083","display_name":"Sangwoo Cho","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cho, Sangwoo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5001227049","display_name":"Honguk Woo","orcid":"https://orcid.org/0000-0001-6948-3440"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Woo, Honguk","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.908383,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":78,"max":88},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9405,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9405,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8074243},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.7511688},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.60542023},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.563552},{"id":"https://openalex.org/C124681953","wikidata":"https://www.wikidata.org/wiki/Q339062","display_name":"Decomposition","level":2,"score":0.5599084},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37597317},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.2615878},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.10803741},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.094965845},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2408.15593","pdf_url":"http://arxiv.org/pdf/2408.15593","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2408.15593","pdf_url":"http://arxiv.org/pdf/2408.15593","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4310083477","https://openalex.org/W2920061524","https://openalex.org/W2366903352","https://openalex.org/W2328553770","https://openalex.org/W2145821588","https://openalex.org/W2107890255","https://openalex.org/W2106552856","https://openalex.org/W2086122291","https://openalex.org/W2038908348","https://openalex.org/W1977959518"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1],"(RL)":[2],"with":[3,31,52,176,198],"diverse":[4],"offline":[5,41,45,186,215],"datasets":[6,90,107,197,226],"can":[7],"have":[8],"the":[9,13,19,53,63,71,76,102,151,157,183,191,221],"advantage":[10],"of":[11,15,97,185,224],"leveraging":[12],"relation":[14],"multiple":[16,67],"tasks":[17,149,166,237],"and":[18,49,122,132,148,155,227,238],"common":[20,117],"skills":[21,118,147,178,204],"learned":[22,121,189],"across":[23,105],"those":[24,106],"tasks,":[25,68],"hence":[26],"allowing":[27],"us":[28],"to":[29,61,126,144,164,167,202,220],"deal":[30],"real-world":[32],"complex":[33],"problems":[34],"efficiently":[35],"in":[36,130],"a":[37,83,111,128,161],"data-driven":[38],"way.":[39],"In":[40,78,135],"RL":[42,86,187,216],"where":[43],"only":[44],"data":[46,72],"is":[47,55,58,218],"used":[48,123],"online":[50],"interaction":[51],"environment":[54],"restricted,":[56],"it":[57,228],"yet":[59],"difficult":[60],"achieve":[62],"optimal":[64],"policy":[65],"for":[66,75,115,205,233],"especially":[69],"when":[70],"quality":[73],"varies":[74],"tasks.":[77,241],"this":[79,136],"paper,":[80],"we":[81,109,139,194,210],"present":[82],"skill-based":[84],"multi-task":[85,214],"technique":[87],"on":[88,150,190],"heterogeneous":[89],"that":[91,172,212],"are":[92,119,173],"generated":[93],"by":[94],"behavior":[95],"policies":[96],"different":[98],"quality.":[99],"To":[100,181],"learn":[101],"shareable":[103],"knowledge":[104],"effectively,":[108],"employ":[110],"task":[112,129],"decomposition":[113],"method":[114],"which":[116],"jointly":[120],"as":[124,160],"guidance":[125],"reformulate":[127],"shared":[131],"achievable":[133],"subtasks.":[134],"joint":[137],"learning,":[138],"use":[140,156],"Wasserstein":[141],"auto-encoder":[142],"(WAE)":[143],"represent":[145],"both":[146],"same":[152],"latent":[153,192],"space":[154],"quality-weighted":[158],"loss":[159],"regularization":[162],"term":[163],"induce":[165],"be":[168],"decomposed":[169],"into":[170],"subtasks":[171],"more":[174],"consistent":[175],"high-quality":[177,203],"than":[179],"others.":[180],"improve":[182],"performance":[184],"agents":[188],"space,":[193],"also":[195],"augment":[196],"imaginary":[199],"trajectories":[200],"relevant":[201],"each":[206],"task.":[207],"Through":[208],"experiments,":[209],"show":[211],"our":[213],"approach":[217],"robust":[219],"mixed":[222],"configurations":[223],"different-quality":[225],"outperforms":[229],"other":[230],"state-of-the-art":[231],"algorithms":[232],"several":[234],"robotic":[235],"manipulation":[236],"drone":[239],"navigation":[240]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4402705358","counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-04-12T03:03:46.075286","created_date":"2024-09-21"}