{"id":"https://openalex.org/W4399510993","doi":"https://doi.org/10.48550/arxiv.2406.04920","title":"Sim-to-real Transfer of Deep Reinforcement Learning Agents for Online\n Coverage Path Planning","display_name":"Sim-to-real Transfer of Deep Reinforcement Learning Agents for Online\n Coverage Path Planning","publication_year":2024,"publication_date":"2024-06-07","ids":{"openalex":"https://openalex.org/W4399510993","doi":"https://doi.org/10.48550/arxiv.2406.04920"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2406.04920","pdf_url":"http://arxiv.org/pdf/2406.04920","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2406.04920","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5083410485","display_name":"Arvi Jonnarth","orcid":"https://orcid.org/0000-0002-3434-2522"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jonnarth, Arvi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072067591","display_name":"Ola Johansson","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Johansson, Ola","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5042087981","display_name":"Michael Felsberg","orcid":"https://orcid.org/0000-0002-6096-3648"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Felsberg, Michael","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":77},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10586","display_name":"Robotic Path Planning Algorithms","score":0.9971,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10586","display_name":"Robotic Path Planning Algorithms","score":0.9971,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9271,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11615","display_name":"Control and Dynamics of Mobile Robots","score":0.9081,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.538905}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.80803305},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.577708},{"id":"https://openalex.org/C2777735758","wikidata":"https://www.wikidata.org/wiki/Q817765","display_name":"Path (computing)","level":2,"score":0.5611164},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.538905},{"id":"https://openalex.org/C2776175482","wikidata":"https://www.wikidata.org/wiki/Q1195816","display_name":"Transfer (computing)","level":2,"score":0.5052975},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4612028},{"id":"https://openalex.org/C81074085","wikidata":"https://www.wikidata.org/wiki/Q366872","display_name":"Motion planning","level":3,"score":0.45015},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.43242383},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.2536239},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.13999602},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.11917639},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.07128769},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2406.04920","pdf_url":"http://arxiv.org/pdf/2406.04920","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2406.04920","pdf_url":"http://arxiv.org/pdf/2406.04920","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4310083477","https://openalex.org/W2920061524","https://openalex.org/W2328553770","https://openalex.org/W2145821588","https://openalex.org/W2107890255","https://openalex.org/W2106552856","https://openalex.org/W2086122291","https://openalex.org/W2038908348","https://openalex.org/W1987513656","https://openalex.org/W1977959518"],"abstract_inverted_index":{"Sim-to-real":[0],"transfer":[1,49,183],"presents":[2],"a":[3,66,70,77,109,113,137,151,178],"difficult":[4],"challenge,":[5],"where":[6,85,192],"models":[7],"trained":[8,144],"in":[9,15,146,172,197],"simulation":[10,173,187],"are":[11],"to":[12,27,37,68,94,136,141,188],"be":[13,207],"deployed":[14],"the":[16,23,31,45,62,83,86,91,96,101,105,160,170,189,198],"real":[17,120,190],"world.":[18],"The":[19],"distribution":[20],"shift":[21],"between":[22],"two":[24],"settings":[25],"leads":[26],"biased":[28],"representations":[29],"of":[30,47,50,76,130,200],"perceived":[32],"real-world":[33],"environment,":[34],"and":[35,90,116,123,174],"thus":[36],"suboptimal":[38],"predictions.":[39],"In":[40,60],"this":[41],"work,":[42],"we":[43,81,182],"tackle":[44],"challenge":[46],"sim-to-real":[48,106,161],"reinforcement":[51],"learning":[52,194],"(RL)":[53],"agents":[54],"for":[55,65,134,158],"coverage":[56],"path":[57,71,97],"planning":[58],"(CPP).":[59],"CPP,":[61],"task":[63],"is":[64,88,132,156],"robot":[67,121],"find":[69,149],"that":[72,150],"visits":[73],"every":[74],"point":[75],"confined":[78],"area.":[79],"Specifically,":[80],"consider":[82],"case":[84],"environment":[87,111],"unknown,":[89],"agent":[92,143],"needs":[93],"plan":[95],"online":[98],"while":[99,118,163],"mapping":[100],"environment.":[102],"We":[103,126,148],"bridge":[104],"gap":[107],"through":[108],"semi-virtual":[110],"with":[112,202],"simulated":[114],"sensor":[115],"obstacles,":[117],"including":[119],"kinematics":[122],"real-time":[124],"aspects.":[125],"investigate":[127],"what":[128],"level":[129],"fine-tuning":[131,164],"needed":[133],"adapting":[135],"realistic":[138],"setting,":[139],"comparing":[140],"an":[142],"solely":[145],"simulation.":[147],"high":[152,179],"model":[153,171],"inference":[154,180],"frequency":[155],"sufficient":[157],"reducing":[159],"gap,":[162],"degrades":[165],"performance":[166],"initially.":[167],"By":[168],"training":[169],"deploying":[175],"it":[176],"at":[177],"frequency,":[181],"state-of-the-art":[184],"results":[185],"from":[186],"domain,":[191],"direct":[193],"would":[195,206],"take":[196],"order":[199],"weeks":[201],"manual":[203],"interaction,":[204],"i.e.,":[205],"completely":[208],"infeasible.":[209]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4399510993","counts_by_year":[],"updated_date":"2025-04-19T04:34:18.148572","created_date":"2024-06-11"}