{"id":"https://openalex.org/W2767506186","doi":"https://doi.org/10.1007/s00521-017-3241-z","title":"Deep imitation learning for 3D navigation tasks","display_name":"Deep imitation learning for 3D navigation tasks","publication_year":2017,"publication_date":"2017-12-04","ids":{"openalex":"https://openalex.org/W2767506186","doi":"https://doi.org/10.1007/s00521-017-3241-z","mag":"2767506186","pmid":"https://pubmed.ncbi.nlm.nih.gov/29576690","pmcid":"https://www.ncbi.nlm.nih.gov/pmc/articles/5857289"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1007/s00521-017-3241-z","pdf_url":"https://link.springer.com/content/pdf/10.1007%2Fs00521-017-3241-z.pdf","source":{"id":"https://openalex.org/S147897268","display_name":"Neural Computing and Applications","issn_l":"0941-0643","issn":["0941-0643","1433-3058"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319965","https://openalex.org/P4310319900"],"host_organization_lineage_names":["Springer Nature","Springer Science+Business Media"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"journal-article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007%2Fs00521-017-3241-z.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101769265","display_name":"Ahmed Hussein","orcid":"https://orcid.org/0000-0001-5227-9929"},"institutions":[{"id":"https://openalex.org/I522815984","display_name":"Robert Gordon University","ror":"https://ror.org/04f0qj703","country_code":"GB","type":"education","lineage":["https://openalex.org/I522815984"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Ahmed Hussein","raw_affiliation_strings":["School of Computing Science and Digital Media, Robert Gordon University, The Sir Ian Wood Building, Garthdee Rd, Aberdeen, AB10 7GE, UK"],"affiliations":[{"raw_affiliation_string":"School of Computing Science and Digital Media, Robert Gordon University, The Sir Ian Wood Building, Garthdee Rd, Aberdeen, AB10 7GE, UK","institution_ids":["https://openalex.org/I522815984"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040807448","display_name":"Eyad Elyan","orcid":"https://orcid.org/0000-0002-8342-9026"},"institutions":[{"id":"https://openalex.org/I522815984","display_name":"Robert Gordon University","ror":"https://ror.org/04f0qj703","country_code":"GB","type":"education","lineage":["https://openalex.org/I522815984"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Eyad Elyan","raw_affiliation_strings":["School of Computing Science and Digital Media, Robert Gordon University, The Sir Ian Wood Building, Garthdee Rd, Aberdeen, AB10 7GE, UK"],"affiliations":[{"raw_affiliation_string":"School of Computing Science and Digital Media, Robert Gordon University, The Sir Ian Wood Building, Garthdee Rd, Aberdeen, AB10 7GE, UK","institution_ids":["https://openalex.org/I522815984"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016157034","display_name":"Mohamed Medhat Gaber","orcid":"https://orcid.org/0000-0003-0339-4474"},"institutions":[{"id":"https://openalex.org/I12870472","display_name":"Birmingham City University","ror":"https://ror.org/00t67pt25","country_code":"GB","type":"education","lineage":["https://openalex.org/I12870472"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Mohamed Medhat Gaber","raw_affiliation_strings":["School of Computing and Digital Technology, Birmingham City University, 15 Bartholomew Row, Birmingham, B5 5JU, UK"],"affiliations":[{"raw_affiliation_string":"School of Computing and Digital Technology, Birmingham City University, 15 Bartholomew Row, Birmingham, B5 5JU, UK","institution_ids":["https://openalex.org/I12870472"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5016214321","display_name":"Chrisina Jayne","orcid":"https://orcid.org/0000-0001-7292-2109"},"institutions":[{"id":"https://openalex.org/I522815984","display_name":"Robert Gordon University","ror":"https://ror.org/04f0qj703","country_code":"GB","type":"education","lineage":["https://openalex.org/I522815984"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Chrisina Jayne","raw_affiliation_strings":["School of Computing Science and Digital Media, Robert Gordon University, The Sir Ian Wood Building, Garthdee Rd, Aberdeen, AB10 7GE, UK"],"affiliations":[{"raw_affiliation_string":"School of Computing Science and Digital Media, Robert Gordon University, The Sir Ian Wood Building, Garthdee Rd, Aberdeen, AB10 7GE, UK","institution_ids":["https://openalex.org/I522815984"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5101769265"],"corresponding_institution_ids":["https://openalex.org/I522815984"],"apc_list":{"value":2390,"currency":"EUR","value_usd":2990,"provenance":"doaj"},"apc_paid":{"value":2390,"currency":"EUR","value_usd":2990,"provenance":"doaj"},"fwci":3.592,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":45,"citation_normalized_percentile":{"value":0.923123,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":"29","issue":"7","first_page":"389","last_page":"404"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.997,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.9905,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.81802094},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7368792},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.7036785},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6644331},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.6137842},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.5413463},{"id":"https://openalex.org/C126388530","wikidata":"https://www.wikidata.org/wiki/Q1131737","display_name":"Imitation","level":2,"score":0.5210547},{"id":"https://openalex.org/C188888258","wikidata":"https://www.wikidata.org/wiki/Q7353390","display_name":"Robot learning","level":4,"score":0.5150337},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4762716},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.44864354},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.12376842},{"id":"https://openalex.org/C19966478","wikidata":"https://www.wikidata.org/wiki/Q4810574","display_name":"Mobile robot","level":3,"score":0.07419333},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":11,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1007/s00521-017-3241-z","pdf_url":"https://link.springer.com/content/pdf/10.1007%2Fs00521-017-3241-z.pdf","source":{"id":"https://openalex.org/S147897268","display_name":"Neural Computing and Applications","issn_l":"0941-0643","issn":["0941-0643","1433-3058"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319965","https://openalex.org/P4310319900"],"host_organization_lineage_names":["Springer Nature","Springer Science+Business Media"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://europepmc.org/articles/pmc5857289","pdf_url":"https://europepmc.org/articles/pmc5857289?pdf=render","source":{"id":"https://openalex.org/S4306400806","display_name":"Europe PMC (PubMed Central)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1303153112","host_organization_name":"European Bioinformatics Institute","host_organization_lineage":["https://openalex.org/I1303153112"],"host_organization_lineage_names":["European Bioinformatics Institute"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://research.tees.ac.uk/en/publications/386a704f-9e5d-4c21-bb2d-a06a6c4ddbca","pdf_url":"https://research.tees.ac.uk/files/8283485/Deep_imitation_learning_for_3D_navigation_tasks.pdf","source":{"id":"https://openalex.org/S4306402475","display_name":"Teesside University Research Portal (Teesside University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I874055015","host_organization_name":"Teesside University","host_organization_lineage":["https://openalex.org/I874055015"],"host_organization_lineage_names":["Teesside University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://rgu-repository.worktribe.com/file/246886/1/HUSSEIN%202018%20Deep%20imitation%20learning%20for%203D","pdf_url":"https://rgu-repository.worktribe.com/file/246886/1/HUSSEIN%202018%20Deep%20imitation%20learning%20for%203D","source":{"id":"https://openalex.org/S4306402277","display_name":"OpenAIR@RGU (Robert Gordon University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I522815984","host_organization_name":"Robert Gordon University","host_organization_lineage":["https://openalex.org/I522815984"],"host_organization_lineage_names":["Robert Gordon University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://radar.brookes.ac.uk/radar/items/73fd3df5-3f73-4877-862e-c43373605dd7/1","pdf_url":"https://radar.brookes.ac.uk/radar/file/73fd3df5-3f73-4877-862e-c43373605dd7/1/fulltext.pdf","source":{"id":"https://openalex.org/S4306400541","display_name":"Radar (Oxford Brookes University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I124261462","host_organization_name":"Oxford Brookes University","host_organization_lineage":["https://openalex.org/I124261462"],"host_organization_lineage_names":["Oxford Brookes University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"http://hdl.handle.net/10059/2543","pdf_url":"https://rgu-repository.worktribe.com/preview/293131/HUSSEIN%202018%20Deep%20imitation%20learning%20for%203D.pdf","source":{"id":"https://openalex.org/S4306402277","display_name":"OpenAIR@RGU (Robert Gordon University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I522815984","host_organization_name":"Robert Gordon University","host_organization_lineage":["https://openalex.org/I522815984"],"host_organization_lineage_names":["Robert Gordon University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"http://link.springer.com/10.1007/s00521-017-3241-z","pdf_url":"https://link.springer.com/content/pdf/10.1007/s00521-017-3241-z.pdf","source":{"id":"https://openalex.org/S4306402475","display_name":"Teesside University Research Portal (Teesside University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I874055015","host_organization_name":"Teesside University","host_organization_lineage":["https://openalex.org/I874055015"],"host_organization_lineage_names":["Teesside University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://research.tees.ac.uk/ws/files/8283485/Deep_imitation_learning_for_3D_navigation_tasks.pdf","pdf_url":"https://research.tees.ac.uk/ws/files/8283485/Deep_imitation_learning_for_3D_navigation_tasks.pdf","source":{"id":"https://openalex.org/S4306402475","display_name":"Teesside University Research Portal (Teesside University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I874055015","host_organization_name":"Teesside University","host_organization_lineage":["https://openalex.org/I874055015"],"host_organization_lineage_names":["Teesside University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5857289","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":["National Institutes of Health"],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://www.open-access.bcu.ac.uk/5211/1/Deep%20Imitation%20Learning%20for%203D%20Navigation%20Tasks.pdf","pdf_url":"https://www.open-access.bcu.ac.uk/5211/1/Deep%20Imitation%20Learning%20for%203D%20Navigation%20Tasks.pdf","source":{"id":"https://openalex.org/S4306402654","display_name":"BCU Open Access Repository (Birmingham City University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I12870472","host_organization_name":"Birmingham City University","host_organization_lineage":["https://openalex.org/I12870472"],"host_organization_lineage_names":["Birmingham City University"],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false},{"is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/29576690","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":["National Institutes of Health"],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1007/s00521-017-3241-z","pdf_url":"https://link.springer.com/content/pdf/10.1007%2Fs00521-017-3241-z.pdf","source":{"id":"https://openalex.org/S147897268","display_name":"Neural Computing and Applications","issn_l":"0941-0643","issn":["0941-0643","1433-3058"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319965","https://openalex.org/P4310319900"],"host_organization_lineage_names":["Springer Nature","Springer Science+Business Media"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":38,"referenced_works":["https://openalex.org/W1506176787","https://openalex.org/W1511887321","https://openalex.org/W1520101322","https://openalex.org/W169901042","https://openalex.org/W1757796397","https://openalex.org/W1923344279","https://openalex.org/W1949804828","https://openalex.org/W1971890413","https://openalex.org/W1980969546","https://openalex.org/W1987927386","https://openalex.org/W1999874108","https://openalex.org/W2038689900","https://openalex.org/W2100458216","https://openalex.org/W2104733512","https://openalex.org/W2111781320","https://openalex.org/W2116165953","https://openalex.org/W2116522068","https://openalex.org/W2126105931","https://openalex.org/W2134264591","https://openalex.org/W2141125852","https://openalex.org/W2142641780","https://openalex.org/W2143915598","https://openalex.org/W2145339207","https://openalex.org/W2151210636","https://openalex.org/W2153027856","https://openalex.org/W2163605009","https://openalex.org/W2169209873","https://openalex.org/W2170567160","https://openalex.org/W2173248099","https://openalex.org/W2257979135","https://openalex.org/W2510924756","https://openalex.org/W2560647685","https://openalex.org/W2604382266","https://openalex.org/W2607198029","https://openalex.org/W2609009256","https://openalex.org/W2766504023","https://openalex.org/W2964043796","https://openalex.org/W80680352"],"related_works":["https://openalex.org/W4220829754","https://openalex.org/W3165944253","https://openalex.org/W3153786280","https://openalex.org/W3134526803","https://openalex.org/W3127551068","https://openalex.org/W2930863966","https://openalex.org/W2643884694","https://openalex.org/W2165180011","https://openalex.org/W2126211886","https://openalex.org/W2047027848"],"abstract_inverted_index":{"Deep":[0],"learning":[1,7,18,32,35,41,86,105,122,137,154,171,178,249,265],"techniques":[2],"have":[3],"shown":[4,262],"success":[5],"in":[6,12,33,68,75,94,106,190],"from":[8,92,147,155,244,250],"raw":[9,148,245],"high-dimensional":[10],"data":[11],"various":[13],"applications.":[14,207],"While":[15],"deep":[16,31,84,120,140],"reinforcement":[17,121,136],"is":[19,101,115,202,261],"recently":[20],"gaining":[21],"popularity":[22],"as":[23,132,134],"a":[24,53,83,95,191,198,277],"method":[25,46,87,131,238],"to":[26,47,57,62,88,108,110,117,165,204,218,229,254],"train":[27],"intelligent":[28,49],"agents,":[29],"utilizing":[30],"imitation":[34,85],"has":[36],"been":[37],"scarcely":[38],"explored.":[39],"Imitation":[40],"can":[42,71,239,266],"be":[43,72],"an":[44,256],"efficient":[45],"teach":[48],"agents":[50],"by":[51,172,179],"providing":[52,224],"set":[54],"of":[55,170,177,212,215,271,280],"demonstrations":[56,70,93,156,214],"learn":[58,89,145,241,255],"from.":[59],"However,":[60],"generalizing":[61],"situations":[63],"that":[64,201,235,263],"are":[65,159,184,197],"not":[66],"represented":[67],"the":[69,135,167,175,210,220,230,236,269,272],"challenging,":[73],"especially":[74],"3D":[76,96,192],"environments.":[77],"In":[78],"this":[79],"paper,":[80],"we":[81],"propose":[82],"navigation":[90,188,242],"tasks":[91,189,196,243],"environment.":[97,194],"The":[98,129,181,232],"supervised":[99],"policy":[100,275],"refined":[102],"using":[103,276],"active":[104,264,281],"order":[107],"generalize":[109],"unseen":[111],"situations.":[112],"This":[113,162],"approach":[114],"compared":[116],"two":[118],"popular":[119],"techniques:":[123],"deep-Q-networks":[124],"and":[125,144,157,222],"Asynchronous":[126],"actor-critic":[127],"(A3C).":[128],"proposed":[130,182,237],"well":[133],"methods":[138,183,252],"employ":[139],"convolutional":[141],"neural":[142],"networks":[143],"directly":[146],"visual":[149,246],"input.":[150],"Methods":[151],"for":[152],"combining":[153],"experience":[158,173,251],"also":[160],"investigated.":[161],"combination":[163],"aims":[164],"join":[166],"generalization":[168],"ability":[169],"with":[174],"efficiency":[176],"imitation.":[180],"evaluated":[185],"on":[186],"4":[187],"simulated":[193],"Navigation":[195],"typical":[199],"problem":[200],"relevant":[203],"many":[205],"real":[206],"They":[208],"pose":[209],"challenge":[211],"requiring":[213],"long":[216],"trajectories":[217],"reach":[219],"target":[221],"only":[223],"delayed":[225],"rewards":[226],"(usually":[227],"terminal)":[228],"agent.":[231],"experiments":[233],"show":[234],"successfully":[240],"input":[247],"while":[248],"fail":[253],"effective":[257],"policy.":[258],"Moreover,":[259],"it":[260],"significantly":[267],"improve":[268],"performance":[270],"initially":[273],"learned":[274],"small":[278],"number":[279],"samples.":[282]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2767506186","counts_by_year":[{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":9},{"year":2020,"cited_by_count":4},{"year":2019,"cited_by_count":10},{"year":2018,"cited_by_count":5}],"updated_date":"2024-12-13T14:29:49.817526","created_date":"2017-11-17"}