{"id":"https://openalex.org/W3130395087","doi":"https://doi.org/10.1609/aaai.v36i8.20798","title":"Online Apprenticeship Learning","display_name":"Online Apprenticeship Learning","publication_year":2022,"publication_date":"2022-06-28","ids":{"openalex":"https://openalex.org/W3130395087","doi":"https://doi.org/10.1609/aaai.v36i8.20798","mag":"3130395087"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v36i8.20798","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/20798/20557","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/20798/20557","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5049062714","display_name":"Lior Shani","orcid":"https://orcid.org/0000-0003-1504-0534"},"institutions":[{"id":"https://openalex.org/I174306211","display_name":"Technion \u2013 Israel Institute of Technology","ror":"https://ror.org/03qryx823","country_code":"IL","type":"education","lineage":["https://openalex.org/I174306211"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Lior Shani","raw_affiliation_strings":["Technion \u2013 Israel Institute of Technology, Israel"],"affiliations":[{"raw_affiliation_string":"Technion \u2013 Israel Institute of Technology, Israel","institution_ids":["https://openalex.org/I174306211"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018613019","display_name":"Tom Zahavy","orcid":"https://orcid.org/0009-0009-2309-922X"},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Tom Zahavy","raw_affiliation_strings":["Deepmind, UK"],"affiliations":[{"raw_affiliation_string":"Deepmind, UK","institution_ids":["https://openalex.org/I4210090411"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5036260775","display_name":"Shie Mannor","orcid":"https://orcid.org/0000-0003-4439-7647"},"institutions":[{"id":"https://openalex.org/I174306211","display_name":"Technion \u2013 Israel Institute of Technology","ror":"https://ror.org/03qryx823","country_code":"IL","type":"education","lineage":["https://openalex.org/I174306211"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Shie Mannor","raw_affiliation_strings":["Technion \u2013 Israel Institute of Technology, Israel\nNvidia Research, Israel"],"affiliations":[{"raw_affiliation_string":"Technion \u2013 Israel Institute of Technology, Israel\nNvidia Research, Israel","institution_ids":["https://openalex.org/I174306211"]}]}],"institution_assertions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.155,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":3,"citation_normalized_percentile":{"value":0.281132,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":76,"max":80},"biblio":{"volume":"36","issue":"8","first_page":"8240","last_page":"8248"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9995,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/apprenticeship","display_name":"Apprenticeship","score":0.57614374},{"id":"https://openalex.org/keywords/discriminator","display_name":"Discriminator","score":0.48647687}],"concepts":[{"id":"https://openalex.org/C50817715","wikidata":"https://www.wikidata.org/wiki/Q79895177","display_name":"Regret","level":2,"score":0.8304286},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.6429044},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6302943},{"id":"https://openalex.org/C107806365","wikidata":"https://www.wikidata.org/wiki/Q253567","display_name":"Apprenticeship","level":2,"score":0.57614374},{"id":"https://openalex.org/C2779803651","wikidata":"https://www.wikidata.org/wiki/Q5282088","display_name":"Discriminator","level":3,"score":0.48647687},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45247567},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.38772747},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3815078},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.3190955},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.21693388},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.12177107},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v36i8.20798","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/20798/20557","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2102.06924","pdf_url":"https://arxiv.org/pdf/2102.06924","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v36i8.20798","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/20798/20557","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"score":0.59,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":61,"referenced_works":["https://openalex.org/W100039866","https://openalex.org/W1505731132","https://openalex.org/W1575592356","https://openalex.org/W1771410628","https://openalex.org/W1850488217","https://openalex.org/W1986014385","https://openalex.org/W1988790447","https://openalex.org/W1996625075","https://openalex.org/W1999874108","https://openalex.org/W2016384870","https://openalex.org/W2093825590","https://openalex.org/W2099471712","https://openalex.org/W2102847492","https://openalex.org/W2106887613","https://openalex.org/W2113023245","https://openalex.org/W2115738253","https://openalex.org/W2119567691","https://openalex.org/W2121863487","https://openalex.org/W2142641780","https://openalex.org/W2148112459","https://openalex.org/W2158782408","https://openalex.org/W21934178","https://openalex.org/W2280163991","https://openalex.org/W2513180554","https://openalex.org/W2750990725","https://openalex.org/W2753339894","https://openalex.org/W2781726626","https://openalex.org/W2914920107","https://openalex.org/W2949608212","https://openalex.org/W2949916679","https://openalex.org/W2952854274","https://openalex.org/W2962723383","https://openalex.org/W2962879692","https://openalex.org/W2963014947","https://openalex.org/W2963277051","https://openalex.org/W2963301010","https://openalex.org/W2963582321","https://openalex.org/W2970870329","https://openalex.org/W2995411906","https://openalex.org/W2995551516","https://openalex.org/W2997976910","https://openalex.org/W2998050631","https://openalex.org/W2998111914","https://openalex.org/W2999385649","https://openalex.org/W3007034372","https://openalex.org/W3009820880","https://openalex.org/W3020125231","https://openalex.org/W3026615607","https://openalex.org/W3033836998","https://openalex.org/W3034871777","https://openalex.org/W3046395471","https://openalex.org/W3046626913","https://openalex.org/W3086417465","https://openalex.org/W4285526177","https://openalex.org/W4287978073","https://openalex.org/W4288113009","https://openalex.org/W4289010281","https://openalex.org/W4295521014","https://openalex.org/W4301369075","https://openalex.org/W4320013936","https://openalex.org/W4365811251"],"related_works":["https://openalex.org/W4315575041","https://openalex.org/W4287555357","https://openalex.org/W3176362036","https://openalex.org/W3111617249","https://openalex.org/W3013781205","https://openalex.org/W2161367706","https://openalex.org/W2157016390","https://openalex.org/W2122187689","https://openalex.org/W2016425266","https://openalex.org/W1486879240"],"abstract_inverted_index":{"In":[0],"Apprenticeship":[1,59],"Learning":[2],"(AL),":[3],"we":[4,19,112,168],"are":[5],"given":[6],"a":[7,37,114,170],"Markov":[8],"Decision":[9],"Process":[10],"(MDP)":[11],"without":[12],"access":[13],"to":[14,29,35,67,70,151,163,180],"the":[15,41,63,71,76,81,104,123,128,139,149,184,189],"cost":[16,49],"function.":[17],"Instead,":[18],"observe":[20],"trajectories":[21,143],"sampled":[22],"by":[23,88,192],"an":[24,53,131,153],"expert":[25,72,142],"that":[26,39,80,136,197],"acts":[27],"according":[28],"some":[30,45,178],"policy.":[31],"The":[32],"goal":[33],"is":[34,65,122,186],"find":[36],"policy":[38,98],"matches":[40],"expert's":[42],"performance":[43],"on":[44,138],"predefined":[46],"set":[47],"of":[48,56,125,141,173],"functions.":[50],"We":[51,78],"introduce":[52],"online":[54],"variant":[55,172],"AL":[57,165],"(Online":[58],"Learning;":[60],"OAL),":[61],"where":[62,120,183],"agent":[64],"expected":[66],"perform":[68],"comparably":[69],"while":[73],"interacting":[74],"with":[75,117,127,188],"environment.":[77],"show":[79],"OAL":[82,198],"problem":[83],"can":[84],"be":[85],"effectively":[86],"solved":[87],"combining":[89],"two":[90],"mirror":[91],"descent":[92],"based":[93],"no-regret":[94],"algorithms:":[95],"one":[96],"for":[97,102],"optimization":[99],"and":[100,130],"another":[101],"learning":[103],"worst":[105],"case":[106],"cost.":[107],"By":[108],"employing":[109],"optimistic":[110],"exploration,":[111],"derive":[113],"convergent":[115],"algorithm":[116,147,175],"O(sqrt(K))":[118],"regret,":[119],"K":[121],"number":[124],"interactions":[126],"MDP,":[129],"additional":[132],"linear":[133],"error":[134],"term":[135],"depends":[137],"amount":[140],"available.":[144],"Importantly,":[145],"our":[146,174],"avoids":[148],"need":[150],"solve":[152],"MDP":[154],"at":[155],"each":[156],"iteration,":[157],"making":[158],"it":[159],"more":[160],"practical":[161],"compared":[162],"prior":[164],"methods.":[166],"Finally,":[167],"implement":[169],"deep":[171],"which":[176],"shares":[177],"similarities":[179],"GAIL,":[181],"but":[182],"discriminator":[185],"replaced":[187],"costs":[190],"learned":[191],"OAL.":[193],"Our":[194],"simulations":[195],"suggest":[196],"performs":[199],"well":[200],"in":[201],"high":[202],"dimensional":[203],"control":[204],"problems.":[205]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W3130395087","counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":2}],"updated_date":"2025-01-15T22:27:52.716878","created_date":"2021-03-01"}