{"id":"https://openalex.org/W4221153037","doi":"https://doi.org/10.48550/arxiv.2203.08542","title":"Lazy-MDPs: Towards Interpretable Reinforcement Learning by Learning When to Act","display_name":"Lazy-MDPs: Towards Interpretable Reinforcement Learning by Learning When to Act","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4221153037","doi":"https://doi.org/10.48550/arxiv.2203.08542"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2203.08542","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":null,"is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"journal-article","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2203.08542","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5043182321","display_name":"Alexis Jacq","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jacq, Alexis","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087706654","display_name":"Johan Ferret","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ferret, Johan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065100569","display_name":"Olivier Pietquin","orcid":"https://orcid.org/0000-0002-5386-465X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pietquin, Olivier","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5110482875","display_name":"Matthieu Geist","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Geist, Matthieu","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.865382,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":76,"max":80},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9612,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9612,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.79540694}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8711889},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8139656},{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.79540694},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.7123164},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.54537296},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.4601984},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.36040574},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.24863875},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2203.08542","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2203.08542","pdf_url":"http://arxiv.org/pdf/2203.08542","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2203.08542","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2203.08542","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":null,"is_accepted":false,"is_published":false},"sustainable_development_goals":[{"score":0.83,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, justice, and strong institutions"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4315864862","https://openalex.org/W3096874164","https://openalex.org/W2966829450","https://openalex.org/W2937181779","https://openalex.org/W2883749686","https://openalex.org/W2386410636","https://openalex.org/W2357975469","https://openalex.org/W2145363145","https://openalex.org/W1986582023","https://openalex.org/W1985560493"],"abstract_inverted_index":{"Traditionally,":[0],"Reinforcement":[1],"Learning":[2],"(RL)":[3],"aims":[4],"at":[5],"deciding":[6,18],"how":[7],"to":[8,20,35,53,73,85,186],"act":[9,21],"optimally":[10],"for":[11],"an":[12],"artificial":[13],"agent.":[14],"We":[15,97,103,149],"argue":[16],"that":[17,121,182],"when":[19,39],"is":[22],"equally":[23],"important.":[24],"As":[25],"humans,":[26],"we":[27,51,79,118,180],"drift":[28],"from":[29],"default,":[30],"instinctive":[31],"or":[32,178],"memorized":[33],"behaviors":[34,38],"focused,":[36],"thought-out":[37],"required":[40],"by":[41,133],"the":[42,55,99,105,138,141,146,160,165,168],"situation.":[43],"To":[44],"enhance":[45],"RL":[46],"agents":[47,91,183],"with":[48,128],"this":[49],"aptitude,":[50],"propose":[52],"augment":[54],"standard":[56],"Markov":[57],"Decision":[58],"Process":[59],"and":[60,89,113,153,167],"make":[61],"a":[62,74,129,198],"new":[63],"mode":[64],"of":[65,108,131,201],"action":[66],"available:":[67],"being":[68],"lazy,":[69],"which":[70],"defers":[71],"decision-making":[72],"default":[75,147,166,176],"policy.":[76,148,171],"In":[77],"addition,":[78],"penalize":[80],"non-lazy":[81],"actions":[82,155],"in":[83,124,162,190,197],"order":[84],"encourage":[86],"minimal":[87],"effort":[88],"have":[90],"focus":[92],"on":[93],"critical":[94],"decisions":[95],"only.":[96],"name":[98],"resulting":[100],"formalism":[101],"lazy-MDPs.":[102],"study":[104],"theoretical":[106],"properties":[107],"lazy-MDPs,":[109],"expressing":[110],"value":[111],"functions":[112],"characterizing":[114],"optimal":[115],"solutions.":[116],"Then":[117],"empirically":[119],"demonstrate":[120],"policies":[122,174],"learned":[123],"lazy-MDPs":[125],"generally":[126],"come":[127],"form":[130],"interpretability:":[132],"construction,":[134],"they":[135,158],"show":[136],"us":[137],"states":[139,152],"where":[140],"agent":[142],"takes":[143],"control":[144,196],"over":[145],"deem":[150],"those":[151],"corresponding":[154],"important":[156],"since":[157],"explain":[159],"difference":[161],"performance":[163,189],"between":[164],"new,":[169],"lazy":[170],"With":[172],"suboptimal":[173],"as":[175],"(pretrained":[177],"random),":[179],"observe":[181],"are":[184],"able":[185],"get":[187],"competitive":[188],"Atari":[191],"games":[192],"while":[193],"only":[194],"taking":[195],"limited":[199],"subset":[200],"states.":[202]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4221153037","counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1}],"updated_date":"2025-01-05T03:06:53.700477","created_date":"2022-04-03"}