{"id":"https://openalex.org/W2788953735","doi":"https://doi.org/10.1609/aaai.v32i1.11646","title":"On Value Function Representation of Long Horizon Problems","display_name":"On Value Function Representation of Long Horizon Problems","publication_year":2018,"publication_date":"2018-04-29","ids":{"openalex":"https://openalex.org/W2788953735","doi":"https://doi.org/10.1609/aaai.v32i1.11646","mag":"2788953735"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v32i1.11646","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/11646/11505","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/11646/11505","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5006414380","display_name":"Lucas Lehnert","orcid":"https://orcid.org/0000-0001-5897-499X"},"institutions":[{"id":"https://openalex.org/I27804330","display_name":"Brown University","ror":"https://ror.org/05gq02987","country_code":"US","type":"education","lineage":["https://openalex.org/I27804330"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lucas Lehnert","raw_affiliation_strings":["Brown University, Providence, Rhode Island"],"affiliations":[{"raw_affiliation_string":"Brown University, Providence, Rhode Island","institution_ids":["https://openalex.org/I27804330"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089214987","display_name":"Romain Laroche","orcid":"https://orcid.org/0000-0001-7180-2746"},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Romain Laroche","raw_affiliation_strings":["Microsoft Maluuba, Montreal, QC"],"affiliations":[{"raw_affiliation_string":"Microsoft Maluuba, Montreal, QC","institution_ids":["https://openalex.org/I4210164937"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049195732","display_name":"Harm van Seijen","orcid":null},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Harm Van Seijen","raw_affiliation_strings":["Microsoft Maluuba, Montreal, QC"],"affiliations":[{"raw_affiliation_string":"Microsoft Maluuba, Montreal, QC","institution_ids":["https://openalex.org/I4210164937"]}]}],"institution_assertions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.982,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":19,"citation_normalized_percentile":{"value":0.778532,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":91},"biblio":{"volume":"32","issue":"1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9983,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9983,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9977,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12002","display_name":"Computability, Logic, AI Algorithms","score":0.9858,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.7007618},{"id":"https://openalex.org/keywords/value","display_name":"Value (mathematics)","score":0.5646464},{"id":"https://openalex.org/keywords/representation","display_name":"Representation","score":0.5183912},{"id":"https://openalex.org/keywords/time-horizon","display_name":"Time horizon","score":0.5081365},{"id":"https://openalex.org/keywords/function-approximation","display_name":"Function Approximation","score":0.50445926}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.87420285},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.8518114},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.7007618},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5990983},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.5646464},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.55798244},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5339692},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5183912},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5109369},{"id":"https://openalex.org/C28761237","wikidata":"https://www.wikidata.org/wiki/Q7805321","display_name":"Time horizon","level":2,"score":0.5081365},{"id":"https://openalex.org/C91873725","wikidata":"https://www.wikidata.org/wiki/Q3445816","display_name":"Function approximation","level":3,"score":0.50445926},{"id":"https://openalex.org/C159176650","wikidata":"https://www.wikidata.org/wiki/Q43261","display_name":"Horizon","level":2,"score":0.48233935},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.46216226},{"id":"https://openalex.org/C72434380","wikidata":"https://www.wikidata.org/wiki/Q230930","display_name":"State space","level":2,"score":0.44624206},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35121787},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.32357317},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.16763529},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.14636248},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v32i1.11646","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/11646/11505","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v32i1.11646","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/11646/11505","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/11","display_name":"Sustainable cities and communities","score":0.67}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":34,"referenced_works":["https://openalex.org/W1486707268","https://openalex.org/W1515851193","https://openalex.org/W1522301498","https://openalex.org/W1556824961","https://openalex.org/W1754881896","https://openalex.org/W1777239053","https://openalex.org/W1968768508","https://openalex.org/W1988526405","https://openalex.org/W2109910161","https://openalex.org/W2115008305","https://openalex.org/W2123447947","https://openalex.org/W2129670787","https://openalex.org/W2133458291","https://openalex.org/W2136202932","https://openalex.org/W2144846366","https://openalex.org/W2145339207","https://openalex.org/W2335959470","https://openalex.org/W2516137114","https://openalex.org/W2550612212","https://openalex.org/W2575066953","https://openalex.org/W2606568940","https://openalex.org/W2624731731","https://openalex.org/W2626637010","https://openalex.org/W2758731390","https://openalex.org/W2950892788","https://openalex.org/W2952485636","https://openalex.org/W2962847657","https://openalex.org/W2963262099","https://openalex.org/W2963276097","https://openalex.org/W2964121744","https://openalex.org/W2964227312","https://openalex.org/W3103780890","https://openalex.org/W3137695714","https://openalex.org/W607505555"],"related_works":["https://openalex.org/W4288112126","https://openalex.org/W3102715494","https://openalex.org/W3036498527","https://openalex.org/W2998461398","https://openalex.org/W2995459009","https://openalex.org/W2937181779","https://openalex.org/W2788953735","https://openalex.org/W2569146624","https://openalex.org/W2133063479","https://openalex.org/W1519972694"],"abstract_inverted_index":{"In":[0],"Reinforcement":[1,47],"Learning,":[2,48],"an":[3],"intelligent":[4],"agent":[5,24],"has":[6,25],"to":[7,13,26,65],"make":[8],"a":[9,15,29,42,120,138,152],"sequence":[10,19],"of":[11,55,78,82,95,109,172],"decisions":[12],"accomplish":[14],"goal.":[16],"If":[17],"this":[18,49],"is":[20,41,64,87,124],"long,":[21],"then":[22],"the":[23,34,53,56,67,74,79,90,96,107,159,173],"plan":[27],"over":[28],"long":[30,121],"horizon.":[31],"While":[32],"learning":[33],"optimal":[35,57,68,84],"policy":[36],"and":[37,60,93,98,113,136,158,162],"its":[38],"value":[39,58,69,85,111,145,155,165],"function":[40,59,146,156],"well":[43],"studied":[44],"problem":[45],"in":[46],"paper":[50],"focuses":[51],"on":[52,89,106,132,137],"structure":[54],"how":[61],"hard":[62],"it":[63],"represent":[66],"function.":[70],"We":[71],"show":[72,114],"that":[73,115,164],"generalized":[75],"Rademacher":[76],"complexity":[77],"hypothesis":[80],"space":[81,100],"all":[83],"functions":[86,112,166],"dependent":[88],"planning":[91,122],"horizon":[92,123],"independent":[94],"state":[97],"action":[99,110],"size.":[101],"Further,":[102],"we":[103],"present":[104],"bounds":[105],"action-gaps":[108],"they":[116],"can":[117],"collapse":[118],"if":[119],"used.":[125],"The":[126],"theoretical":[127,149],"results":[128,150],"are":[129],"verified":[130],"empirically":[131],"randomly":[133],"generated":[134],"MDPs":[135],"grid-world":[139],"fruit":[140],"collection":[141],"task":[142],"using":[143],"deep":[144],"approximation.":[147],"Our":[148],"highlight":[151],"connection":[153],"between":[154],"approximation":[157],"Options":[160],"framework":[161],"suggest":[163],"should":[167],"be":[168],"decomposed":[169],"along":[170],"bottlenecks":[171],"MDP's":[174],"transition":[175],"dynamics.":[176]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2788953735","counts_by_year":[{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":6},{"year":2018,"cited_by_count":2}],"updated_date":"2025-01-20T04:14:55.435034","created_date":"2018-03-06"}