{"id":"https://openalex.org/W2903634288","doi":"https://doi.org/10.1609/aaai.v33i01.33016054","title":"Successor Features Based Multi-Agent RL for Event-Based Decentralized MDPs","display_name":"Successor Features Based Multi-Agent RL for Event-Based Decentralized MDPs","publication_year":2019,"publication_date":"2019-07-17","ids":{"openalex":"https://openalex.org/W2903634288","doi":"https://doi.org/10.1609/aaai.v33i01.33016054","mag":"2903634288"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v33i01.33016054","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/4561/4439","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/4561/4439","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101214183","display_name":"Tarun Gupta","orcid":null},"institutions":[{"id":"https://openalex.org/I65181880","display_name":"Indian Institute of Technology Hyderabad","ror":"https://ror.org/01j4v3x97","country_code":"IN","type":"education","lineage":["https://openalex.org/I65181880"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Tarun Gupta","raw_affiliation_strings":["Indian Institute of Technology Hyderabad"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Technology Hyderabad","institution_ids":["https://openalex.org/I65181880"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101059249","display_name":"Akshat Kumar","orcid":null},"institutions":[{"id":"https://openalex.org/I79891267","display_name":"Singapore Management University","ror":"https://ror.org/050qmg959","country_code":"SG","type":"education","lineage":["https://openalex.org/I79891267"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Akshat Kumar","raw_affiliation_strings":["Singapore Management University"],"affiliations":[{"raw_affiliation_string":"Singapore Management University","institution_ids":["https://openalex.org/I79891267"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5000689728","display_name":"Praveen Paruchuri","orcid":"https://orcid.org/0000-0001-8071-5409"},"institutions":[{"id":"https://openalex.org/I65181880","display_name":"Indian Institute of Technology Hyderabad","ror":"https://ror.org/01j4v3x97","country_code":"IN","type":"education","lineage":["https://openalex.org/I65181880"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Praveen Paruchuri","raw_affiliation_strings":["Indian Institute of Technology Hyderabad"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Technology Hyderabad","institution_ids":["https://openalex.org/I65181880"]}]}],"institution_assertions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.065,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":4,"citation_normalized_percentile":{"value":0.698712,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":77,"max":79},"biblio":{"volume":"33","issue":"01","first_page":"6054","last_page":"6061"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10524","display_name":"Traffic control and management","score":0.9983,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10524","display_name":"Traffic control and management","score":0.9983,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9937,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10917","display_name":"Smart Grid Security and Resilience","score":0.9736,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/successor-cardinal","display_name":"Successor cardinal","score":0.89412516},{"id":"https://openalex.org/keywords/representation","display_name":"Representation","score":0.5438287}],"concepts":[{"id":"https://openalex.org/C75306776","wikidata":"https://www.wikidata.org/wiki/Q7632662","display_name":"Successor cardinal","level":2,"score":0.89412516},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.83352005},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8313984},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6597431},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.581731},{"id":"https://openalex.org/C68387754","wikidata":"https://www.wikidata.org/wiki/Q7271585","display_name":"Schedule","level":2,"score":0.550196},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5438287},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.5409677},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.52048486},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4900607},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.46984807},{"id":"https://openalex.org/C41550386","wikidata":"https://www.wikidata.org/wiki/Q529909","display_name":"Multi-agent system","level":2,"score":0.45980066},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.4460903},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.42637035},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.35344172},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.2708253},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v33i01.33016054","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/4561/4439","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://ink.library.smu.edu.sg/sis_research/5057","pdf_url":null,"source":{"id":"https://openalex.org/S4306401925","display_name":"Singapore Management University Institutional Knowledge (InK) (Singapore Management University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79891267","host_organization_name":"Singapore Management University","host_organization_lineage":["https://openalex.org/I79891267"],"host_organization_lineage_names":["Singapore Management University"],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v33i01.33016054","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/4561/4439","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"score":0.46,"display_name":"Sustainable cities and communities","id":"https://metadata.un.org/sdg/11"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":33,"referenced_works":["https://openalex.org/W1749432972","https://openalex.org/W1769870091","https://openalex.org/W2009303086","https://openalex.org/W2019738489","https://openalex.org/W2056354534","https://openalex.org/W2088956500","https://openalex.org/W2097381042","https://openalex.org/W2102764452","https://openalex.org/W2116600741","https://openalex.org/W2142505627","https://openalex.org/W2143867639","https://openalex.org/W2144817350","https://openalex.org/W2145339207","https://openalex.org/W2153821047","https://openalex.org/W2155027007","https://openalex.org/W2156737235","https://openalex.org/W2292533394","https://openalex.org/W2440926996","https://openalex.org/W2568597750","https://openalex.org/W2575718630","https://openalex.org/W2604704722","https://openalex.org/W2617547828","https://openalex.org/W2788249729","https://openalex.org/W2806411719","https://openalex.org/W2962717849","https://openalex.org/W2962938168","https://openalex.org/W2963497400","https://openalex.org/W30453094","https://openalex.org/W3104860527","https://openalex.org/W4232967792","https://openalex.org/W4293676776","https://openalex.org/W4297804343","https://openalex.org/W4301431443"],"related_works":["https://openalex.org/W4235459050","https://openalex.org/W3123320268","https://openalex.org/W3099153698","https://openalex.org/W3038962357","https://openalex.org/W2994519609","https://openalex.org/W2726593096","https://openalex.org/W2473542051","https://openalex.org/W2386410636","https://openalex.org/W2054269721","https://openalex.org/W2025663273"],"abstract_inverted_index":{"Decentralized":[0],"MDPs":[1],"(Dec-MDPs)":[2],"provide":[3],"a":[4,28,42,71,88,161,173],"rigorous":[5],"framework":[6],"for":[7,79,115,146],"collaborative":[8],"multi-agent":[9,163],"sequential":[10],"decisionmaking":[11],"under":[12],"uncertainty.":[13],"However,":[14],"their":[15,49],"computational":[16],"complexity":[17],"limits":[18],"the":[19,94,97,100,144],"practical":[20],"impact.":[21],"To":[22,60],"address":[23],"this,":[24],"we":[25,156],"focus":[26],"on":[27,137,149,160],"class":[29],"of":[30,33,52,96,135,170],"Dec-MDPs":[31,81,117],"consisting":[32],"independent":[34],"collaborating":[35],"agents":[36,171],"that":[37,46,92,130],"are":[38],"tied":[39],"together":[40],"through":[41],"global":[43],"reward":[44],"function":[45,90],"depends":[47],"upon":[48],"entire":[50],"histories":[51],"states":[53],"and":[54,178],"actions":[55],"to":[56],"accomplish":[57],"joint":[58],"tasks.":[59],"overcome":[61],"scalability":[62],"barrier,":[63],"our":[64,158],"main":[65],"contributions":[66],"are:":[67],"(a)":[68],"We":[69,103,127],"propose":[70],"new":[72,150],"actor-critic":[73],"based":[74,109],"Reinforcement":[75],"Learning":[76],"(RL)":[77],"approach":[78,159],"event-based":[80,116],"using":[82,118],"successor":[83],"features":[84],"(SF)":[85],"which":[86,112,166],"is":[87],"value":[89],"representation":[91],"decouples":[93],"dynamics":[95],"environment":[98],"from":[99],"rewards;":[101],"(b)":[102],"then":[104],"present":[105],"Dec-ESR":[106,131],"(Decentralized":[107],"Event":[108],"Successor":[110],"Representation)":[111],"generalizes":[113],"learning":[114,145],"SF":[119],"within":[120],"an":[121],"end-to-end":[122],"deep":[123],"RL":[124],"framework;":[125],"(c)":[126],"also":[128],"show":[129],"allows":[132],"useful":[133],"transfer":[134],"information":[136],"related":[138],"but":[139],"different":[140],"tasks,":[141],"hence":[142],"bootstraps":[143],"faster":[147],"convergence":[148],"tasks;":[151],"(d)":[152],"For":[153],"validation":[154],"purposes,":[155],"test":[157],"large":[162],"coverage":[164],"problem":[165],"models":[167],"schedule":[168],"coordination":[169],"in":[172],"real":[174],"urban":[175],"subway":[176],"network":[177],"achieves":[179],"better":[180],"quality":[181],"solutions":[182],"than":[183],"previous":[184],"best":[185],"approaches.":[186]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2903634288","counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1}],"updated_date":"2025-01-17T00:28:42.860421","created_date":"2018-12-22"}