{"id":"https://openalex.org/W4322760891","doi":"https://doi.org/10.48550/arxiv.2302.14765","title":"On Learning Intrinsic Rewards for Faster Multi-Agent Reinforcement Learning based MAC Protocol Design in 6G Wireless Networks","display_name":"On Learning Intrinsic Rewards for Faster Multi-Agent Reinforcement Learning based MAC Protocol Design in 6G Wireless Networks","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4322760891","doi":"https://doi.org/10.48550/arxiv.2302.14765"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2302.14765","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2302.14765","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5040577962","display_name":"Luciano Miuccio","orcid":"https://orcid.org/0000-0003-3960-3477"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Miuccio, Luciano","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025838979","display_name":"Salvatore Riolo","orcid":"https://orcid.org/0000-0001-8253-4150"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Riolo, Salvatore","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061429095","display_name":"Mehdi Bennis","orcid":"https://orcid.org/0000-0003-0261-0171"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bennis, Mehdi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5051952503","display_name":"Daniela Panno","orcid":"https://orcid.org/0000-0001-6765-7873"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Panno, Daniela","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":67},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10148","display_name":"Advanced MIMO Systems Optimization","score":0.9961,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10148","display_name":"Advanced MIMO Systems Optimization","score":0.9961,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12791","display_name":"Full-Duplex Wireless Communications","score":0.9911,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11392","display_name":"Energy Harvesting in Wireless Networks","score":0.9852,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/q-learning","display_name":"Q-learning","score":0.5053435}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.92279494},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7973819},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.62046623},{"id":"https://openalex.org/C2780385302","wikidata":"https://www.wikidata.org/wiki/Q367158","display_name":"Protocol (science)","level":3,"score":0.5865633},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5828027},{"id":"https://openalex.org/C761482","wikidata":"https://www.wikidata.org/wiki/Q118093","display_name":"Transmission (telecommunications)","level":2,"score":0.51498014},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.5053435},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.4831852},{"id":"https://openalex.org/C68649174","wikidata":"https://www.wikidata.org/wiki/Q1379116","display_name":"Base station","level":2,"score":0.433406},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.40651312},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35869172},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.28167704},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.10283896},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09257144},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0},{"id":"https://openalex.org/C204787440","wikidata":"https://www.wikidata.org/wiki/Q188504","display_name":"Alternative medicine","level":2,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C142724271","wikidata":"https://www.wikidata.org/wiki/Q7208","display_name":"Pathology","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2302.14765","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2302.14765","pdf_url":"http://arxiv.org/pdf/2302.14765","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2302.14765","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2302.14765","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4294873804","https://openalex.org/W3096874164","https://openalex.org/W3087814763","https://openalex.org/W2937181779","https://openalex.org/W2537866915","https://openalex.org/W2361647908","https://openalex.org/W2357975469","https://openalex.org/W2166117066","https://openalex.org/W2136202932","https://openalex.org/W2089415692"],"abstract_inverted_index":{"In":[0,63],"this":[1,90,99],"paper,":[2],"we":[3,101,144],"propose":[4],"a":[5,10,24,41,93,107,116,148,155,186],"novel":[6],"framework":[7,184],"for":[8],"designing":[9],"fast":[11],"convergent":[12],"multi-agent":[13],"reinforcement":[14],"learning":[15,35,106,183,203],"(MARL)-based":[16],"medium":[17],"access":[18],"control":[19],"(MAC)":[20],"protocol":[21,50],"operating":[22],"in":[23,111,128,208],"single":[25],"cell":[26],"scenario.":[27],"The":[28,181],"user":[29],"equipments":[30],"(UEs)":[31],"are":[32],"cast":[33],"as":[34,147],"agents":[36],"that":[37,136,177,200],"need":[38],"to":[39,45,54,130,194,212],"learn":[40],"proper":[42],"signaling":[43],"policy":[44,161],"coordinate":[46],"the":[47,55,67,81,87,103,140,165,170,174,195,201,213],"transmission":[48,191],"of":[49,105],"data":[51],"units":[52],"(PDUs)":[53],"base":[56],"station":[57],"(BS)":[58],"over":[59],"shared":[60],"radio":[61],"resources.":[62],"many":[64],"MARL":[65],"tasks,":[66],"conventional":[68],"centralized":[69],"training":[70,95,142],"with":[71],"decentralized":[72],"execution":[73],"(CTDE)":[74],"is":[75],"adopted,":[76],"where":[77],"each":[78,113,157],"agent":[79,114,158],"receives":[80],"same":[82],"global":[83],"extrinsic":[84,166],"reward":[85,119,134,176],"from":[86],"environment.":[88],"However,":[89],"approach":[91],"involves":[92],"long":[94,149],"time.":[96],"To":[97],"overcome":[98],"drawback,":[100],"adopt":[102],"concept":[104],"per-agent":[108],"intrinsic":[109,118,133,175],"reward,":[110,167],"which":[112,168],"learns":[115],"different":[117],"signal":[120],"based":[121],"solely":[122],"on":[123],"its":[124,160],"individual":[125],"behavior.":[126],"Moreover,":[127],"order":[129],"provide":[131],"an":[132],"function":[135],"takes":[137],"into":[138],"account":[139],"long-term":[141],"history,":[143],"represent":[145],"it":[146],"shortterm":[150],"memory":[151],"(LSTM)":[152],"network.":[153],"As":[154],"result,":[156],"updates":[159],"network":[162],"considering":[163],"both":[164],"characterizes":[169],"cooperative":[171],"task,":[172],"and":[173,189],"reflects":[178],"local":[179],"dynamics.":[180],"proposed":[182,202],"yields":[185,205],"faster":[187],"convergence":[188,209],"higher":[190],"performance":[192],"compared":[193,211],"baselines.":[196],"Simulation":[197],"results":[198],"show":[199],"solution":[204],"75%":[206],"improvement":[207],"speed":[210],"most":[214],"performing":[215],"baseline.":[216]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4322760891","counts_by_year":[],"updated_date":"2025-01-22T17:53:15.084409","created_date":"2023-03-03"}