{"id":"https://openalex.org/W2408295367","doi":"https://doi.org/10.4108/eai.3-12-2015.2262878","title":"Reinforcement Learning with Internal Reward for Multi-Agent Cooperation: A Theoretical Approach","display_name":"Reinforcement Learning with Internal Reward for Multi-Agent Cooperation: A Theoretical Approach","publication_year":2016,"publication_date":"2016-01-01","ids":{"openalex":"https://openalex.org/W2408295367","doi":"https://doi.org/10.4108/eai.3-12-2015.2262878","mag":"2408295367"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.4108/eai.3-12-2015.2262878","pdf_url":"http://eudl.eu/pdf/10.4108/eai.3-12-2015.2262878","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"http://eudl.eu/pdf/10.4108/eai.3-12-2015.2262878","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5037814625","display_name":"Fumito Uwano","orcid":"https://orcid.org/0000-0003-4139-2605"},"institutions":[{"id":"https://openalex.org/I20529979","display_name":"University of Electro-Communications","ror":"https://ror.org/02x73b849","country_code":"JP","type":"education","lineage":["https://openalex.org/I20529979"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Fumito Uwano","raw_affiliation_strings":["The University of Electro-Communications W6-309, 1-5-1 Chofugaoka, Chofu, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"The University of Electro-Communications W6-309, 1-5-1 Chofugaoka, Chofu, Tokyo, Japan","institution_ids":["https://openalex.org/I20529979"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055742726","display_name":"Naoki Tatebe","orcid":null},"institutions":[{"id":"https://openalex.org/I20529979","display_name":"University of Electro-Communications","ror":"https://ror.org/02x73b849","country_code":"JP","type":"education","lineage":["https://openalex.org/I20529979"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Naoki Tatebe","raw_affiliation_strings":["The University of Electro-Communications W6-309, 1-5-1 Chofugaoka, Chofu, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"The University of Electro-Communications W6-309, 1-5-1 Chofugaoka, Chofu, Tokyo, Japan","institution_ids":["https://openalex.org/I20529979"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019516292","display_name":"Masaya Nakata","orcid":"https://orcid.org/0000-0003-3428-7890"},"institutions":[{"id":"https://openalex.org/I20529979","display_name":"University of Electro-Communications","ror":"https://ror.org/02x73b849","country_code":"JP","type":"education","lineage":["https://openalex.org/I20529979"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Masaya Nakata","raw_affiliation_strings":["The University of Electro-Communications W6-309, 1-5-1 Chofugaoka, Chofu, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"The University of Electro-Communications W6-309, 1-5-1 Chofugaoka, Chofu, Tokyo, Japan","institution_ids":["https://openalex.org/I20529979"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084452161","display_name":"Keiki Takadama","orcid":"https://orcid.org/0009-0007-0916-5505"},"institutions":[{"id":"https://openalex.org/I20529979","display_name":"University of Electro-Communications","ror":"https://ror.org/02x73b849","country_code":"JP","type":"education","lineage":["https://openalex.org/I20529979"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Keiki Takadama","raw_affiliation_strings":["The University of Electro-Communications W6-309, 1-5-1 Chofugaoka, Chofu, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"The University of Electro-Communications W6-309, 1-5-1 Chofugaoka, Chofu, Tokyo, Japan","institution_ids":["https://openalex.org/I20529979"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5035445712","display_name":"Tim Kovacs","orcid":"https://orcid.org/0000-0002-9492-5834"},"institutions":[{"id":"https://openalex.org/I36234482","display_name":"University of Bristol","ror":"https://ror.org/0524sp257","country_code":"GB","type":"education","lineage":["https://openalex.org/I36234482"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Tim Kovacs","raw_affiliation_strings":["The University of Bristol Merchant Venturers Building, Woodland Road, Clifton BS8 1UB, United Kingdom"],"affiliations":[{"raw_affiliation_string":"The University of Bristol Merchant Venturers Building, Woodland Road, Clifton BS8 1UB, United Kingdom","institution_ids":["https://openalex.org/I36234482"]}]}],"institution_assertions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":66},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9979,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9979,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11031","display_name":"Game Theory and Applications","score":0.9765,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10328","display_name":"Supply Chain and Inventory Management","score":0.976,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/internal-model","display_name":"Internal model","score":0.4256964},{"id":"https://openalex.org/keywords/error-driven-learning","display_name":"Error-driven learning","score":0.42388907}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.9060366},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.69079274},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.632297},{"id":"https://openalex.org/C41550386","wikidata":"https://www.wikidata.org/wiki/Q529909","display_name":"Multi-agent system","level":2,"score":0.48304397},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4336697},{"id":"https://openalex.org/C28427503","wikidata":"https://www.wikidata.org/wiki/Q13580300","display_name":"Internal model","level":3,"score":0.4256964},{"id":"https://openalex.org/C47932503","wikidata":"https://www.wikidata.org/wiki/Q5395689","display_name":"Error-driven learning","level":3,"score":0.42388907},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.123119235},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.087587535},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.08535224}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.4108/eai.3-12-2015.2262878","pdf_url":"http://eudl.eu/pdf/10.4108/eai.3-12-2015.2262878","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":false,"landing_page_url":"https://doaj.org/article/e75af3a4110c43ada798e036ff209644","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.4108/eai.3-12-2015.2262878","pdf_url":"http://eudl.eu/pdf/10.4108/eai.3-12-2015.2262878","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/17","score":0.51,"display_name":"Partnerships for the goals"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":6,"referenced_works":["https://openalex.org/W1542094515","https://openalex.org/W1995663008","https://openalex.org/W2263562440","https://openalex.org/W2284276045","https://openalex.org/W2407262400","https://openalex.org/W3011120880"],"related_works":["https://openalex.org/W87513465","https://openalex.org/W8539471","https://openalex.org/W4312372616","https://openalex.org/W3203256658","https://openalex.org/W2786230833","https://openalex.org/W2391666574","https://openalex.org/W2371091044","https://openalex.org/W2352650970","https://openalex.org/W2171010636","https://openalex.org/W1493952344"],"abstract_inverted_index":{"This":[0],"paper":[1],"focuses":[2],"on":[3],"a":[4,32],"multi-agent":[5,33],"cooperation":[6,34],"which":[7],"is":[8],"generally":[9],"difficult":[10],"to":[11,40],"be":[12],"achieved":[13],"without":[14,35],"sufficient":[15,36],"information":[16],"of":[17],"other":[18],"agents,":[19],"and":[20],"proposes":[21],"the":[22],"reinforcement":[23],"learning":[24],"method":[25],"that":[26],"introduces":[27],"an":[28],"internal":[29],"reward":[30],"for":[31],"information.":[37],"To":[38],"guarantee":[39]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2408295367","counts_by_year":[],"updated_date":"2024-12-07T07:51:50.260813","created_date":"2016-06-24"}