{"id":"https://openalex.org/W3028389536","doi":"https://doi.org/10.1007/s42979-020-00191-2","title":"Reward Value-Based Goal Selection for Agents\u2019 Cooperative Route Learning Without Communication in Reward and Goal Dynamism","display_name":"Reward Value-Based Goal Selection for Agents\u2019 Cooperative Route Learning Without Communication in Reward and Goal Dynamism","publication_year":2020,"publication_date":"2020-05-01","ids":{"openalex":"https://openalex.org/W3028389536","doi":"https://doi.org/10.1007/s42979-020-00191-2","mag":"3028389536"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1007/s42979-020-00191-2","pdf_url":"https://link.springer.com/content/pdf/10.1007/s42979-020-00191-2.pdf","source":{"id":"https://openalex.org/S4210174798","display_name":"SN Computer Science","issn_l":"2661-8907","issn":["2661-8907","2662-995X"],"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310319965","host_organization_name":"Springer Nature","host_organization_lineage":["https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s42979-020-00191-2.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5037814625","display_name":"Fumito Uwano","orcid":"https://orcid.org/0000-0003-4139-2605"},"institutions":[{"id":"https://openalex.org/I163770644","display_name":"Okayama University","ror":"https://ror.org/02pc6pc55","country_code":"JP","type":"education","lineage":["https://openalex.org/I163770644"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Fumito Uwano","raw_affiliation_strings":["Okayama University, 3-1-1, Tsushima-naka, Kita-ku, Okayama, Japan"],"affiliations":[{"raw_affiliation_string":"Okayama University, 3-1-1, Tsushima-naka, Kita-ku, Okayama, Japan","institution_ids":["https://openalex.org/I163770644"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5084452161","display_name":"Keiki Takadama","orcid":"https://orcid.org/0009-0007-0916-5505"},"institutions":[{"id":"https://openalex.org/I20529979","display_name":"University of Electro-Communications","ror":"https://ror.org/02x73b849","country_code":"JP","type":"education","lineage":["https://openalex.org/I20529979"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Keiki Takadama","raw_affiliation_strings":["The University of Electro-Communications, 1-5-1, Chofugaoka, Chofu-shi, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"The University of Electro-Communications, 1-5-1, Chofugaoka, Chofu-shi, Tokyo, Japan","institution_ids":["https://openalex.org/I20529979"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5037814625"],"corresponding_institution_ids":["https://openalex.org/I163770644"],"apc_list":{"value":2290,"currency":"EUR","value_usd":2890,"provenance":"doaj"},"apc_paid":{"value":2290,"currency":"EUR","value_usd":2890,"provenance":"doaj"},"fwci":0.0,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":60},"biblio":{"volume":"1","issue":"3","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.994,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.994,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10524","display_name":"Traffic control and management","score":0.9894,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11942","display_name":"Transportation and Mobility Innovations","score":0.9838,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/dynamism","display_name":"Dynamism","score":0.89208364},{"id":"https://openalex.org/keywords/value","display_name":"Value (mathematics)","score":0.5182414}],"concepts":[{"id":"https://openalex.org/C2775836275","wikidata":"https://www.wikidata.org/wiki/Q3502310","display_name":"Dynamism","level":2,"score":0.89208364},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7151342},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.65485096},{"id":"https://openalex.org/C181622380","wikidata":"https://www.wikidata.org/wiki/Q26911","display_name":"Profit (economics)","level":2,"score":0.5253094},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.5182414},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.48819286},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.47424263},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.439289},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.41231686},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40211993},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2646374},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.22754946},{"id":"https://openalex.org/C175444787","wikidata":"https://www.wikidata.org/wiki/Q39072","display_name":"Microeconomics","level":1,"score":0.16785762},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.14875492},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.09036103},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.074989736},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1007/s42979-020-00191-2","pdf_url":"https://link.springer.com/content/pdf/10.1007/s42979-020-00191-2.pdf","source":{"id":"https://openalex.org/S4210174798","display_name":"SN Computer Science","issn_l":"2661-8907","issn":["2661-8907","2662-995X"],"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310319965","host_organization_name":"Springer Nature","host_organization_lineage":["https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1007/s42979-020-00191-2","pdf_url":"https://link.springer.com/content/pdf/10.1007/s42979-020-00191-2.pdf","source":{"id":"https://openalex.org/S4210174798","display_name":"SN Computer Science","issn_l":"2661-8907","issn":["2661-8907","2662-995X"],"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310319965","host_organization_name":"Springer Nature","host_organization_lineage":["https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"display_name":"No poverty","id":"https://metadata.un.org/sdg/1","score":0.42}],"grants":[{"funder":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science","award_id":"JP17J08724"}],"datasets":[],"versions":[],"referenced_works_count":12,"referenced_works":["https://openalex.org/W1515851193","https://openalex.org/W2293337064","https://openalex.org/W2563868960","https://openalex.org/W2758442112","https://openalex.org/W2788212683","https://openalex.org/W2808475207","https://openalex.org/W2885132842","https://openalex.org/W2964247745","https://openalex.org/W2978405293","https://openalex.org/W2978656800","https://openalex.org/W3011120880","https://openalex.org/W4253683673"],"related_works":["https://openalex.org/W4297802007","https://openalex.org/W4297288183","https://openalex.org/W3126342311","https://openalex.org/W3122659832","https://openalex.org/W2775384662","https://openalex.org/W2586399084","https://openalex.org/W2385556862","https://openalex.org/W2367908682","https://openalex.org/W2117883229","https://openalex.org/W2000438032"],"abstract_inverted_index":{"Abstract":[0],"This":[1,186,242],"paper":[2,73,187,243],"proposes":[3],"a":[4,69],"goal":[5,37,267],"selection":[6],"method":[7,23,81,150],"to":[8,25,28,31,41,47,52,155,225],"operate":[9],"agents":[10,27,46,153,224],"get":[11],"maximum":[12],"reward":[13,34,123,146,270],"values":[14,35,124,271],"per":[15],"time":[16,102,118,141,200,306],"by":[17,144,180,254],"noncommunicative":[18,77],"learning.":[19],"In":[20,295],"particular,":[21,296],"that":[22,194,217,280,300],"aims":[24],"enable":[26,45],"cooperate":[29],"along":[30,51],"dynamism":[32],"of":[33,60,88,98,101,108,117,140,169,238,248,262,277],"and":[36,56,65,91,104,133,192,269],"locations.":[38],"Adaptation":[39],"against":[40],"these":[42],"dynamisms":[43],"can":[44,202,221,308],"learn":[48,154],"cooperative":[49,78,178],"actions":[50],"changing":[53,57],"transportation":[54],"tasks":[55,62],"incomes/rewards":[58],"because":[59],"transporting":[61],"for":[63,130,313],"heavy/valuable":[64],"light/valueless":[66],"items":[67],"in":[68,159,162,236,259,291],"storehouse.":[70],"Concretely,":[71],"this":[72,278],"extends":[74],"the":[75,93,99,105,109,114,121,126,134,137,145,157,160,166,170,183,189,195,198,212,214,219,223,227,233,239,246,251,257,281,301,304,310,314],"previous":[76,184],"action":[79],"learning":[80,85],"(Profit":[82],"minimizing":[83],"reinforcement":[84],"with":[86,207,232,250,256],"oblivion":[87],"memory:":[89],"PMRL-OM)":[90],"sets":[92],"two":[94,240,252],"unified":[95,110,190,282],"conditions":[96,111,191,253,283,290],"combined":[97,204],"number":[100,116,139,168],"steps":[103,119,142,201],"rewards.":[106],"One":[107],"is":[112,136,298],"calculated":[113],"approximated":[115,199,305],"if":[120],"expected":[122],"are":[125,272],"same":[127,209],"each":[128,175],"other":[129,135,215],"all":[131,152],"purposes,":[132],"minimum":[138,167],"divided":[143],"value.":[147],"The":[148,275],"proposed":[149],"makes":[151],"achieve":[156],"purposes":[158,229],"order":[161],"which":[163],"they":[164],"have":[165],"condition":[171,196,220,302],"values.":[172],"After":[173],"that,":[174],"agent":[176],"learns":[177],"policy":[179],"PMRL-OM":[181,249,258,287],"as":[182],"method.":[185],"analyzes":[188],"derives":[193],"calculating":[197,303],"be":[203],"both":[205],"evaluations":[206],"almost":[208],"weight":[210],"unlike":[211],"value":[213],"condition,":[216],"is,":[218],"help":[222],"select":[226],"appropriate":[228,311],"among":[230],"them":[231],"small":[234],"difference":[235],"terms":[237],"evaluations.":[241],"tests":[244],"empirically":[245],"performances":[247],"comparing":[255],"three":[260],"cases":[261],"grid":[263,292],"world":[264,293],"problems":[265],"whose":[266],"locations":[268],"changed":[273],"dynamically.":[274],"results":[276],"derive":[279],"perform":[284],"better":[285],"than":[286],"without":[288],"some":[289],"problems.":[294],"it":[297],"clear":[299],"step":[307],"direct":[309],"goals":[312],"agents.":[315]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W3028389536","counts_by_year":[],"updated_date":"2025-01-21T08:34:20.191191","created_date":"2020-05-29"}