{"id":"https://openalex.org/W4403713458","doi":"https://doi.org/10.48550/arxiv.2409.09541","title":"Autonomous Goal Detection and Cessation in Reinforcement Learning: A\n Case Study on Source Term Estimation","display_name":"Autonomous Goal Detection and Cessation in Reinforcement Learning: A\n Case Study on Source Term Estimation","publication_year":2024,"publication_date":"2024-09-14","ids":{"openalex":"https://openalex.org/W4403713458","doi":"https://doi.org/10.48550/arxiv.2409.09541"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2409.09541","pdf_url":"http://arxiv.org/pdf/2409.09541","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2409.09541","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5010583638","display_name":"Yi-Wei Shi","orcid":"https://orcid.org/0000-0003-2169-8789"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shi, Yiwei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049802452","display_name":"Muning Wen","orcid":"https://orcid.org/0009-0000-7868-1262"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wen, Muning","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100360461","display_name":"Qi Zhang","orcid":"https://orcid.org/0009-0003-1600-5690"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Qi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090720315","display_name":"Weinan Zhang","orcid":"https://orcid.org/0000-0002-0127-2425"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Weinan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021774274","display_name":"Cunjia Liu","orcid":"https://orcid.org/0000-0003-2829-9369"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Cunjia","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5002349071","display_name":"Weiru Liu","orcid":"https://orcid.org/0000-0001-8356-1361"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Weiru","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":84},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.7611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.7611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.7241,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement Learning","score":0.67851},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine Learning","score":0.51628},{"id":"https://openalex.org/keywords/symbolic-regression","display_name":"Symbolic Regression","score":0.511033},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep Learning","score":0.507921}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.78570294},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.773122},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.58697516},{"id":"https://openalex.org/C96250715","wikidata":"https://www.wikidata.org/wiki/Q965330","display_name":"Estimation","level":2,"score":0.5481577},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5202638},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4793478},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.38839284},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.32415643},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.18253422},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.14756021},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2409.09541","pdf_url":"http://arxiv.org/pdf/2409.09541","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2409.09541","pdf_url":"http://arxiv.org/pdf/2409.09541","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4310083477","https://openalex.org/W2920061524","https://openalex.org/W2328553770","https://openalex.org/W2145821588","https://openalex.org/W2107890255","https://openalex.org/W2106552856","https://openalex.org/W2086122291","https://openalex.org/W2038908348","https://openalex.org/W1987513656","https://openalex.org/W1977959518"],"abstract_inverted_index":{"Reinforcement":[0],"Learning":[1],"has":[2],"revolutionized":[3],"decision-making":[4],"processes":[5],"in":[6,25,108,181,197],"dynamic":[7],"environments,":[8],"yet":[9],"it":[10,38],"often":[11],"struggles":[12],"with":[13,110,122],"autonomously":[14],"detecting":[15],"and":[16,45,48,64,83,92,128,134,161,166,189,195],"achieving":[17],"goals":[18,95],"without":[19],"clear":[20,42],"feedback":[21,43],"signals.":[22],"For":[23],"example,":[24],"a":[26,76,171],"Source":[27,140],"Term":[28,141],"Estimation":[29,142],"problem,":[30],"the":[31,51,60,98,103,139],"lack":[32],"of":[33,105,116,183],"precise":[34],"environmental":[35],"information":[36],"makes":[37],"challenging":[39],"to":[40,46],"provide":[41],"signals":[44],"define":[47],"evaluate":[49],"how":[50],"source's":[52],"location":[53],"is":[54],"determined.":[55],"To":[56,113],"address":[57],"this":[58],"challenge,":[59],"Autonomous":[61],"Goal":[62],"Detection":[63],"Cessation":[65],"(AGDC)":[66],"module":[67],"was":[68],"developed,":[69],"enhancing":[70,102],"various":[71],"RL":[72,106,150],"algorithms":[73,107,151],"by":[74,96],"incorporating":[75],"self-feedback":[77],"mechanism":[78],"for":[79,164],"autonomous":[80],"goal":[81],"detection":[82],"cessation":[84],"upon":[85],"task":[86],"completion.":[87],"Our":[88],"method":[89],"effectively":[90],"identifies":[91],"ceases":[93],"undefined":[94],"approximating":[97],"agent's":[99],"belief,":[100],"significantly":[101,152],"capabilities":[104],"environments":[109],"limited":[111],"feedback.":[112],"validate":[114],"effectiveness":[115,194],"our":[117],"approach,":[118],"we":[119],"integrated":[120],"AGDC":[121],"deep":[123,129],"Q-Network,":[124],"proximal":[125],"policy":[126,131],"optimization,":[127],"deterministic":[130],"gradient":[132],"algorithms,":[133],"evaluated":[135],"its":[136],"performance":[137],"on":[138],"problem.":[143],"The":[144],"experimental":[145],"results":[146],"showed":[147],"that":[148],"AGDC-enhanced":[149],"outperformed":[153],"traditional":[154],"statistical":[155],"methods":[156],"such":[157],"as":[158,168,170],"infotaxis,":[159],"entrotaxis,":[160],"dual":[162],"control":[163],"exploitation":[165],"exploration,":[167],"well":[169],"non-statistical":[172],"random":[173],"action":[174],"selection":[175],"method.":[176],"These":[177],"improvements":[178],"were":[179],"evident":[180],"terms":[182],"success":[184],"rate,":[185],"mean":[186],"traveled":[187],"distance,":[188],"search":[190],"time,":[191],"highlighting":[192],"AGDC's":[193],"efficiency":[196],"complex,":[198],"real-world":[199],"scenarios.":[200]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4403713458","counts_by_year":[],"updated_date":"2024-12-04T20:39:51.320559","created_date":"2024-10-25"}