{"id":"https://openalex.org/W4391307202","doi":"https://doi.org/10.1109/smc53992.2023.10394201","title":"Overcoming Delayed Feedback via Overlook Decision Making","display_name":"Overcoming Delayed Feedback via Overlook Decision Making","publication_year":2023,"publication_date":"2023-10-01","ids":{"openalex":"https://openalex.org/W4391307202","doi":"https://doi.org/10.1109/smc53992.2023.10394201"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/smc53992.2023.10394201","pdf_url":null,"source":{"id":"https://openalex.org/S4363607746","display_name":"2022 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100583624","display_name":"YaLou Yu","orcid":null},"institutions":[],"countries":["CN"],"is_corresponding":false,"raw_author_name":"YaLou Yu","raw_affiliation_strings":["Tsinghua Shenzhen International Graduate University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua Shenzhen International Graduate University, Shenzhen, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044226655","display_name":"Bo Xia","orcid":"https://orcid.org/0000-0001-7694-4743"},"institutions":[],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bo Xia","raw_affiliation_strings":["Tsinghua Shenzhen International Graduate University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua Shenzhen International Graduate University, Shenzhen, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104170669","display_name":"Minzhi Xie","orcid":null},"institutions":[],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Minzhi Xie","raw_affiliation_strings":["Tsinghua Shenzhen International Graduate University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua Shenzhen International Graduate University, Shenzhen, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100737125","display_name":"Xueqian Wang","orcid":"https://orcid.org/0000-0003-3542-0593"},"institutions":[],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xueqian Wang","raw_affiliation_strings":["Tsinghua Shenzhen International Graduate University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua Shenzhen International Graduate University, Shenzhen, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100765837","display_name":"Zhiheng Li","orcid":"https://orcid.org/0000-0002-1523-1114"},"institutions":[],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiheng Li","raw_affiliation_strings":["Tsinghua Shenzhen International Graduate University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua Shenzhen International Graduate University, Shenzhen, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5026500154","display_name":"Yongzhe Chang","orcid":null},"institutions":[],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongzhe Chang","raw_affiliation_strings":["Tsinghua Shenzhen International Graduate University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua Shenzhen International Graduate University, Shenzhen, China","institution_ids":[]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":67},"biblio":{"volume":null,"issue":null,"first_page":"31","last_page":"37"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11902","display_name":"Intelligent Tutoring Systems and Adaptive Learning","score":0.6798,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11902","display_name":"Intelligent Tutoring Systems and Adaptive Learning","score":0.6798,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5743162},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.32381845},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.18113023}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/smc53992.2023.10394201","pdf_url":null,"source":{"id":"https://openalex.org/S4363607746","display_name":"2022 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.56,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, justice, and strong institutions"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":22,"referenced_works":["https://openalex.org/W1589577058","https://openalex.org/W1964827441","https://openalex.org/W1986278286","https://openalex.org/W1988098342","https://openalex.org/W2096750723","https://openalex.org/W2102847492","https://openalex.org/W2117864026","https://openalex.org/W2157331557","https://openalex.org/W2158782408","https://openalex.org/W2159566498","https://openalex.org/W2268617045","https://openalex.org/W2736601468","https://openalex.org/W2877093712","https://openalex.org/W2897445793","https://openalex.org/W2953708620","https://openalex.org/W3082694719","https://openalex.org/W3154264273","https://openalex.org/W3195689706","https://openalex.org/W3203259721","https://openalex.org/W4240729697","https://openalex.org/W4298206671","https://openalex.org/W4385245566"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2530322880","https://openalex.org/W2390279801","https://openalex.org/W2382290278","https://openalex.org/W2376932109","https://openalex.org/W2359140296","https://openalex.org/W2358668433","https://openalex.org/W2350741829","https://openalex.org/W2001405890","https://openalex.org/W1596801655"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1,50],"is":[2,31,62,70],"one":[3],"of":[4,48,183,193],"the":[5,16,19,46,74,97,102,111,126,191],"most":[6,53],"general":[7],"paradigms":[8],"to":[9,36,56,63,72,100,128,185],"solve":[10,57],"sequential":[11],"decision":[12,155],"making":[13],"issues":[14],"on":[15,171,204],"assumption":[17,30],"that":[18,84,209],"action":[20],"selection":[21],"and":[22,91,119,166,190,195,206],"environmental":[23],"feedback":[24],"are":[25,198],"instantaneous,":[26],"however,":[27],"unfortunately":[28],"this":[29],"rarely":[32],"true":[33],"with":[34,141,218,222],"regard":[35],"such":[37,215],"ubiquitous":[38],"delays":[39,220],"in":[40,214],"real-world":[41],"system":[42],"which":[43,69,114,226],"could":[44],"degrade":[45],"performance":[47,213],"reinforcement":[49],"algorithms.":[51],"The":[52],"common":[54],"solution":[55],"a":[58,65,85,134,162,167,180],"fixed":[59],"delay":[60],"problem":[61],"design":[64],"forward":[66],"dynamic":[67,187],"model":[68,140,165,177],"used":[71],"predict":[73],"newest":[75,103],"state":[76,87,149],"by":[77,125],"recursively":[78],"iterating":[79],"over":[80],"long":[81],"steps":[82],"so":[83],"predicted":[86],"can":[88],"be":[89,94],"got":[90],"it":[92],"would":[93],"taken":[95],"as":[96],"agent's":[98,122],"observation":[99],"make":[101,115],"decision.":[104,123],"However,":[105],"there":[106],"exists":[107],"cumulative":[108,130],"errors":[109],"during":[110],"iterative":[112],"process":[113],"long-term":[116],"prediction":[117,164,194],"inaccurate":[118],"further":[120],"affect":[121],"Motivated":[124],"goal":[127],"reduce":[129],"errors,":[131],"we":[132],"propose":[133],"new":[135],"algorithm":[136],"named":[137],"Multi-step":[138],"Prediction":[139],"Delayed":[142],"Observation(MPDO),":[143],"aiming":[144],"at":[145,150],"accurately":[146],"predicting":[147],"future":[148],"longer":[151],"horizons":[152],"for":[153],"better":[154],"making.":[156],"Our":[157,176],"approach":[158],"includes":[159],"two":[160],"parts:":[161],"multi-step":[163],"strategy":[168],"training":[169],"based":[170],"proximal":[172],"policy":[173],"optimization":[174],"algorithms(PPO).":[175],"only":[178],"needs":[179],"small":[181],"amount":[182],"data":[184],"conduct":[186],"modeling":[188],"quickly,":[189],"accuracy":[192],"iteration":[196],"speed":[197],"higher":[199,212],"than":[200],"traditional":[201],"methods.":[202],"Experiments":[203],"Gym":[205],"MuJoCo":[207],"show":[208],"MPDO":[210],"achieves":[211],"different":[216,219],"tasks":[217],"compared":[221],"other":[223],"state-of-the-art":[224],"methods,":[225],"verify":[227],"our":[228],"method's":[229],"effectiveness.":[230]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4391307202","counts_by_year":[],"updated_date":"2025-01-19T16:46:34.819531","created_date":"2024-01-30"}