{"id":"https://openalex.org/W2995509045","doi":"https://doi.org/10.24963/ijcai.2021/461","title":"Independence-aware Advantage Estimation","display_name":"Independence-aware Advantage Estimation","publication_year":2021,"publication_date":"2021-08-01","ids":{"openalex":"https://openalex.org/W2995509045","doi":"https://doi.org/10.24963/ijcai.2021/461","mag":"2995509045"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2021/461","pdf_url":"https://www.ijcai.org/proceedings/2021/0461.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://www.ijcai.org/proceedings/2021/0461.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048462355","display_name":"Pushi Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"funder","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pushi Zhang","raw_affiliation_strings":["Tsinghua University"],"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101858782","display_name":"Li Zhao","orcid":"https://orcid.org/0000-0001-6918-0204"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"funder","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Li Zhao","raw_affiliation_strings":["Microsoft Research Asia"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100731912","display_name":"Guoqing Liu","orcid":"https://orcid.org/0000-0003-4110-7616"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"funder","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guoqing Liu","raw_affiliation_strings":["University of Science and Technology of China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101544241","display_name":"Jiang Bian","orcid":"https://orcid.org/0000-0002-9472-600X"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"funder","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiang Bian","raw_affiliation_strings":["Microsoft Research Asia"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044042138","display_name":"Minlie Huang","orcid":"https://orcid.org/0000-0001-7111-1849"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"funder","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Minlie Huang","raw_affiliation_strings":["Tsinghua University"],"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020025718","display_name":"Tao Qin","orcid":"https://orcid.org/0000-0002-9095-0776"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"funder","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tao Qin","raw_affiliation_strings":["Microsoft Research Asia"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101884287","display_name":"Tie\u2010Yan Liu","orcid":"https://orcid.org/0000-0002-0476-8020"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"funder","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tie-Yan Liu","raw_affiliation_strings":["Microsoft Research Asia"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia","institution_ids":["https://openalex.org/I4210113369"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":1,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":56,"max":66},"biblio":{"volume":null,"issue":null,"first_page":"3349","last_page":"3355"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9927,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11663","display_name":"Viral Infectious Diseases and Gene Expression in Insects","score":0.9742,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/independence","display_name":"Independence","score":0.6468302},{"id":"https://openalex.org/keywords/control-variates","display_name":"Control variates","score":0.60155356},{"id":"https://openalex.org/keywords/monte-carlo-integration","display_name":"Monte Carlo integration","score":0.44532222}],"concepts":[{"id":"https://openalex.org/C196083921","wikidata":"https://www.wikidata.org/wiki/Q7915758","display_name":"Variance (accounting)","level":2,"score":0.7684002},{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.7323302},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.68638647},{"id":"https://openalex.org/C19499675","wikidata":"https://www.wikidata.org/wiki/Q232207","display_name":"Monte Carlo method","level":2,"score":0.6540966},{"id":"https://openalex.org/C35651441","wikidata":"https://www.wikidata.org/wiki/Q625303","display_name":"Independence (probability theory)","level":2,"score":0.6468302},{"id":"https://openalex.org/C121683094","wikidata":"https://www.wikidata.org/wiki/Q3554721","display_name":"Control variates","level":5,"score":0.60155356},{"id":"https://openalex.org/C52740198","wikidata":"https://www.wikidata.org/wiki/Q1539564","display_name":"Importance sampling","level":3,"score":0.5532361},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.49268967},{"id":"https://openalex.org/C132725507","wikidata":"https://www.wikidata.org/wiki/Q39879","display_name":"Monte Carlo integration","level":5,"score":0.44532222},{"id":"https://openalex.org/C191393472","wikidata":"https://www.wikidata.org/wiki/Q15222032","display_name":"Bias of an estimator","level":4,"score":0.42463636},{"id":"https://openalex.org/C165646398","wikidata":"https://www.wikidata.org/wiki/Q3755281","display_name":"Minimum-variance unbiased estimator","level":3,"score":0.3671254},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3570426},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.34095734},{"id":"https://openalex.org/C111350023","wikidata":"https://www.wikidata.org/wiki/Q1191869","display_name":"Markov chain Monte Carlo","level":3,"score":0.31302834},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.26779544},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.24623817},{"id":"https://openalex.org/C13153151","wikidata":"https://www.wikidata.org/wiki/Q1639846","display_name":"Hybrid Monte Carlo","level":4,"score":0.13677067},{"id":"https://openalex.org/C121955636","wikidata":"https://www.wikidata.org/wiki/Q4116214","display_name":"Accounting","level":1,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2021/461","pdf_url":"https://www.ijcai.org/proceedings/2021/0461.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2021/461","pdf_url":"https://www.ijcai.org/proceedings/2021/0461.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":20,"referenced_works":["https://openalex.org/W1191599655","https://openalex.org/W1771410628","https://openalex.org/W2136602922","https://openalex.org/W2155027007","https://openalex.org/W2569805627","https://openalex.org/W2625967765","https://openalex.org/W2736601468","https://openalex.org/W2786303200","https://openalex.org/W2787938642","https://openalex.org/W2951470703","https://openalex.org/W2952191563","https://openalex.org/W2963457007","https://openalex.org/W2963864421","https://openalex.org/W2964043796","https://openalex.org/W2970811133","https://openalex.org/W3168837055","https://openalex.org/W4287998216","https://openalex.org/W4289760659","https://openalex.org/W4293415974","https://openalex.org/W4298857966"],"related_works":["https://openalex.org/W4237435333","https://openalex.org/W4234882310","https://openalex.org/W4210503132","https://openalex.org/W3020567546","https://openalex.org/W2999390738","https://openalex.org/W2352602506","https://openalex.org/W2349547417","https://openalex.org/W2065756054","https://openalex.org/W1979154598","https://openalex.org/W1966798441"],"abstract_inverted_index":{"Most":[0],"of":[1,14,53,93],"existing":[2,106,131],"advantage":[3,55,73,132],"function":[4],"estimation":[5,118,133],"methods":[6,134],"in":[7,41,135],"reinforcement":[8],"learning":[9],"suffer":[10],"from":[11],"the":[12,21,32,51,54,59,80,91,94,99,117],"problem":[13],"high":[15,95],"variance,":[16],"which":[17,43],"scales":[18],"unfavorably":[19],"with":[20,75,105,130],"time":[22],"horizon.":[23],"To":[24,88],"address":[25],"this":[26],"challenge,":[27],"we":[28,102],"propose":[29],"to":[30,48,67],"identify":[31],"independence":[33,61],"property":[34,62],"between":[35],"current":[36],"action":[37],"and":[38],"future":[39],"states":[40],"environments,":[42],"can":[44,63],"be":[45,64],"further":[46,89],"leveraged":[47],"effectively":[49],"reduce":[50],"variance":[52,77,96],"estimation.":[56],"In":[57],"particular,":[58],"recognized":[60],"naturally":[65],"utilized":[66],"construct":[68],"a":[69,85,110],"novel":[70],"importance":[71],"sampling":[72],"estimator":[74,108],"close-to-zero":[76],"even":[78],"when":[79],"Monte-Carlo":[81,107],"return":[82],"signal":[83],"yields":[84],"large":[86],"variance.":[87,119],"remove":[90],"risk":[92],"introduced":[97],"by":[98,115],"new":[100],"estimator,":[101],"combine":[103],"it":[104],"via":[109],"reward":[111],"decomposition":[112],"model":[113],"learned":[114],"minimizing":[116],"Experiments":[120],"demonstrate":[121],"that":[122],"our":[123],"method":[124],"achieves":[125],"higher":[126],"sample":[127],"efficiency":[128],"compared":[129],"complex":[136],"environments.":[137]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2995509045","counts_by_year":[{"year":2019,"cited_by_count":1}],"updated_date":"2025-04-04T07:08:47.388208","created_date":"2019-12-26"}