{"id":"https://openalex.org/W4283832579","doi":"https://doi.org/10.48550/arxiv.2207.00632","title":"Offline Policy Optimization with Eligible Actions","display_name":"Offline Policy Optimization with Eligible Actions","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4283832579","doi":"https://doi.org/10.48550/arxiv.2207.00632"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2207.00632","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2207.00632","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100463098","display_name":"Yao Liu","orcid":"https://orcid.org/0000-0003-3382-798X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Yao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045192384","display_name":"Yannis Flet-Berliac","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Flet-Berliac, Yannis","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5084989076","display_name":"Emma Brunskill","orcid":"https://orcid.org/0000-0002-3971-7127"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Brunskill, Emma","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":60},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9925,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9925,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9853,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.985,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/overfitting","display_name":"Overfitting","score":0.9152458},{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization","score":0.54391575}],"concepts":[{"id":"https://openalex.org/C22019652","wikidata":"https://www.wikidata.org/wiki/Q331309","display_name":"Overfitting","level":3,"score":0.9152458},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7338923},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.68975353},{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.5652183},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.54391575},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5319726},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.51473475},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.45480698},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4280674},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.13277623},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.11875442},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2207.00632","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2207.00632","pdf_url":"http://arxiv.org/pdf/2207.00632","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2207.00632","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2207.00632","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"display_name":"Peace, justice, and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.8}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4378510483","https://openalex.org/W4376166922","https://openalex.org/W4362597605","https://openalex.org/W4297676672","https://openalex.org/W4281702477","https://openalex.org/W4221142204","https://openalex.org/W3009056573","https://openalex.org/W2922073769","https://openalex.org/W2490526372","https://openalex.org/W1574414179"],"abstract_inverted_index":{"Offline":[0],"policy":[1,36,85],"optimization":[2],"could":[3],"have":[4],"a":[5,28,108,115,138,141],"large":[6],"impact":[7],"on":[8,46],"many":[9,20],"real-world":[10],"decision-making":[11],"problems,":[12],"as":[13],"online":[14],"learning":[15,170],"may":[16,79],"be":[17,80],"infeasible":[18],"in":[19,34,70,76,137],"applications.":[21],"Importance":[22],"sampling":[23],"and":[24,38,49,113,148,161],"its":[25],"variants":[26],"are":[27],"commonly":[29],"used":[30],"type":[31],"of":[32,52,94,118,127],"estimator":[33],"offline":[35],"evaluation,":[37],"such":[39],"estimators":[40],"typically":[41],"do":[42],"not":[43],"require":[44],"assumptions":[45],"the":[47,72,83,95,119,125,155],"properties":[48],"representational":[50],"capabilities":[51],"value":[53],"function":[54,59],"or":[55],"decision":[56],"process":[57],"model":[58],"classes.":[60],"In":[61],"this":[62,105,131],"paper,":[63],"we":[64],"identify":[65],"an":[66,101],"important":[67],"overfitting":[68,106,160],"phenomenon":[69],"optimizing":[71],"importance":[73],"weighted":[74],"return,":[75],"which":[77],"it":[78],"possible":[81],"for":[82,92],"learned":[84],"to":[86,103,130,166],"essentially":[87],"avoid":[88,104],"making":[89],"aligned":[90],"decisions":[91],"part":[93],"initial":[96],"state":[97],"space.":[98],"We":[99,122,133],"propose":[100],"algorithm":[102,136],"through":[107],"new":[109],"per-state-neighborhood":[110],"normalization":[111],"constraint,":[112],"provide":[114],"theoretical":[116],"justification":[117],"proposed":[120,156],"algorithm.":[121],"also":[123],"show":[124,154],"limitations":[126],"previous":[128],"attempts":[129],"approach.":[132],"test":[134,163],"our":[135],"healthcare-inspired":[139],"simulator,":[140],"logged":[142],"dataset":[143],"collected":[144],"from":[145],"real":[146],"hospitals":[147],"continuous":[149],"control":[150],"tasks.":[151],"These":[152],"experiments":[153],"method":[157],"yields":[158],"less":[159],"better":[162],"performance":[164],"compared":[165],"state-of-the-art":[167],"batch":[168],"reinforcement":[169],"algorithms.":[171]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4283832579","counts_by_year":[],"updated_date":"2024-12-19T21:20:07.248520","created_date":"2022-07-07"}