{"id":"https://openalex.org/W4383173701","doi":"https://doi.org/10.48550/arxiv.2307.00405","title":"Provably Efficient UCB-type Algorithms For Learning Predictive State Representations","display_name":"Provably Efficient UCB-type Algorithms For Learning Predictive State Representations","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4383173701","doi":"https://doi.org/10.48550/arxiv.2307.00405"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2307.00405","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2307.00405","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5025491239","display_name":"Ruiquan Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Ruiquan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100384384","display_name":"Yingbin Liang","orcid":"https://orcid.org/0000-0003-2631-4262"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liang, Yingbin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100459997","display_name":"Jing Yang","orcid":"https://orcid.org/0000-0003-2799-0000"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Jing","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":65},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10876","display_name":"Fault Detection and Control Systems","score":0.9951,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10876","display_name":"Fault Detection and Control Systems","score":0.9951,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9921,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10820","display_name":"Fuzzy Logic and Control Systems","score":0.9748,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6279149},{"id":"https://openalex.org/C2777299769","wikidata":"https://www.wikidata.org/wiki/Q3707858","display_name":"Type (biology)","level":2,"score":0.5259314},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.51645756},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.49522272},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40193585},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.36409104},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2307.00405","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2307.00405","pdf_url":"http://arxiv.org/pdf/2307.00405","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2307.00405","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2307.00405","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.82,"display_name":"Peace, justice, and strong institutions"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4386462264","https://openalex.org/W4364306694","https://openalex.org/W4312192474","https://openalex.org/W4306674287","https://openalex.org/W4283697347","https://openalex.org/W4210805261","https://openalex.org/W3170094116","https://openalex.org/W3107602296","https://openalex.org/W3046775127","https://openalex.org/W2961085424"],"abstract_inverted_index":{"The":[0],"general":[1,108],"sequential":[2,47],"decision-making":[3,48],"problem,":[4],"which":[5,90],"includes":[6],"Markov":[7],"decision":[8],"processes":[9],"(MDPs)":[10],"and":[11,37,100,148,166,186],"partially":[12],"observable":[13],"MDPs":[14],"(POMDPs)":[15],"as":[16,94],"special":[17],"cases,":[18],"aims":[19],"at":[20],"maximizing":[21],"a":[22,27,33,56,134],"cumulative":[23],"reward":[24],"by":[25,60],"making":[26],"sequence":[28],"of":[29,35,114],"decisions":[30],"based":[31,88],"on":[32],"history":[34],"observations":[36],"actions":[38],"over":[39],"time.":[40],"Recent":[41],"studies":[42],"have":[43,91,102],"shown":[44],"that":[45,75,138],"the":[46,80,83,112,126,141,146,154],"problem":[49],"is":[50],"statistically":[51],"learnable":[52],"if":[53],"it":[54],"admits":[55],"low-rank":[57],"structure":[58],"modeled":[59],"predictive":[61],"state":[62],"representations":[63],"(PSRs).":[64],"Despite":[65],"these":[66,119],"advancements,":[67],"existing":[68,172],"approaches":[69,173],"typically":[70],"involve":[71],"oracles":[72],"or":[73],"steps":[74],"are":[76],"computationally":[77,95],"intractable.":[78],"On":[79],"other":[81],"hand,":[82],"upper":[84,139],"confidence":[85],"bound":[86],"(UCB)":[87],"approaches,":[89],"served":[92],"successfully":[93],"efficient":[96],"methods":[97],"in":[98,118],"bandits":[99],"MDPs,":[101],"not":[103],"been":[104],"investigated":[105],"for":[106,131,158,163,174],"more":[107,120],"PSRs,":[109,132,175],"due":[110],"to":[111,171],"difficulty":[113],"optimistic":[115],"bonus":[116,136],"design":[117],"challenging":[121],"settings.":[122],"This":[123],"paper":[124],"proposes":[125],"first":[127],"known":[128],"UCB-type":[129,161,177],"approach":[130],"featuring":[133],"novel":[135],"term":[137],"bounds":[140,157],"total":[142],"variation":[143],"distance":[144],"between":[145],"estimated":[147],"true":[149],"models.":[150],"We":[151],"further":[152],"characterize":[153],"sample":[155],"complexity":[156],"our":[159,176],"designed":[160],"algorithms":[162,178],"both":[164],"online":[165],"offline":[167],"PSRs.":[168],"In":[169],"contrast":[170],"enjoy":[179],"computational":[180],"tractability,":[181],"last-iterate":[182],"guaranteed":[183,187],"near-optimal":[184],"policy,":[185],"model":[188],"accuracy.":[189]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4383173701","counts_by_year":[],"updated_date":"2025-04-15T17:16:25.892048","created_date":"2023-07-05"}