{"id":"https://openalex.org/W4378718571","doi":"https://doi.org/10.48550/arxiv.2305.17115","title":"Policy Synthesis and Reinforcement Learning for Discounted LTL","display_name":"Policy Synthesis and Reinforcement Learning for Discounted LTL","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4378718571","doi":"https://doi.org/10.48550/arxiv.2305.17115"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2305.17115","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2305.17115","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065059795","display_name":"Rajeev Alur","orcid":"https://orcid.org/0000-0003-1733-7083"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alur, Rajeev","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029243071","display_name":"Osbert Bastani","orcid":"https://orcid.org/0000-0001-9990-7566"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bastani, Osbert","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055988569","display_name":"Kishor Jothimurugan","orcid":"https://orcid.org/0000-0003-1448-2947"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jothimurugan, Kishor","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084221298","display_name":"Mateo Perez","orcid":"https://orcid.org/0000-0003-4220-3212"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Perez, Mateo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077549627","display_name":"Fabio Somenzi","orcid":"https://orcid.org/0000-0002-2085-2003"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Somenzi, Fabio","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5020302140","display_name":"Ashutosh Trivedi","orcid":"https://orcid.org/0000-0001-9346-0126"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Trivedi, Ashutosh","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":65},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11178","display_name":"Receptor Mechanisms and Signaling","score":0.8777,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11178","display_name":"Receptor Mechanisms and Signaling","score":0.8777,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.7633,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10142","display_name":"Formal Methods in Verification","score":0.7486,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C6177178","wikidata":"https://www.wikidata.org/wiki/Q10998070","display_name":"Discounting","level":2,"score":0.78340137},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.72087646},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.6385408},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.47914785},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.47282752},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.43429402},{"id":"https://openalex.org/C4777664","wikidata":"https://www.wikidata.org/wiki/Q1536492","display_name":"Linear temporal logic","level":2,"score":0.41894925},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34528702},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.29178017},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2893613},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.25956607},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.14533895},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.11520323},{"id":"https://openalex.org/C10138342","wikidata":"https://www.wikidata.org/wiki/Q43015","display_name":"Finance","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2305.17115","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2305.17115","pdf_url":"http://arxiv.org/pdf/2305.17115","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2305.17115","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2305.17115","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"score":0.78,"display_name":"Peace, justice, and strong institutions","id":"https://metadata.un.org/sdg/16"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W3168977894","https://openalex.org/W3096874164","https://openalex.org/W2937181779","https://openalex.org/W2386410636","https://openalex.org/W2357975469","https://openalex.org/W2341346307","https://openalex.org/W2145363145","https://openalex.org/W2009064992","https://openalex.org/W1985560493","https://openalex.org/W1626977535"],"abstract_inverted_index":{"The":[0],"difficulty":[1],"of":[2,56,65,72],"manually":[3],"specifying":[4],"reward":[5,95,98],"functions":[6],"has":[7,27],"led":[8],"to":[9,18,34,89,93],"an":[10],"interest":[11],"in":[12,37,78],"using":[13],"linear":[14],"temporal":[15],"logic":[16],"(LTL)":[17],"express":[19],"objectives":[20],"for":[21,75],"reinforcement":[22],"learning":[23,47],"(RL).":[24],"However,":[25],"LTL":[26,74,92],"the":[28,38,62,66,70],"downside":[29],"that":[30],"it":[31],"is":[32],"sensitive":[33],"small":[35],"perturbations":[36],"transition":[39,84],"probabilities,":[40,85],"which":[41],"prevents":[42],"probably":[43],"approximately":[44],"correct":[45],"(PAC)":[46],"without":[48],"additional":[49],"assumptions.":[50],"Time":[51],"discounting":[52],"provides":[53],"a":[54,97],"way":[55],"removing":[57],"this":[58],"sensitivity,":[59],"while":[60],"retaining":[61],"high":[63],"expressivity":[64],"logic.":[67],"We":[68],"study":[69],"use":[71],"discounted":[73,91],"policy":[76],"synthesis":[77],"Markov":[79],"decision":[80],"processes":[81],"with":[82],"unknown":[83],"and":[86],"show":[87],"how":[88],"reduce":[90],"discounted-sum":[94],"via":[96],"machine":[99],"when":[100],"all":[101],"discount":[102],"factors":[103],"are":[104],"identical.":[105]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4378718571","counts_by_year":[],"updated_date":"2025-04-08T23:13:50.330889","created_date":"2023-05-30"}