{"id":"https://openalex.org/W4404344712","doi":"https://doi.org/10.48550/arxiv.2411.00205","title":"Compositional Automata Embeddings for Goal-Conditioned Reinforcement\n Learning","display_name":"Compositional Automata Embeddings for Goal-Conditioned Reinforcement\n Learning","publication_year":2024,"publication_date":"2024-10-31","ids":{"openalex":"https://openalex.org/W4404344712","doi":"https://doi.org/10.48550/arxiv.2411.00205"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.00205","pdf_url":"http://arxiv.org/pdf/2411.00205","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2411.00205","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5071670707","display_name":"Beyazit Yalcinkaya","orcid":"https://orcid.org/0000-0001-9987-635X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yalcinkaya, Beyazit","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059784141","display_name":"Niklas Lauffer","orcid":"https://orcid.org/0000-0003-2726-5159"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lauffer, Niklas","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044414209","display_name":"Marcell Vazquez-Chanlatte","orcid":"https://orcid.org/0000-0002-1248-0000"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vazquez-Chanlatte, Marcell","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5064230639","display_name":"Sanjit A. Seshia","orcid":"https://orcid.org/0000-0001-6190-8707"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Seshia, Sanjit A.","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":77},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T12288","display_name":"Optimization and Search Problems","score":0.9758,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12288","display_name":"Optimization and Search Problems","score":0.9758,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.932,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12784","display_name":"Modular Robots and Swarm Intelligence","score":0.9265,"subfield":{"id":"https://openalex.org/subfields/2210","display_name":"Mechanical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.64586055},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6208219},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4818793},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.3797413},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34683573},{"id":"https://openalex.org/C188147891","wikidata":"https://www.wikidata.org/wiki/Q147638","display_name":"Cognitive science","level":1,"score":0.32135683},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.16306818}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.00205","pdf_url":"http://arxiv.org/pdf/2411.00205","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.00205","pdf_url":"http://arxiv.org/pdf/2411.00205","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W4310083477","https://openalex.org/W2920061524","https://openalex.org/W2748952813","https://openalex.org/W2328553770","https://openalex.org/W2145821588","https://openalex.org/W2107890255","https://openalex.org/W2106552856","https://openalex.org/W2038908348","https://openalex.org/W1977959518"],"abstract_inverted_index":{"Goal-conditioned":[0],"reinforcement":[1],"learning":[2],"is":[3],"a":[4,73,79,87,124,128],"powerful":[5],"way":[6],"to":[7,29,53,98,110,127,157],"control":[8],"an":[9],"AI":[10],"agent's":[11],"behavior":[12,113],"at":[13],"runtime.":[14],"That":[15],"said,":[16],"popular":[17],"goal":[18],"representations,":[19],"e.g.,":[20],"target":[21],"states":[22],"or":[23,32],"natural":[24],"language,":[25],"are":[26],"either":[27],"limited":[28],"Markovian":[30],"tasks":[31,132],"rely":[33],"on":[34,140],"ambiguous":[35],"task":[36,160],"semantics.":[37],"We":[38],"propose":[39,134],"representing":[40],"temporal":[41,63],"goals":[42],"using":[43],"compositions":[44],"of":[45,67,130,170],"deterministic":[46],"finite":[47],"automata":[48],"(cDFAs)":[49],"and":[50,95,133,162],"use":[51],"cDFAs":[52,57,85],"guide":[54],"RL":[55],"agents.":[56],"balance":[58],"the":[59,82,99,150,167],"need":[60],"for":[61],"formal":[62],"semantics":[64],"with":[65,92],"ease":[66],"interpretation:":[68],"if":[69],"one":[70,76],"can":[71,77,101],"understand":[72,78],"flow":[74],"chart,":[75],"cDFA.":[80],"On":[81],"other":[83],"hand,":[84],"form":[86],"countably":[88],"infinite":[89],"concept":[90],"class":[91],"Boolean":[93],"semantics,":[94],"subtle":[96],"changes":[97],"automaton":[100],"result":[102],"in":[103],"very":[104],"different":[105],"tasks,":[106],"making":[107],"them":[108],"difficult":[109],"condition":[111],"agent":[112],"on.":[114],"To":[115],"address":[116],"this,":[117],"we":[118,147],"observe":[119],"that":[120,149],"all":[121],"paths":[122],"through":[123],"DFA":[125],"correspond":[126],"series":[129],"reach-avoid":[131],"pre-training":[135,152],"graph":[136],"neural":[137],"network":[138],"embeddings":[139],"\"reach-avoid":[141],"derived\"":[142],"DFAs.":[143],"Through":[144],"empirical":[145],"evaluation,":[146],"demonstrate":[148],"proposed":[151],"method":[153],"enables":[154],"zero-shot":[155],"generalization":[156],"various":[158],"cDFA":[159],"classes":[161],"accelerated":[163],"policy":[164],"specialization":[165],"without":[166],"myopic":[168],"suboptimality":[169],"hierarchical":[171],"methods.":[172]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4404344712","counts_by_year":[],"updated_date":"2025-04-23T19:12:00.666284","created_date":"2024-11-14"}