{"id":"https://openalex.org/W4213122246","doi":"https://doi.org/10.3389/frobt.2022.762051","title":"Exploratory State Representation Learning","display_name":"Exploratory State Representation Learning","publication_year":2022,"publication_date":"2022-02-14","ids":{"openalex":"https://openalex.org/W4213122246","doi":"https://doi.org/10.3389/frobt.2022.762051","pmid":"https://pubmed.ncbi.nlm.nih.gov/35237669"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.3389/frobt.2022.762051","pdf_url":"https://www.frontiersin.org/articles/10.3389/frobt.2022.762051/pdf","source":{"id":"https://openalex.org/S2595095599","display_name":"Frontiers in Robotics and AI","issn_l":"2296-9144","issn":["2296-9144"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320527","host_organization_name":"Frontiers Media","host_organization_lineage":["https://openalex.org/P4310320527"],"host_organization_lineage_names":["Frontiers Media"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"journal-article","indexed_in":["arxiv","crossref","datacite","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.frontiersin.org/articles/10.3389/frobt.2022.762051/pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5004706201","display_name":"Astrid Merckling","orcid":null},"institutions":[{"id":"https://openalex.org/I4210150358","display_name":"Institut Syst\u00e8mes Intelligents et de Robotique","ror":"https://ror.org/05neq8668","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I154526488","https://openalex.org/I39804081","https://openalex.org/I4210150358","https://openalex.org/I4210159245"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Astrid Merckling","raw_affiliation_strings":["Institut des Syst\u00e8mes Intelligents et de Robotique"],"affiliations":[{"raw_affiliation_string":"Institut des Syst\u00e8mes Intelligents et de Robotique","institution_ids":["https://openalex.org/I4210150358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071054174","display_name":"Nicolas Perrin-Gilbert","orcid":"https://orcid.org/0000-0001-8626-1938"},"institutions":[{"id":"https://openalex.org/I4210150358","display_name":"Institut Syst\u00e8mes Intelligents et de Robotique","ror":"https://ror.org/05neq8668","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I154526488","https://openalex.org/I39804081","https://openalex.org/I4210150358","https://openalex.org/I4210159245"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Nicolas Perrin-Gilbert","raw_affiliation_strings":["Institut des Syst\u00e8mes Intelligents et de Robotique"],"affiliations":[{"raw_affiliation_string":"Institut des Syst\u00e8mes Intelligents et de Robotique","institution_ids":["https://openalex.org/I4210150358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091258715","display_name":"Alex Coninx","orcid":null},"institutions":[{"id":"https://openalex.org/I4210150358","display_name":"Institut Syst\u00e8mes Intelligents et de Robotique","ror":"https://ror.org/05neq8668","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I154526488","https://openalex.org/I39804081","https://openalex.org/I4210150358","https://openalex.org/I4210159245"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Alex Coninx","raw_affiliation_strings":["Institut des Syst\u00e8mes Intelligents et de Robotique"],"affiliations":[{"raw_affiliation_string":"Institut des Syst\u00e8mes Intelligents et de Robotique","institution_ids":["https://openalex.org/I4210150358"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5003629424","display_name":"St\u00e9phane Doncieux","orcid":"https://orcid.org/0000-0003-1541-054X"},"institutions":[{"id":"https://openalex.org/I4210150358","display_name":"Institut Syst\u00e8mes Intelligents et de Robotique","ror":"https://ror.org/05neq8668","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I154526488","https://openalex.org/I39804081","https://openalex.org/I4210150358","https://openalex.org/I4210159245"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"St\u00e9phane Doncieux","raw_affiliation_strings":["Institut des Syst\u00e8mes Intelligents et de Robotique"],"affiliations":[{"raw_affiliation_string":"Institut des Syst\u00e8mes Intelligents et de Robotique","institution_ids":["https://openalex.org/I4210150358"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":1900,"currency":"USD","value_usd":1900,"provenance":"doaj"},"apc_paid":{"value":1781,"currency":"EUR","value_usd":1920,"provenance":"openapc"},"fwci":0.324,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":2,"citation_normalized_percentile":{"value":0.685436,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":70,"max":76},"biblio":{"volume":"9","issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9953,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9947,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/representation","display_name":"Representation","score":0.7065471},{"id":"https://openalex.org/keywords/maximization","display_name":"Maximization","score":0.5207412},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature Learning","score":0.4527852}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.83319914},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.73288596},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.7065471},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.60644996},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5810013},{"id":"https://openalex.org/C2776330181","wikidata":"https://www.wikidata.org/wiki/Q18358244","display_name":"Maximization","level":2,"score":0.5207412},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4916938},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.4527852},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3307418},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.19098145},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.15975213},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.080155015},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":7,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.3389/frobt.2022.762051","pdf_url":"https://www.frontiersin.org/articles/10.3389/frobt.2022.762051/pdf","source":{"id":"https://openalex.org/S2595095599","display_name":"Frontiers in Robotics and AI","issn_l":"2296-9144","issn":["2296-9144"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320527","host_organization_name":"Frontiers Media","host_organization_lineage":["https://openalex.org/P4310320527"],"host_organization_lineage_names":["Frontiers Media"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2109.13596","pdf_url":"https://arxiv.org/pdf/2109.13596","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://doaj.org/article/aacfc343d3f9470b9e5d410a29c9c31e","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://hal.science/hal-03864236","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":["Centre National de la Recherche Scientifique"],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8883277","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":["National Institutes of Health"],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/35237669","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":["National Institutes of Health"],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2109.13596","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.3389/frobt.2022.762051","pdf_url":"https://www.frontiersin.org/articles/10.3389/frobt.2022.762051/pdf","source":{"id":"https://openalex.org/S2595095599","display_name":"Frontiers in Robotics and AI","issn_l":"2296-9144","issn":["2296-9144"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320527","host_organization_name":"Frontiers Media","host_organization_lineage":["https://openalex.org/P4310320527"],"host_organization_lineage_names":["Frontiers Media"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":["https://openalex.org/W4213122246"],"referenced_works_count":63,"referenced_works":["https://openalex.org/W1164749991","https://openalex.org/W1522301498","https://openalex.org/W1555801537","https://openalex.org/W1657674574","https://openalex.org/W1881419322","https://openalex.org/W1921523184","https://openalex.org/W1957496711","https://openalex.org/W1959608418","https://openalex.org/W1994616650","https://openalex.org/W200769302","https://openalex.org/W2048984163","https://openalex.org/W2094024286","https://openalex.org/W2098774185","https://openalex.org/W2101524054","https://openalex.org/W2135362819","https://openalex.org/W2154997814","https://openalex.org/W2158782408","https://openalex.org/W2160589914","https://openalex.org/W2171438620","https://openalex.org/W2179072710","https://openalex.org/W2546302380","https://openalex.org/W2550848904","https://openalex.org/W2559655401","https://openalex.org/W2567455162","https://openalex.org/W2612690371","https://openalex.org/W2746553466","https://openalex.org/W2754517384","https://openalex.org/W2769112066","https://openalex.org/W2781585732","https://openalex.org/W2787666871","https://openalex.org/W2790924949","https://openalex.org/W2885550588","https://openalex.org/W2888561335","https://openalex.org/W2898585858","https://openalex.org/W2899771611","https://openalex.org/W2904246096","https://openalex.org/W2927928207","https://openalex.org/W2941078345","https://openalex.org/W2949475445","https://openalex.org/W2949561945","https://openalex.org/W2950872548","https://openalex.org/W2962804563","https://openalex.org/W2963009616","https://openalex.org/W2963523627","https://openalex.org/W2963864421","https://openalex.org/W2977481643","https://openalex.org/W3021708257","https://openalex.org/W3041404693","https://openalex.org/W3115293622","https://openalex.org/W3116700304","https://openalex.org/W3124420883","https://openalex.org/W4287779179","https://openalex.org/W4287811291","https://openalex.org/W4288294128","https://openalex.org/W4288333794","https://openalex.org/W4289294484","https://openalex.org/W4297791094","https://openalex.org/W4298206671","https://openalex.org/W4298857966","https://openalex.org/W4306290508","https://openalex.org/W4391602018","https://openalex.org/W4394666657","https://openalex.org/W834081922"],"related_works":["https://openalex.org/W4386136067","https://openalex.org/W4380318855","https://openalex.org/W4362501864","https://openalex.org/W4306904969","https://openalex.org/W4286858940","https://openalex.org/W3049728571","https://openalex.org/W2586732548","https://openalex.org/W2138720691","https://openalex.org/W2031695474","https://openalex.org/W2024136090"],"abstract_inverted_index":{"Not":[0],"having":[1],"access":[2],"to":[3,10,26,105,124,136,170,180],"compact":[4,94],"and":[5,73,97,122,179],"meaningful":[6],"representations":[7,96,182],"is":[8,52,64,103],"known":[9],"significantly":[11,184],"increase":[12],"the":[13,62,69,110,113,125,138,156,167],"complexity":[14],"of":[15,50,71,128,141],"reinforcement":[16],"learning":[17,30,133,186],"(RL).":[18],"For":[19],"this":[20,129],"reason,":[21],"it":[22,91,116],"can":[23,42,55,159],"be":[24,44],"useful":[25],"perform":[27],"state":[28,40,95,99,181],"representation":[29,41],"(SRL)":[31],"before":[32],"tackling":[33],"RL":[34,188],"tasks.":[35,189],"However,":[36],"obtaining":[37],"a":[38,47,57,79,98,131,142,148],"good":[39],"only":[43],"done":[45],"if":[46,61],"large":[48],"diversity":[49],"transitions":[51,153],"observed,":[53],"which":[54,102,155],"require":[56],"difficult":[58],"exploration,":[59],"especially":[60],"environment":[63],"initially":[65],"reward-free.":[66],"To":[67],"solve":[68],"problems":[70],"exploration":[72,172],"SRL":[74],"in":[75,147,173,187],"parallel,":[76],"we":[77],"propose":[78],"new":[80],"approach":[81,168],"called":[82],"XSRL":[83],"(eXploratory":[84],"State":[85],"Representation":[86],"Learning).":[87],"On":[88,112],"one":[89],"hand,":[90,115],"jointly":[92],"learns":[93],"transition":[100],"estimator":[101],"used":[104],"remove":[106],"unexploitable":[107],"information":[108],"from":[109,154],"representations.":[111],"other":[114],"continuously":[117],"trains":[118],"an":[119],"inverse":[120],"model,":[121],"adds":[123],"prediction":[126],"error":[127],"model":[130],"k-step":[132],"progress":[134],"bonus":[135],"form":[137],"maximization":[139],"objective":[140],"discovery":[143],"policy.":[144],"This":[145],"results":[146,164],"policy":[149],"that":[150,166,183],"seeks":[151],"complex":[152],"trained":[157],"models":[158],"effectively":[160],"learn.":[161],"Our":[162],"experimental":[163],"show":[165],"leads":[169],"efficient":[171],"challenging":[174],"environments":[175],"with":[176],"image":[177],"observations,":[178],"accelerate":[185]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4213122246","counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":1}],"updated_date":"2024-12-10T14:35:59.538608","created_date":"2022-02-24"}