{"id":"https://openalex.org/W4313914531","doi":"https://doi.org/10.48550/arxiv.2301.02328","title":"Extreme Q-Learning: MaxEnt RL without Entropy","display_name":"Extreme Q-Learning: MaxEnt RL without Entropy","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4313914531","doi":"https://doi.org/10.48550/arxiv.2301.02328"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2301.02328","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2301.02328","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5073107719","display_name":"Divyansh Garg","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Garg, Divyansh","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014139284","display_name":"Joey Hejna","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hejna, Joey","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110482875","display_name":"Matthieu Geist","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Geist, Matthieu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5113804858","display_name":"Stefano Ermon","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ermon, Stefano","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.778623,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":92,"max":93},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9908,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9908,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9402,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6370654},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.43660703}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.779196},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6896809},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6370654},{"id":"https://openalex.org/C147581598","wikidata":"https://www.wikidata.org/wiki/Q729429","display_name":"Extreme value theory","level":2,"score":0.59927833},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.58877254},{"id":"https://openalex.org/C9679016","wikidata":"https://www.wikidata.org/wiki/Q1417473","display_name":"Principle of maximum entropy","level":2,"score":0.57422036},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.5231056},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.43660703},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37218428},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.35707062},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.33904523},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.20267656},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.14587322},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2301.02328","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2301.02328","pdf_url":"http://arxiv.org/pdf/2301.02328","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2301.02328","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2301.02328","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"display_name":"Peace, justice, and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.41}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4380318855","https://openalex.org/W4362501864","https://openalex.org/W4312713068","https://openalex.org/W4306904969","https://openalex.org/W3041490575","https://openalex.org/W2970690932","https://openalex.org/W2138720691","https://openalex.org/W2031695474","https://openalex.org/W2024136090","https://openalex.org/W1508848516"],"abstract_inverted_index":{"Modern":[0],"Deep":[1],"Reinforcement":[2],"Learning":[3],"(RL)":[4],"algorithms":[5],"require":[6,127],"estimates":[7,84],"of":[8,24,72],"the":[9,44,85,91,116,142,152],"maximal":[10,45],"Q-value,":[11],"which":[12,41,66],"are":[13],"difficult":[14],"to":[15,78,98,129],"compute":[16],"in":[17,90,141],"continuous":[18],"domains":[19],"with":[20],"an":[21,80],"infinite":[22],"number":[23],"possible":[25],"actions.":[26],"In":[27],"this":[28],"work,":[29],"we":[30,59,105],"introduce":[31,79],"a":[32,69,101,130],"new":[33],"update":[34],"rule":[35],"for":[36,115],"online":[37,113,166],"and":[38,111,163,171],"offline":[39,119],"RL":[40,94],"directly":[42,83],"models":[43],"value":[46],"using":[47,63],"Extreme":[48],"Value":[49],"Theory":[50],"(EVT),":[51],"drawing":[52],"inspiration":[53],"from":[54,100],"economics.":[55],"By":[56],"doing":[57],"so,":[58],"avoid":[60],"computing":[61],"Q-values":[62],"out-of-distribution":[64],"actions":[65],"is":[67,77],"often":[68],"substantial":[70],"source":[71],"error.":[73],"Our":[74,135],"key":[75],"insight":[76],"objective":[81],"that":[82,123],"optimal":[86],"soft-value":[87],"functions":[88],"(LogSumExp)":[89],"maximum":[92],"entropy":[93],"setting":[95],"without":[96],"needing":[97],"sample":[99],"policy.":[102],"Using":[103],"EVT,":[104],"derive":[106],"our":[107,177],"\\emph{Extreme":[108],"Q-Learning}":[109],"framework":[110],"consequently":[112],"and,":[114],"first":[117],"time,":[118],"MaxEnt":[120],"Q-learning":[121],"algorithms,":[122],"do":[124],"not":[125],"explicitly":[126],"access":[128],"policy":[131],"or":[132],"its":[133],"entropy.":[134],"method":[136],"obtains":[137],"consistently":[138],"strong":[139],"performance":[140],"D4RL":[143],"benchmark,":[144],"outperforming":[145],"prior":[146],"works":[147],"by":[148],"\\emph{10+":[149],"points}":[150],"on":[151,165,176],"challenging":[153],"Franka":[154],"Kitchen":[155],"tasks":[156],"while":[157],"offering":[158],"moderate":[159],"improvements":[160],"over":[161],"SAC":[162],"TD3":[164],"DM":[167],"Control":[168],"tasks.":[169],"Visualizations":[170],"code":[172],"can":[173],"be":[174],"found":[175],"website":[178],"at":[179],"https://div99.github.io/XQL/.":[180]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4313914531","counts_by_year":[{"year":2024,"cited_by_count":6}],"updated_date":"2025-01-04T17:54:10.303665","created_date":"2023-01-10"}