{"id":"https://openalex.org/W4315588664","doi":"https://doi.org/10.48550/arxiv.2301.03043","title":"XDQN: Inherently Interpretable DQN through Mimicking","display_name":"XDQN: Inherently Interpretable DQN through Mimicking","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4315588664","doi":"https://doi.org/10.48550/arxiv.2301.03043"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2301.03043","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2301.03043","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5088371409","display_name":"Andreas Kontogiannis","orcid":"https://orcid.org/0000-0001-7161-5326"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kontogiannis, Andreas","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5040575826","display_name":"George A. Vouros","orcid":"https://orcid.org/0000-0001-5451-622X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vouros, George","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":67},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9717,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9717,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9529,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11489","display_name":"Air Traffic Management and Optimization","score":0.9091,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.73595124},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7032534},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.61307865},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4432603}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2301.03043","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2301.03043","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2301.03043","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4386462264","https://openalex.org/W4364306694","https://openalex.org/W4312192474","https://openalex.org/W4306674287","https://openalex.org/W4283697347","https://openalex.org/W4210805261","https://openalex.org/W3170094116","https://openalex.org/W3107602296","https://openalex.org/W3046775127","https://openalex.org/W2961085424"],"abstract_inverted_index":{"Although":[0],"deep":[1],"reinforcement":[2],"learning":[3],"(DRL)":[4],"methods":[5],"have":[6],"been":[7],"successfully":[8],"applied":[9],"in":[10,15,33,87,106],"challenging":[11],"tasks,":[12],"their":[13],"application":[14],"real-world":[16,90],"operational":[17,91],"settings":[18],"is":[19,35,85,104],"challenged":[20,86],"by":[21],"methods'":[22],"limited":[23],"ability":[24],"to":[25,111,124,131],"provide":[26,132],"explanations.":[27],"Among":[28],"the":[29,36,49,54,112],"paradigms":[30],"for":[31],"explainability":[32],"DRL":[34,50,55],"interpretable":[37,42,78],"box":[38],"design":[39],"paradigm,":[40],"where":[41,94],"models":[43,47],"substitute":[44],"inner":[45],"constituent":[46],"of":[48,73,116,126,138],"method,":[51],"thus":[52],"making":[53],"method":[56],"\"inherently\"":[57],"interpretable.":[58],"In":[59],"this":[60,64],"paper":[61],"we":[62,67],"explore":[63],"paradigm":[65],"and":[66,136],"propose":[68],"XDQN,":[69],"an":[70,77],"explainable":[71],"variation":[72],"DQN,":[74,127],"which":[75],"uses":[76],"policy":[79],"model":[80],"trained":[81],"through":[82],"mimicking.":[83],"XDQN":[84,103,120],"a":[88],"complex,":[89],"multi-agent":[92],"problem,":[93],"agents":[95],"are":[96,141],"independent":[97],"learners":[98],"solving":[99],"congestion":[100],"problems.":[101],"Specifically,":[102],"evaluated":[105],"three":[107],"MARL":[108],"scenarios,":[109],"pertaining":[110],"demand-capacity":[113],"balancing":[114],"problem":[115],"air":[117],"traffic":[118],"management.":[119],"achieves":[121],"performance":[122],"similar":[123],"that":[125],"while":[128],"its":[129],"abilities":[130],"global":[133],"models'":[134],"interpretations":[135,137],"local":[139],"decisions":[140],"demonstrated.":[142]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4315588664","counts_by_year":[],"updated_date":"2025-01-06T19:34:40.338590","created_date":"2023-01-11"}