{"id":"https://openalex.org/W4403747985","doi":"https://doi.org/10.48550/arxiv.2409.12798","title":"Assessing the Zero-Shot Capabilities of LLMs for Action Evaluation in RL","display_name":"Assessing the Zero-Shot Capabilities of LLMs for Action Evaluation in RL","publication_year":2024,"publication_date":"2024-09-19","ids":{"openalex":"https://openalex.org/W4403747985","doi":"https://doi.org/10.48550/arxiv.2409.12798"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2409.12798","pdf_url":"http://arxiv.org/pdf/2409.12798","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2409.12798","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047676934","display_name":"Eduardo Pignatelli","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pignatelli, Eduardo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087706654","display_name":"Johan Ferret","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ferret, Johan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114400481","display_name":"Tim Rock\u00e4schel","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rock\u00e4schel, Tim","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023508792","display_name":"Edward Grefenstette","orcid":"https://orcid.org/0000-0003-1164-8809"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Grefenstette, Edward","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114400482","display_name":"Davide Paglieri","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Paglieri, Davide","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114400483","display_name":"Samuel Coward","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Coward, Samuel","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5071335453","display_name":"Laura Toni","orcid":"https://orcid.org/0000-0002-8441-8791"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Toni, Laura","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":84},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T13295","display_name":"Safety Systems Engineering in Autonomy","score":0.8719,"subfield":{"id":"https://openalex.org/subfields/2213","display_name":"Safety, Risk, Reliability and Quality"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13295","display_name":"Safety Systems Engineering in Autonomy","score":0.8719,"subfield":{"id":"https://openalex.org/subfields/2213","display_name":"Safety, Risk, Reliability and Quality"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10734","display_name":"Information and Cyber Security","score":0.8242,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12423","display_name":"Software Reliability and Analysis Research","score":0.8196,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/zero","display_name":"Zero (linguistics)","score":0.7660662}],"concepts":[{"id":"https://openalex.org/C2780813799","wikidata":"https://www.wikidata.org/wiki/Q3274237","display_name":"Zero (linguistics)","level":2,"score":0.7660662},{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.75007534},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.6389099},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.37443995},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.34512317},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.19289574},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.12836146},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.074955136},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2409.12798","pdf_url":"http://arxiv.org/pdf/2409.12798","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2409.12798","pdf_url":"http://arxiv.org/pdf/2409.12798","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W4214877189","https://openalex.org/W2980279061","https://openalex.org/W2899084033","https://openalex.org/W2773965352","https://openalex.org/W2748952813","https://openalex.org/W2381179799","https://openalex.org/W2366718574","https://openalex.org/W2334685461","https://openalex.org/W2074502265"],"abstract_inverted_index":{"The":[0],"temporal":[1],"credit":[2,94,183,206],"assignment":[3,95,207],"problem":[4],"is":[5,35,42,129,150,201],"a":[6,23,30,83,107,127,162,168,202],"central":[7],"challenge":[8],"in":[9,22,119,181,184,208],"Reinforcement":[10],"Learning":[11],"(RL),":[12],"concerned":[13],"with":[14,79],"attributing":[15],"the":[16,39,74,114,143,147,155,197,211],"appropriate":[17],"influence":[18],"to":[19,28,92,105],"each":[20],"actions":[21],"trajectory":[24],"for":[25,76,157,205],"their":[26,65],"ability":[27],"achieve":[29],"goal.":[31],"However,":[32],"when":[33,146],"feedback":[34],"delayed":[36,153],"and":[37,44,55,61,67,99,112,131,152],"sparse,":[38],"learning":[40,144],"signal":[41,140],"poor,":[43],"action":[45],"evaluation":[46,164],"becomes":[47],"harder.":[48],"Canonical":[49],"solutions,":[50],"such":[51],"as":[52],"reward":[53,97,139,149],"shaping":[54,98],"options,":[56],"require":[57],"extensive":[58],"domain":[59],"knowledge":[60,198,215],"manual":[62],"intervention,":[63],"limiting":[64],"scalability":[66],"applicability.":[68],"In":[69],"this":[70],"work,":[71],"we":[72],"lay":[73],"foundations":[75],"Credit":[77],"Assignment":[78],"Language":[80,89],"Models":[81,90],"(CALM),":[82],"novel":[84],"approach":[85],"that":[86,176,196],"leverages":[87],"Large":[88],"(LLMs)":[91],"automate":[93],"via":[96],"options":[100],"discovery.":[101],"CALM":[102,132,166],"uses":[103],"LLMs":[104,177,200],"decompose":[106],"task":[108,148],"into":[109,216],"elementary":[110],"subgoals":[111,118],"assess":[113],"achievement":[115],"of":[116,165,170,199,213],"these":[117],"state-action":[120],"transitions.":[121],"Every":[122],"time":[123],"an":[124,134],"option":[125],"terminates,":[126],"subgoal":[128],"achieved,":[130],"provides":[133],"auxiliary":[135],"reward.":[136],"This":[137],"additional":[138],"can":[141,178],"enhance":[142],"process":[145],"sparse":[151],"without":[154,187],"need":[156],"human-designed":[158],"rewards.":[159],"We":[160],"provide":[161],"preliminary":[163,193],"using":[167],"dataset":[169],"human-annotated":[171],"demonstrations":[172],"from":[173],"MiniHack,":[174],"suggesting":[175],"be":[179],"effective":[180],"assigning":[182],"zero-shot":[185],"settings,":[186],"examples":[188],"or":[189],"LLM":[190],"fine-tuning.":[191],"Our":[192],"results":[194],"indicate":[195],"promising":[203],"prior":[204],"RL,":[209],"facilitating":[210],"transfer":[212],"human":[214],"value":[217],"functions.":[218]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4403747985","counts_by_year":[],"updated_date":"2024-12-15T17:26:30.323765","created_date":"2024-10-25"}