{"id":"https://openalex.org/W4400267192","doi":"https://doi.org/10.48550/arxiv.2407.00215","title":"LLM Critics Help Catch LLM Bugs","display_name":"LLM Critics Help Catch LLM Bugs","publication_year":2024,"publication_date":"2024-06-28","ids":{"openalex":"https://openalex.org/W4400267192","doi":"https://doi.org/10.48550/arxiv.2407.00215"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.00215","pdf_url":"http://arxiv.org/pdf/2407.00215","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2407.00215","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5003470218","display_name":"Nat McAleese","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"McAleese, Nat","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5099881264","display_name":"Rai Michael Pokorny","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pokorny, Rai Michael","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5099881265","display_name":"Juan Felipe Ceron Uribe","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Uribe, Juan Felipe Ceron","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5099881266","display_name":"Evgenia Nitishinskaya","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nitishinskaya, Evgenia","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033339425","display_name":"Maja Tr\u0119bacz","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Trebacz, Maja","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5099881267","display_name":"Jan Leike","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Leike, Jan","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.999961,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":97},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T13851","display_name":"Law, AI, and Intellectual Property","score":0.8528,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13851","display_name":"Law, AI, and Intellectual Property","score":0.8528,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11750","display_name":"Phytoplasmas and Hemiptera pathogens","score":0.7868,"subfield":{"id":"https://openalex.org/subfields/1110","display_name":"Plant Science"},"field":{"id":"https://openalex.org/fields/11","display_name":"Agricultural and Biological Sciences"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12733","display_name":"Vector-Borne Animal Diseases","score":0.7462,"subfield":{"id":"https://openalex.org/subfields/1105","display_name":"Ecology, Evolution, Behavior and Systematics"},"field":{"id":"https://openalex.org/fields/11","display_name":"Agricultural and Biological Sciences"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4225507}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.00215","pdf_url":"http://arxiv.org/pdf/2407.00215","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.00215","pdf_url":"http://arxiv.org/pdf/2407.00215","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4396701345","https://openalex.org/W4396696052","https://openalex.org/W4395014643","https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2382290278","https://openalex.org/W2376932109","https://openalex.org/W2358668433","https://openalex.org/W2001405890"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1],"from":[2,59],"human":[3,21,75,82,91],"feedback":[4,54],"(RLHF)":[5],"is":[6],"fundamentally":[7],"limited":[8],"by":[9],"the":[10,120,132],"capacity":[11],"of":[12,79,109,122,139,160,167],"humans":[13,35,148],"to":[14,36,50,169],"correctly":[15],"evaluate":[16,39],"model":[17],"output.":[18],"To":[19],"improve":[20],"evaluation":[22,83],"ability":[23],"and":[24,81,128,162],"overcome":[25],"that":[26,33,85,100,145],"limitation":[27],"this":[28],"work":[29],"trains":[30],"\"critic\"":[31],"models":[32,86],"help":[34],"more":[37,88],"accurately":[38],"model-written":[40,70],"code.":[41],"These":[42],"critics":[43,104,161,171],"are":[44,72,125],"themselves":[45],"LLMs":[46,176],"trained":[47],"with":[48],"RLHF":[49],"write":[51],"natural":[52],"language":[53],"highlighting":[55],"problems":[56],"in":[57,77,111],"code":[58,64,95],"real-world":[60],"assistant":[61],"tasks.":[62],"On":[63],"containing":[65],"naturally":[66],"occurring":[67],"LLM":[68,103,170],"errors":[69,110],"critiques":[71,76],"preferred":[73],"over":[74],"63%":[78],"cases,":[80],"finds":[84],"catch":[87,164],"bugs":[89,144,168],"than":[90,175],"contractors":[92,163],"paid":[93],"for":[94,131],"review.":[96],"We":[97],"further":[98],"confirm":[99],"our":[101],"fine-tuned":[102],"can":[105,136],"successfully":[106],"identify":[107],"hundreds":[108],"ChatGPT":[112],"training":[113],"data":[114],"rated":[115],"as":[116],"\"flawless\",":[117],"even":[118],"though":[119],"majority":[121],"those":[123],"tasks":[124,127],"non-code":[126],"thus":[129],"out-of-distribution":[130],"critic":[133],"model.":[134],"Critics":[135],"have":[137,154],"limitations":[138],"their":[140],"own,":[141],"including":[142],"hallucinated":[143],"could":[146],"mislead":[147],"into":[149],"making":[150],"mistakes":[151],"they":[152],"might":[153],"otherwise":[155],"avoided,":[156],"but":[157],"human-machine":[158],"teams":[159],"similar":[165],"numbers":[166],"while":[172],"hallucinating":[173],"less":[174],"alone.":[177]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4400267192","counts_by_year":[{"year":2024,"cited_by_count":2}],"updated_date":"2025-01-17T05:12:19.720768","created_date":"2024-07-03"}