{"id":"https://openalex.org/W4403883826","doi":"https://doi.org/10.48550/arxiv.2410.01428","title":"Can We Further Elicit Reasoning in LLMs? Critic-Guided Planning with\n Retrieval-Augmentation for Solving Challenging Tasks","display_name":"Can We Further Elicit Reasoning in LLMs? Critic-Guided Planning with\n Retrieval-Augmentation for Solving Challenging Tasks","publication_year":2024,"publication_date":"2024-10-02","ids":{"openalex":"https://openalex.org/W4403883826","doi":"https://doi.org/10.48550/arxiv.2410.01428"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.01428","pdf_url":"http://arxiv.org/pdf/2410.01428","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2410.01428","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5007523355","display_name":"Xin-Wang LI","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Xingxuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100585032","display_name":"Weiwen Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Weiwen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055802902","display_name":"Rui Zhao","orcid":"https://orcid.org/0000-0001-5862-0809"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Ruochen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045631551","display_name":"Fangkai Jiao","orcid":"https://orcid.org/0000-0002-0670-6990"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiao, Fangkai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005443526","display_name":"Shafiq Joty","orcid":"https://orcid.org/0000-0002-9222-2641"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Joty, Shafiq","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5086674741","display_name":"Lidong Bing","orcid":"https://orcid.org/0000-0003-4565-6313"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bing, Lidong","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":83},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10906","display_name":"AI-based Problem Solving and Planning","score":0.9508,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10906","display_name":"AI-based Problem Solving and Planning","score":0.9508,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11010","display_name":"Logic, Reasoning, and Knowledge","score":0.9318,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.43219095},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.34823424},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.34789857}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.01428","pdf_url":"http://arxiv.org/pdf/2410.01428","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.01428","pdf_url":"http://arxiv.org/pdf/2410.01428","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4396701345","https://openalex.org/W4396696052","https://openalex.org/W4391913857","https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2376932109","https://openalex.org/W2358668433","https://openalex.org/W2001405890"],"abstract_inverted_index":{"State-of-the-art":[0],"large":[1],"language":[2],"models":[3,90],"(LLMs)":[4],"exhibit":[5],"impressive":[6],"problem-solving":[7],"capabilities":[8],"but":[9,52],"may":[10],"struggle":[11],"with":[12,80],"complex":[13,32,221],"reasoning":[14,50,67,94,243],"and":[15,25,37,62,69,95,107,121,141,163,201,211,220,244],"factual":[16,42],"correctness.":[17,43],"Existing":[18],"methods":[19,45],"harness":[20],"the":[21,113,143,171,175,186,190],"strengths":[22],"of":[23,198],"chain-of-thought":[24],"retrieval-augmented":[26],"generation":[27],"(RAG)":[28],"to":[29,40,65,91,168,184],"decompose":[30],"a":[31,83,102,128,195],"problem":[33,103],"into":[34],"simpler":[35],"steps":[36],"apply":[38],"retrieval":[39,96,223],"improve":[41],"These":[44],"work":[46],"well":[47],"on":[48,55,147,208],"straightforward":[49],"tasks":[51,57],"often":[53],"falter":[54],"challenging":[56,209,238],"such":[58],"as":[59],"competitive":[60,215],"programming":[61],"mathematics,":[63],"due":[64],"frequent":[66],"errors":[68],"irrelevant":[70],"knowledge":[71],"retrieval.":[72,245],"To":[73],"address":[74],"this,":[75],"we":[76],"introduce":[77],"Critic-guided":[78],"planning":[79],"Retrieval-augmentation,":[81],"CR-Planner,":[82],"novel":[84],"framework":[85],"that":[86,228],"leverages":[87],"fine-tuned":[88],"critic":[89,129,151,164,191],"guide":[92],"both":[93,242],"processes":[97],"through":[98,139],"planning.":[99],"CR-Planner":[100,167,207,229],"solves":[101],"by":[104,124,127,160,240],"iteratively":[105],"selecting":[106,142],"executing":[108],"sub-goals.":[109],"Initially,":[110],"it":[111],"identifies":[112],"most":[114],"promising":[115],"sub-goal":[116,132,138],"from":[117,149],"reasoning,":[118,219],"query":[119],"generation,":[120],"retrieval,":[122],"guided":[123],"rewards":[125],"given":[126],"model":[130,152],"named":[131,153],"critic.":[133,155],"It":[134],"then":[135],"executes":[136],"this":[137],"sampling":[140],"optimal":[144],"output":[145],"based":[146],"evaluations":[148],"another":[150],"execution":[154],"This":[156],"iterative":[157],"process,":[158],"informed":[159],"retrieved":[161],"information":[162],"models,":[165,192],"enables":[166],"effectively":[169],"navigate":[170],"solution":[172],"space":[173],"towards":[174],"final":[176],"answer.":[177],"We":[178,205],"employ":[179],"Monte":[180],"Carlo":[181],"Tree":[182],"Search":[183],"collect":[185],"data":[187],"for":[188,194],"training":[189],"allowing":[193],"systematic":[196],"exploration":[197],"action":[199],"sequences":[200],"their":[202],"long-term":[203],"impacts.":[204],"validate":[206],"domain-knowledge-intensive":[210],"reasoning-heavy":[212],"tasks,":[213],"including":[214],"programming,":[216],"theorem-driven":[217],"math":[218],"domain":[222],"problems.":[224],"Our":[225],"experiments":[226],"demonstrate":[227],"significantly":[230],"outperforms":[231],"baselines,":[232],"highlighting":[233],"its":[234],"effectiveness":[235],"in":[236],"addressing":[237],"problems":[239],"improving":[241]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4403883826","counts_by_year":[],"updated_date":"2025-01-18T04:11:30.755320","created_date":"2024-10-30"}