{"id":"https://openalex.org/W4404574036","doi":"https://doi.org/10.48550/arxiv.2411.12580","title":"Procedural Knowledge in Pretraining Drives Reasoning in Large Language\n Models","display_name":"Procedural Knowledge in Pretraining Drives Reasoning in Large Language\n Models","publication_year":2024,"publication_date":"2024-11-19","ids":{"openalex":"https://openalex.org/W4404574036","doi":"https://doi.org/10.48550/arxiv.2411.12580"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.12580","pdf_url":"http://arxiv.org/pdf/2411.12580","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2411.12580","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5060010430","display_name":"Laura Ruis","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ruis, Laura","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038732160","display_name":"Maximilian Mozes","orcid":"https://orcid.org/0000-0001-8138-3792"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mozes, Maximilian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027005127","display_name":"Jong Bin Bae","orcid":"https://orcid.org/0000-0002-3913-1011"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bae, Juhan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054907218","display_name":"Siddhartha Rao Kamalakara","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kamalakara, Siddhartha Rao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5098901824","display_name":"Dwarak Talupuru","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Talupuru, Dwarak","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086062922","display_name":"A. Locatelli","orcid":"https://orcid.org/0000-0002-7122-3311"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Locatelli, Acyr","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051243028","display_name":"Robert Kirk","orcid":"https://orcid.org/0000-0002-6541-5915"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kirk, Robert","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079315903","display_name":"Tim Rockt\u00e4schel","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rockt\u00e4schel, Tim","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023508792","display_name":"Edward Grefenstette","orcid":"https://orcid.org/0000-0003-1164-8809"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Grefenstette, Edward","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5018864006","display_name":"Max Bartolo","orcid":"https://orcid.org/0009-0007-3301-7895"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bartolo, Max","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":84},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9903,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9903,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.961,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9496,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.55356586},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.34436762},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3319049},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.3264218},{"id":"https://openalex.org/C188147891","wikidata":"https://www.wikidata.org/wiki/Q147638","display_name":"Cognitive science","level":1,"score":0.3209092}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.12580","pdf_url":"http://arxiv.org/pdf/2411.12580","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.12580","pdf_url":"http://arxiv.org/pdf/2411.12580","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4396701345","https://openalex.org/W4396696052","https://openalex.org/W4391913857","https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2376932109","https://openalex.org/W2358668433","https://openalex.org/W2001405890"],"abstract_inverted_index":{"The":[0,58],"capabilities":[1],"and":[2,115,117,137,275],"limitations":[3],"of":[4,54,61,67,91,111,119,161,185,291],"Large":[5],"Language":[6],"Models":[7],"have":[8],"been":[9],"sketched":[10],"out":[11],"in":[12,15,64,200],"great":[13],"detail":[14],"recent":[16],"years,":[17],"providing":[18],"an":[19],"intriguing":[20],"yet":[21],"conflicting":[22],"picture.":[23],"On":[24,36],"the":[25,37,52,65,74,102,128,141,154,179,183,192,201,209,221,224,231,236,243,265,269],"one":[26],"hand,":[27,39],"LLMs":[28,68,94],"demonstrate":[29],"a":[30,167,171,255,278,288],"general":[31],"ability":[32],"to":[33,47,78,140,194,223,253,267],"solve":[34],"problems.":[35],"other":[38],"they":[40,105],"show":[41,198,214],"surprising":[42],"reasoning":[43,98,135,176,207,226,237,268],"gaps":[44],"when":[45,96],"compared":[46],"humans,":[48],"casting":[49],"doubt":[50],"on":[51,157],"robustness":[53],"their":[55,120],"generalisation":[56,92],"strategies.":[57],"sheer":[59],"volume":[60],"data":[62,104,142,162],"used":[63,77],"design":[66],"has":[69,170],"precluded":[70],"us":[71],"from":[72,285],"applying":[73],"method":[75],"traditionally":[76],"measure":[79],"generalisation:":[80],"train-test":[81],"set":[82],"separation.":[83],"To":[84],"overcome":[85],"this,":[86],"we":[87,123,229,240],"study":[88],"what":[89,125],"kind":[90],"strategies":[93],"employ":[95],"performing":[97],"tasks":[99,136],"by":[100],"investigating":[101],"pretraining":[103,121],"rely":[106,156],"on.":[107],"For":[108],"two":[109],"models":[110,155,270],"different":[112,175],"sizes":[113],"(7B":[114],"35B)":[116],"2.5B":[118],"tokens,":[122],"identify":[124],"documents":[126,234,245,286],"influence":[127,173],"model":[129],"outputs":[130],"for":[131,146,163,206,235],"three":[132],"simple":[133],"mathematical":[134],"contrast":[138],"this":[139],"that":[143,191,242,264,281],"are":[144],"influential":[145,203,244],"answering":[147],"factual":[148,165,195],"questions.":[149],"We":[150,188],"find":[151,190],"that,":[152],"while":[153],"mostly":[158],"distinct":[159],"sets":[160],"each":[164],"question,":[166],"document":[168],"often":[169,197,246],"similar":[172,289],"across":[174],"questions":[177,196,208,238],"within":[178],"same":[180],"task,":[181],"indicating":[182],"presence":[184],"procedural":[186,248,283],"knowledge.":[187],"further":[189],"answers":[193,210,222],"up":[199,215],"most":[202],"data.":[204],"However,":[205],"usually":[211],"do":[212,220],"not":[213],"as":[216],"highly":[217],"influential,":[218],"nor":[219],"intermediate":[225],"steps.":[227],"When":[228],"characterise":[230],"top":[232],"ranked":[233],"qualitatively,":[239],"confirm":[241],"contain":[247],"knowledge,":[249],"like":[250,277],"demonstrating":[251],"how":[252],"obtain":[254],"solution":[256],"using":[257],"formulae":[258],"or":[259],"code.":[260],"Our":[261],"findings":[262],"indicate":[263],"approach":[266],"use":[271],"is":[272],"unlike":[273],"retrieval,":[274],"more":[276],"generalisable":[279],"strategy":[280],"synthesises":[282],"knowledge":[284],"doing":[287],"form":[290],"reasoning.":[292]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4404574036","counts_by_year":[],"updated_date":"2024-12-13T07:14:40.923247","created_date":"2024-11-21"}