{"id":"https://openalex.org/W4404404376","doi":"https://doi.org/10.48550/arxiv.2411.04330","title":"Scaling Laws for Precision","display_name":"Scaling Laws for Precision","publication_year":2024,"publication_date":"2024-11-06","ids":{"openalex":"https://openalex.org/W4404404376","doi":"https://doi.org/10.48550/arxiv.2411.04330"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.04330","pdf_url":"http://arxiv.org/pdf/2411.04330","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2411.04330","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5104120561","display_name":"Tanishq Kumar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kumar, Tanishq","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114657958","display_name":"Zachary Ankner","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ankner, Zachary","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054443111","display_name":"Brian F. Spector","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Spector, Benjamin F.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039282308","display_name":"Blake Bordelon","orcid":"https://orcid.org/0000-0003-0455-9445"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bordelon, Blake","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000043237","display_name":"Niklas Muennighoff","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Muennighoff, Niklas","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036307641","display_name":"Mansheej Paul","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Paul, Mansheej","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023195984","display_name":"Cengiz Pehlevan","orcid":"https://orcid.org/0000-0001-9767-6063"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pehlevan, Cengiz","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103852640","display_name":"Christopher R\u00e9","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"R\u00e9, Christopher","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5031731960","display_name":"Aditi Raghunathan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Raghunathan, Aditi","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":82},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10778","display_name":"Philosophy and History of Science","score":0.113,"subfield":{"id":"https://openalex.org/subfields/1207","display_name":"History and Philosophy of Science"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10778","display_name":"Philosophy and History of Science","score":0.113,"subfield":{"id":"https://openalex.org/subfields/1207","display_name":"History and Philosophy of Science"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scaling-law","display_name":"Scaling law","score":0.6722405}],"concepts":[{"id":"https://openalex.org/C2988430800","wikidata":"https://www.wikidata.org/wiki/Q428971","display_name":"Scaling law","level":3,"score":0.6722405},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.5966425},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.436977},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.36150742},{"id":"https://openalex.org/C121864883","wikidata":"https://www.wikidata.org/wiki/Q677916","display_name":"Statistical physics","level":1,"score":0.35358363},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.26126006},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.24963513},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.08489752}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.04330","pdf_url":"http://arxiv.org/pdf/2411.04330","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.04330","pdf_url":"http://arxiv.org/pdf/2411.04330","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4298190863","https://openalex.org/W3217323610","https://openalex.org/W3102781811","https://openalex.org/W3100245096","https://openalex.org/W2519545954","https://openalex.org/W2117748264","https://openalex.org/W2083192414","https://openalex.org/W2056843536","https://openalex.org/W1995826969","https://openalex.org/W1990001655"],"abstract_inverted_index":{"Low":[0],"precision":[1,42,61,119],"training":[2,33,39,58,114,145],"and":[3,9,34,62,111,131,146,158],"inference":[4,147],"affect":[5],"both":[6,32],"the":[7,44,53,70,100,126],"quality":[8],"cost":[10],"of":[11,102],"language":[12],"models,":[13],"but":[14],"current":[15],"scaling":[16,29,94,127],"laws":[17,30,95,128],"do":[18],"not":[19],"account":[20],"for":[21,31,129],"this.":[22],"In":[23],"this":[24],"work,":[25],"we":[26,67],"devise":[27],"\"precision-aware\"":[28],"inference.":[35],"We":[36,124,151],"propose":[37],"that":[38,69,113,141],"in":[40,59,108,117,148],"lower":[41,118],"reduces":[43],"model's":[45],"\"effective":[46],"parameter":[47],"count,\"":[48],"allowing":[49],"us":[50,97],"to":[51,98,134,166,172],"predict":[52,99],"additional":[54,86],"loss":[55,101],"incurred":[56],"from":[57,144],"low":[60],"post-train":[63],"quantization.":[64],"For":[65,91],"inference,":[66],"find":[68],"degradation":[71,143],"introduced":[72],"by":[73],"post-training":[74],"quantization":[75,133],"increases":[76],"as":[77],"models":[78,116],"are":[79],"trained":[80,169],"on":[81,153,162,170],"more":[82],"data,":[83],"eventually":[84],"making":[85],"pretraining":[87,132,156],"data":[88],"actively":[89],"harmful.":[90],"training,":[92],"our":[93,160],"allow":[96],"a":[103,137],"model":[104,163],"with":[105],"different":[106,109],"parts":[107],"precisions,":[110],"suggest":[112],"larger":[115],"may":[120],"be":[121],"compute":[122],"optimal.":[123],"unify":[125],"post":[130],"arrive":[135],"at":[136],"single":[138],"functional":[139],"form":[140],"predicts":[142],"varied":[149],"precisions.":[150],"fit":[152],"over":[154],"465":[155],"runs":[157],"validate":[159],"predictions":[161],"sizes":[164],"up":[165,171],"1.7B":[167],"parameters":[168],"26B":[173],"tokens.":[174]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4404404376","counts_by_year":[],"updated_date":"2025-01-21T16:38:03.732412","created_date":"2024-11-16"}