{"id":"https://openalex.org/W4224903119","doi":"https://doi.org/10.48550/arxiv.2204.09656","title":"A Fast Post-Training Pruning Framework for Transformers","display_name":"A Fast Post-Training Pruning Framework for Transformers","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4224903119","doi":"https://doi.org/10.48550/arxiv.2204.09656"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2204.09656","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2204.09656","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5066299337","display_name":"Woosuk Kwon","orcid":"https://orcid.org/0009-0008-8870-4892"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kwon, Woosuk","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090175489","display_name":"Sehoon Kim","orcid":"https://orcid.org/0000-0002-8074-1006"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Sehoon","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033006662","display_name":"Michael W. Mahoney","orcid":"https://orcid.org/0000-0001-7920-4652"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mahoney, Michael W.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012298536","display_name":"Joseph Hassoun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hassoun, Joseph","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047285420","display_name":"Kurt Keutzer","orcid":"https://orcid.org/0000-0003-3868-8501"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Keutzer, Kurt","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5103894843","display_name":"Amir Gholami","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gholami, Amir","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":23,"citation_normalized_percentile":{"value":0.854089,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.999,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.999,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9892,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9771,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.69397366},{"id":"https://openalex.org/keywords/retraining","display_name":"Retraining","score":0.6707742},{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.5750381},{"id":"https://openalex.org/keywords/flops","display_name":"FLOPS","score":0.45072153}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.78273785},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.7046625},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.69397366},{"id":"https://openalex.org/C2778712577","wikidata":"https://www.wikidata.org/wiki/Q3505966","display_name":"Retraining","level":2,"score":0.6707742},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.63316107},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.5750381},{"id":"https://openalex.org/C3826847","wikidata":"https://www.wikidata.org/wiki/Q188768","display_name":"FLOPS","level":2,"score":0.45072153},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.44072524},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39754665},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.34530124},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.33889797},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.18623567},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08045301},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C155202549","wikidata":"https://www.wikidata.org/wiki/Q178803","display_name":"International trade","level":1,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C6557445","wikidata":"https://www.wikidata.org/wiki/Q173113","display_name":"Agronomy","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2204.09656","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2204.09656","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2204.09656","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W98480971","https://openalex.org/W4292794827","https://openalex.org/W4224939635","https://openalex.org/W3092292339","https://openalex.org/W2164382479","https://openalex.org/W2150291671","https://openalex.org/W2146343568","https://openalex.org/W2058965144","https://openalex.org/W2027972911","https://openalex.org/W2013643406"],"abstract_inverted_index":{"Pruning":[0],"is":[1,187],"an":[2],"effective":[3],"way":[4],"to":[5,33,39,105,136,154],"reduce":[6],"the":[7,22,75,109,117,126,200],"huge":[8],"inference":[9,163],"cost":[10,29],"of":[11,191],"Transformer":[12,76],"models.":[13,23,201],"However,":[14],"prior":[15],"work":[16],"on":[17,108,145,182],"pruning":[18,53,196],"Transformers":[19,56,176],"requires":[20],"retraining":[21],"This":[24],"can":[25],"add":[26],"high":[27,31,84],"training":[28],"and":[30,67,103,120,138,140,147,159],"complexity":[32],"model":[34,77],"deployment,":[35],"making":[36],"it":[37],"difficult":[38],"use":[40],"in":[41,157,162,170,177],"many":[42],"practical":[43],"situations.":[44],"To":[45,82],"address":[46],"this,":[47],"we":[48,88,141],"propose":[49],"a":[50,64,68,94,183],"fast":[51],"post-training":[52],"framework":[54,72,151,174],"for":[55,129],"that":[57,99,115,124,198],"does":[58],"not":[59],"require":[60],"any":[61],"retraining.":[62],"Given":[63],"resource":[65],"constraint":[66],"sample":[69],"dataset,":[70],"our":[71,134,173],"automatically":[73],"prunes":[74,175],"using":[78],"structured":[79],"sparsity":[80],"methods.":[81],"retain":[83],"accuracy":[85],"without":[86],"retraining,":[87],"introduce":[89],"three":[90],"novel":[91],"techniques:":[92],"(i)":[93],"lightweight":[95],"mask":[96,113,122],"search":[97,118],"algorithm":[98],"finds":[100],"which":[101,186],"heads":[102],"filters":[104],"prune":[106],"based":[107],"Fisher":[110],"information;":[111],"(ii)":[112],"rearrangement":[114],"complements":[116],"algorithm;":[119],"(iii)":[121],"tuning":[123],"reconstructs":[125],"output":[127],"activations":[128],"each":[130],"layer.":[131],"We":[132],"apply":[133],"method":[135],"BERT-base":[137],"DistilBERT,":[139],"evaluate":[142],"its":[143],"effectiveness":[144],"GLUE":[146],"SQuAD":[148],"benchmarks.":[149],"Our":[150],"achieves":[152],"up":[153],"2.0x":[155],"reduction":[156],"FLOPs":[158],"1.56x":[160],"speedup":[161],"latency,":[164],"while":[165],"maintaining":[166],"<":[167],"1%":[168],"loss":[169],"accuracy.":[171],"Importantly,":[172],"less":[178],"than":[179,194],"3":[180],"minutes":[181],"single":[184],"GPU,":[185],"over":[188],"two":[189],"orders":[190],"magnitude":[192],"faster":[193],"existing":[195],"approaches":[197],"retrain":[199]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4224903119","counts_by_year":[{"year":2024,"cited_by_count":16},{"year":2023,"cited_by_count":7}],"updated_date":"2025-01-04T15:11:46.003513","created_date":"2022-04-27"}