{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T17:23:05Z","timestamp":1730222585374,"version":"3.28.0"},"reference-count":42,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,9,2]],"date-time":"2024-09-02T00:00:00Z","timestamp":1725235200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,9,2]],"date-time":"2024-09-02T00:00:00Z","timestamp":1725235200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001321","name":"National Research Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001321","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100009950","name":"Ministry of Education","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100009950","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,9,2]]},"DOI":"10.1109\/fpl64840.2024.00048","type":"proceedings-article","created":{"date-parts":[[2024,10,9]],"date-time":"2024-10-09T17:45:25Z","timestamp":1728495925000},"page":"297-303","source":"Crossref","is-referenced-by-count":0,"title":["Shedding the Bits: Pushing the Boundaries of Quantization with Minifloats on FPGAs"],"prefix":"10.1109","author":[{"given":"Shivam","family":"Aggarwal","sequence":"first","affiliation":[{"name":"National University of Singapore,School of Computing,Singapore"}]},{"given":"Hans Jakob","family":"Damsgaard","sequence":"additional","affiliation":[{"name":"Tampere University,Electrical Engineering Unit,Finland"}]},{"given":"Alessandro","family":"Pappalardo","sequence":"additional","affiliation":[{"name":"National University of Singapore,School of Computing,Singapore"}]},{"given":"Giuseppe","family":"Franco","sequence":"additional","affiliation":[{"name":"AMD,Germany"}]},{"given":"Thomas B.","family":"Preu\u00dfer","sequence":"additional","affiliation":[{"name":"AMD Research and Advanced Development,Dublin,Ireland"}]},{"given":"Michaela","family":"Blott","sequence":"additional","affiliation":[{"name":"AMD Research and Advanced Development,Dublin,Ireland"}]},{"given":"Tulika","family":"Mitra","sequence":"additional","affiliation":[{"name":"National University of Singapore,School of Computing,Singapore"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00141"},{"article-title":"Up or Down? Adaptive Rounding for Post-Training Quantization","volume-title":"Proceedings of the 37th International Conference on Machine Learning","author":"Nagel","key":"ref2"},{"key":"ref3","article-title":"GPTQ: Accurate Post-training Compression for Generative Pretrained Transformers","author":"Frantar","year":"2022","journal-title":"arXiv:2210.17323"},{"article-title":"FP8 Formats for Deep Learning","year":"2022","author":"Micikevicius","key":"ref4"},{"key":"ref5","article-title":"FP8 Quantization: The Power of the Exponent","volume":"abs\/2208.09225","author":"Kuzmin","year":"2022","journal-title":"ArXiv"},{"article-title":"8-bit Numerical Formats for Deep Neural Networks","year":"2022","author":"Noune","key":"ref6"},{"journal-title":"AMD-Xilinx","article-title":"Higher Performance Neural Networks with Small Floating Point","year":"2021","key":"ref7"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1285"},{"article-title":"SmoothQuant: Accurate and Efficient Post-Training Quantization for Large Language Models","volume-title":"Proceedings of the 40th International Conference on Machine Learning","author":"Xiao","key":"ref9"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref11","first-page":"1","article-title":"IEEE Standard for Floating-Point Arithmetic","year":"2008","journal-title":"IEEE Std 754-2008"},{"article-title":"The Case for 4-bit Precision: k-bit Inference Scaling Laws","year":"2023","author":"Dettmers","key":"ref12"},{"issue":"1","key":"ref13","article-title":"Efficient Design of Low Bit-width Convolutional Neural Networks on FPGA with Optimized Dot Product Units","volume-title":"ACM Trans. Reconfigurable Technol. Syst.","volume":"16","author":"V\u00e8stias"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/FPT.2009.5377624"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/FPL.2019.00062"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/3373087.3375311"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/FPL.2018.00016"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3020078.3021744"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/2809432"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2016.2532874"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01558"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2020.2968810"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/FPT.2008.4762363"},{"article-title":"Training and Inference of Large Language Models using 8-bit Floating Point","year":"2023","author":"Perez","key":"ref24"},{"key":"ref25","first-page":"4486","article-title":"Same, Same But Different: Recovering Neural Network Quantization Error Through Weight Factorization","volume-title":"Proceedings of the 36th International Conference on Machine Learning, ser. Proceedings of Machine Learning Research","volume":"97","author":"Meller"},{"key":"ref26","article-title":"Fighting Quantization Bias with Bias","volume":"abs\/1906.03193","author":"Finkelstein","year":"2019","journal-title":"ArXiv"},{"key":"ref27","article-title":"BRECQ: Pushing the Limit of Post-Training Quantization by Block Reconstruction","volume":"abs\/2102.05426","author":"Li","year":"2021","journal-title":"ArXiv"},{"article-title":"Learning Sparse Neural Networks through L_0 Regularization","volume-title":"6th International Conference on Learning Representations, ICLR 2018. OpenReview.net","author":"Louizos","key":"ref28"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/s00607-010-0127-7"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1515\/9783110203196"},{"key":"ref31","article-title":"Design-Space Exploration for the Kulisch Accumulator","volume-title":"HAL Inria","author":"Uguen","year":"2017"},{"volume-title":"Versal Architecture and Product Data Sheet: Overview","year":"2023","key":"ref32"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/3242897"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00474"},{"key":"ref36","article-title":"An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale","author":"Dosovitskiy","year":"2021","journal-title":"ICLR"},{"article-title":"Xilinx\/brevitas","year":"2023","author":"Pappalardo","key":"ref37"},{"key":"ref38","article-title":"Trained Uniform Quantization for Accurate and Efficient Neural Network Inference on Fixed-Point Hardware","volume":"abs\/1903.08066","author":"Jain","year":"2019","journal-title":"ArXiv"},{"key":"ref39","article-title":"Improving Neural Network Quantization without Retraining using Outlier Channel Splitting","volume":"abs\/1901.09504","author":"Zhao","year":"2019","journal-title":"ArXiv"},{"key":"ref40","article-title":"PACT: Parameterized Clipping Activation for Quantized Neural Networks","volume":"abs\/1805.06085","author":"Choi","year":"2018","journal-title":"ArXiv"},{"key":"ref41","first-page":"4466","article-title":"Accurate Post Training Quantization with Small Calibration Sets","volume-title":"Proceedings of the 38th International Conference on Machine Learning, ser. Proceedings of Machine Learning Research","volume":"139","author":"Hubara"},{"key":"ref42","first-page":"10271","article-title":"Pushing the Limits of Narrow Precision Inferencing at Cloud Scale with Microsoft Floating Point","volume":"33","author":"Darvish Rouhani","year":"2020","journal-title":"Advances in Neural Information Processing Systems"}],"event":{"name":"2024 34th International Conference on Field-Programmable Logic and Applications (FPL)","start":{"date-parts":[[2024,9,2]]},"location":"Torino, Italy","end":{"date-parts":[[2024,9,6]]}},"container-title":["2024 34th International Conference on Field-Programmable Logic and Applications (FPL)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10705425\/10705440\/10705489.pdf?arnumber=10705489","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,10]],"date-time":"2024-10-10T14:59:08Z","timestamp":1728572348000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10705489\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,2]]},"references-count":42,"URL":"https:\/\/doi.org\/10.1109\/fpl64840.2024.00048","relation":{},"subject":[],"published":{"date-parts":[[2024,9,2]]}}}