{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,11]],"date-time":"2024-09-11T16:51:50Z","timestamp":1726073510571},"publisher-location":"New York, NY, USA","reference-count":25,"publisher":"ACM","license":[{"start":{"date-parts":[[2019,9,30]],"date-time":"2019-09-30T00:00:00Z","timestamp":1569801600000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2019,9,30]]},"DOI":"10.1145\/3357526.3357569","type":"proceedings-article","created":{"date-parts":[[2019,11,6]],"date-time":"2019-11-06T14:25:56Z","timestamp":1573050356000},"update-policy":"http:\/\/dx.doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":7,"title":["Memory system characterization of deep learning workloads"],"prefix":"10.1145","author":[{"given":"Zeshan","family":"Chishti","sequence":"first","affiliation":[{"name":"Intel Labs"}]},{"given":"Berkin","family":"Akin","sequence":"additional","affiliation":[{"name":"Google"}]}],"member":"320","published-online":{"date-parts":[[2019,9,30]]},"reference":[{"key":"e_1_3_2_1_1_1","first-page":"1","article-title":"In-datacenter performance analysis of a Tensor Processing Unit","author":"Jouppi N. P.","year":"2017","journal-title":"Proceedings of the 44th ACM\/IEEE Annual International Symposium on Computer Architecture (ISCA)"},{"key":"e_1_3_2_1_2_1","first-page":"1","article-title":"A configurable cloud-scale DNN processor for real-time AI","author":"Fowers J.","year":"2018","journal-title":"Proceesings of the 45th ACM\/IEEE Annual International Symposium on Computer Architecture (ISCA)"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.58"},{"key":"e_1_3_2_1_4_1","first-page":"367","article-title":"Eyeriss: A spatial architecture for energy-efficient dataflow for convolutional neural networks","author":"Chen Y. H.","year":"2016","journal-title":"Proceedings of the 43rd ACM\/IEEE Annual International Symposium on Computer Architecture (ISCA)"},{"key":"e_1_3_2_1_5_1","unstructured":"\"Intel architecture instruction set extensions and future features programming reference \" Online: https:\/\/software.intel.com\/sites\/default\/files\/managed\/c5\/15\/architecture-instruction-set-extensions-programming-reference.pdf October 2018. \"Intel architecture instruction set extensions and future features programming reference \" Online: https:\/\/software.intel.com\/sites\/default\/files\/managed\/c5\/15\/architecture-instruction-set-extensions-programming-reference.pdf October 2018."},{"key":"e_1_3_2_1_6_1","unstructured":"V. Vanhoucke A. Senior and M. Z. Mao \"Improving the speed of neural networks on CPUs \" In Deep Learning and Unsupervised Feature Learning Workshop (NIPS) 2011. V. Vanhoucke A. Senior and M. Z. Mao \"Improving the speed of neural networks on CPUs \" In Deep Learning and Unsupervised Feature Learning Workshop (NIPS) 2011."},{"key":"e_1_3_2_1_7_1","unstructured":"Y. Liu Y. Wang R. Yu M. Li V. Sharma and Y. Wang \"Optimizing CNN model inference on CPUs \" In Proceedings of the 2019 USENIX Annual Technical Conference pp. 1025--1039 July 2019. Y. Liu Y. Wang R. Yu M. Li V. Sharma and Y. Wang \"Optimizing CNN model inference on CPUs \" In Proceedings of the 2019 USENIX Annual Technical Conference pp. 1025--1039 July 2019."},{"key":"e_1_3_2_1_8_1","unstructured":"L. Tang etal \"Scheduling computation graphs of deep learning models on manycore CPUs \" arXiv preprint arXiv:1807.09667 2018. L. Tang et al. \"Scheduling computation graphs of deep learning models on manycore CPUs \" arXiv preprint arXiv:1807.09667 2018."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","unstructured":"K. he at al. \"Deep residual learning for image recognition \" In Proceedings of the 2016 IEEE Conference on Computer Vision and Pattern Recognition pp. 770--778 2016. K. he at al. \"Deep residual learning for image recognition \" In Proceedings of the 2016 IEEE Conference on Computer Vision and Pattern Recognition pp. 770--778 2016.","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_10_1","unstructured":"K. Simonyan and A. Zisserman \"Very deep convolutional networks for large-scale image recognition \" arXiv preprint arXiv:1409.1556 2014. K. Simonyan and A. Zisserman \"Very deep convolutional networks for large-scale image recognition \" arXiv preprint arXiv:1409.1556 2014."},{"key":"e_1_3_2_1_11_1","unstructured":"A. Krizhevsky I. Sutskever and G. E. Hinton \"ImageNet classification with deep convolutional neural networks \" Advances in neural information processing systems pp. 1097--1105 2012. A. Krizhevsky I. Sutskever and G. E. Hinton \"ImageNet classification with deep convolutional neural networks \" Advances in neural information processing systems pp. 1097--1105 2012."},{"key":"e_1_3_2_1_12_1","unstructured":"J. Dean \"Machine learning for systems and systems for machine learning \" In Proceedings of the 31st Conference on Neural Information Processing Systems (NIPS) 2017 http:\/\/learningsys.org\/nips17\/assets\/slides\/dean-nips17.pdf J. Dean \"Machine learning for systems and systems for machine learning \" In Proceedings of the 31st Conference on Neural Information Processing Systems (NIPS) 2017 http:\/\/learningsys.org\/nips17\/assets\/slides\/dean-nips17.pdf"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"crossref","unstructured":"K. Hazlewood S. Bird D. Brooks S. Chintala U. Diril D. Dzhulgakov M. Fawzy B. Jia Y. Jia A. Kalro J. Law K. Lee J. Lu P. Noordhuis M. Smelyanskiy L. Xiong and X. Wang \"Applied machine learning at Facebook: A datacenter infrastructure perspective \" In Proceedings of the 2018 IEEE International Symposium on High Performance Computer Architecture (HPCA) pp. 620--629 2018. K. Hazlewood S. Bird D. Brooks S. Chintala U. Diril D. Dzhulgakov M. Fawzy B. Jia Y. Jia A. Kalro J. Law K. Lee J. Lu P. Noordhuis M. Smelyanskiy L. Xiong and X. Wang \"Applied machine learning at Facebook: A datacenter infrastructure perspective \" In Proceedings of the 2018 IEEE International Symposium on High Performance Computer Architecture (HPCA) pp. 620--629 2018.","DOI":"10.1109\/HPCA.2018.00059"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/2063384.2063454"},{"key":"e_1_3_2_1_15_1","first-page":"265","article-title":"TensorFlow: A system for large-scale machine learning","author":"Abadi M.","year":"2016","journal-title":"Proceedings of the 12th USENIX conference on Operating Systems Design and Implementation (OSDI)"},{"key":"e_1_3_2_1_16_1","unstructured":"S. Chetlur etal \"cuDNN: Efficient primitives for deep learning \" arXiv preprint arXiv:1410.0759 2014. S. Chetlur et al. \"cuDNN: Efficient primitives for deep learning \" arXiv preprint arXiv:1410.0759 2014."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"crossref","unstructured":"E. Nurvitadhi etal \"Can FPGAs beat GPUs in accelerating next-generation deep neural networks? \" In Proceedings of the 2017 ACM\/SIGDA International Symposium on Field-Programmable Gate Arrays (FPGA-2017) pp. 5--14 2017. E. Nurvitadhi et al. \"Can FPGAs beat GPUs in accelerating next-generation deep neural networks? \" In Proceedings of the 2017 ACM\/SIGDA International Symposium on Field-Programmable Gate Arrays (FPGA-2017) pp. 5--14 2017.","DOI":"10.1145\/3020078.3021740"},{"key":"e_1_3_2_1_18_1","first-page":"20","article-title":"Cambricon-x: An accelerator for sparse neural networks","author":"Zhang S.","year":"2016","journal-title":"Proceedings of the 49th Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO)"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3140659.3080254"},{"key":"e_1_3_2_1_20_1","unstructured":"J. Park etal \"Deep learning inference in Facebook data centers: Characterization performance optimizations and hardware implications \" arXiv preprint arXiv:1811.09886 2018. J. Park et al. \"Deep learning inference in Facebook data centers: Characterization performance optimizations and hardware implications \" arXiv preprint arXiv:1811.09886 2018."},{"key":"e_1_3_2_1_21_1","volume-title":"Proceedings of the 27th ACM\/IEEE International Symposium on Computer Architecture (ISCA)","author":"Rixner S.","year":"2000"},{"key":"e_1_3_2_1_22_1","unstructured":"S. Rixner \"Memory controller optimizations for web servers \" In Proceedings of the 37th ACM\/IEEE Annual International Symposium on Microarchitecture (MICRO) 2004. S. Rixner \"Memory controller optimizations for web servers \" In Proceedings of the 37 th ACM\/IEEE Annual International Symposium on Microarchitecture (MICRO) 2004."},{"key":"e_1_3_2_1_23_1","unstructured":"\"Intel math kernel library (MKL)\" http:\/\/software.intel.com\/en-us\/mkl. \"Intel math kernel library (MKL)\" http:\/\/software.intel.com\/en-us\/mkl."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"crossref","unstructured":"B. Akin Z. Chishti and A. Alameldeen \"ZCOMP: Reducing DNN Cross-Layer Memory Footprint Using Vector Extensions \" In Proceedings of the 52nd ACM\/IEEE Annual International Symposium on Microarchitecture (MICRO) 2019. B. Akin Z. Chishti and A. Alameldeen \"ZCOMP: Reducing DNN Cross-Layer Memory Footprint Using Vector Extensions \" In Proceedings of the 52 nd ACM\/IEEE Annual International Symposium on Microarchitecture (MICRO) 2019.","DOI":"10.1145\/3352460.3358305"},{"key":"e_1_3_2_1_25_1","first-page":"1","article-title":"Cnvlutin: Ineffectual-neuron-free deep neural network computing","author":"Albericio J.","year":"2016","journal-title":"Proceedings of the 43rd ACM\/IEEE International Symposium on Computer Architecture (ISCA)"}],"event":{"name":"MEMSYS '19: The International Symposium on Memory Systems","location":"Washington District of Columbia USA","acronym":"MEMSYS '19"},"container-title":["Proceedings of the International Symposium on Memory Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3357526.3357569","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,12]],"date-time":"2023-01-12T20:13:28Z","timestamp":1673554408000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3357526.3357569"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,9,30]]},"references-count":25,"alternative-id":["10.1145\/3357526.3357569","10.1145\/3357526"],"URL":"https:\/\/doi.org\/10.1145\/3357526.3357569","relation":{},"subject":[],"published":{"date-parts":[[2019,9,30]]},"assertion":[{"value":"2019-09-30","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}