{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T01:20:24Z","timestamp":1740100824434,"version":"3.37.3"},"reference-count":58,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,5,1]],"date-time":"2022-05-01T00:00:00Z","timestamp":1651363200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,5,1]],"date-time":"2022-05-01T00:00:00Z","timestamp":1651363200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100002809","name":"Generalitat de Catalunya","doi-asserted-by":"publisher","award":["2017-SGR-1414"],"id":[{"id":"10.13039\/501100002809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100006280","name":"Spanish Ministry of Science and Technology","doi-asserted-by":"publisher","award":["PID2019-107255GB"],"id":[{"id":"10.13039\/501100006280","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,5]]},"DOI":"10.1109\/ipdps53621.2022.00096","type":"proceedings-article","created":{"date-parts":[[2022,7,15]],"date-time":"2022-07-15T19:41:01Z","timestamp":1657914061000},"page":"941-951","source":"Crossref","is-referenced-by-count":0,"title":["Task-based Acceleration of Bidirectional Recurrent Neural Networks on Multi-core Architectures"],"prefix":"10.1109","author":[{"given":"Robin Kumar","family":"Sharma","sequence":"first","affiliation":[{"name":"Barcelona Supercomputing Center (BSC), Universitat Politècnica de Catalunya (UPC),Computer Science Department"}]},{"given":"Marc","family":"Casas","sequence":"additional","affiliation":[{"name":"Barcelona Supercomputing Center (BSC), Universitat Politècnica de Catalunya (UPC),Computer Science Department"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1145\/3392717.3392762"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"journal-title":"TIDIGITS speech corpus","year":"1993","author":"leonard","key":"ref33"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/AHS.2019.000-4"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2019.00048"},{"key":"ref30","article-title":"Deep learning training in facebook data centers: Design of scale-up and scale-out systems","author":"naumov","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref37","article-title":"Pytorch: An imperative style, high-performance deep learning library","author":"paszke","year":"0","journal-title":"Advances in Neural IInformation Processing Systems"},{"key":"ref36","article-title":"TensorFlow: Large-scale machine learning on heterogeneous systems","author":"abadi","year":"2015","journal-title":"Software"},{"journal-title":"Keras","year":"2015","author":"chollet","key":"ref35"},{"key":"ref34","article-title":"Training and analyzing deep recurrent neural networks","author":"hermans","year":"2013","journal-title":"Advances in neural information processing systems"},{"key":"ref28","first-page":"620","article-title":"Applied machine learning at facebook: A datacenter infrastructure perspective","author":"h","year":"2018","journal-title":"2018 IEEE International Symposium on High Performance Computer Architecture (HPCA)"},{"journal-title":"Summit Supercomputer","year":"2022","key":"ref27"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00084"},{"key":"ref2","article-title":"Training and analyzing deep recurrent neural networks","author":"hermans","year":"2013","journal-title":"Advances in neural information processing systems"},{"key":"ref1","doi-asserted-by":"crossref","first-page":"2673","DOI":"10.1109\/78.650093","article-title":"Bidirectional recurrent neural networks","volume":"45","author":"schuster","year":"1997","journal-title":"IEEE Transactions on Signal Processing"},{"journal-title":"Scheduling computation graphs of deep learning models on manycore CPUs","year":"2018","author":"tang","key":"ref20"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1142\/S0129626411000151"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/99.660313"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3071762"},{"key":"ref23","first-page":"1","author":"kalamkar","year":"2020","journal-title":"Optimizing deep learning recommender systems' training on cpu cluster architectures"},{"journal-title":"Top 500 list","year":"2021","key":"ref26"},{"journal-title":"Supercomputer Fugaku","year":"2022","key":"ref25"},{"journal-title":"Deepcpu Serving rnn-based deep learning models 10x faster","year":"2018","author":"zhang","key":"ref50"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2019.8661197"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1016\/j.future.2020.08.005"},{"journal-title":"Accelerating slide deep learning on modern cpus Vectorization quantizations memory optimizations and more","year":"2021","author":"daghaghi","key":"ref57"},{"key":"ref56","article-title":"Momentumrnn: Integrating momentum into recurrent neural networks","author":"nguyen","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref55","article-title":"Gating revisited: Deep multi-layer rnns that can be trained","author":"turkoglu","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2019.10.068"},{"journal-title":"TVM An automated end-to-end optimizing compiler for deep learning","year":"2018","author":"chen","key":"ref53"},{"key":"ref52","article-title":"Scheduling computation graphs of deep learning models on manycore cpus","volume":"abs 1807 9667","author":"tang","year":"2018","journal-title":"CoRR"},{"key":"ref10","article-title":"Single headed attention based sequence-to-sequence model for state-of-the-art results on switchboard-300","author":"t\u00fcske","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/j.matpr.2020.05.450"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/NCG.2018.8593076"},{"key":"ref12","article-title":"Trans-blstm: Transformer with bidirectional lstm for language understanding","author":"huang","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/BigComp48618.2020.00132"},{"journal-title":"A Recurrent Neural Network Based Recommendation System","year":"0","author":"liu","key":"ref14"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.202"},{"key":"ref16","article-title":"A comparison of lstm and bert for small corpus","author":"ezen-can","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.14778\/3447689.3447692"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2005.06.042"},{"journal-title":"Empirical evaluation of gated recurrent neural networks on sequence modeling","year":"2014","author":"chung","key":"ref19"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1003"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2016.2539499"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N16-1056"},{"key":"ref5","article-title":"A novel approach to on-line handwriting recognition based on bidirectional long short-term memory networks","author":"liwicki","year":"0","journal-title":"ICDAR 2007"},{"key":"ref8","first-page":"14","volume":"20","author":"lecun","year":"2015","journal-title":"Lenet-5 convolutional neural networks"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.11198"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1145\/3302424.3303949"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1515\/jisys-2018-0372"},{"key":"ref46","first-page":"101839","article-title":"A survey on hardware accelerators and optimization techniques for rnns","author":"mittal","year":"2020","journal-title":"Journal of Systems Architecture"},{"key":"ref45","doi-asserted-by":"crossref","first-page":"132306","DOI":"10.1016\/j.physd.2019.132306","article-title":"Fundamentals of recurrent neural network (rnn) and long short-term memory (lstm) network","volume":"404","author":"sherstinsky","year":"2020","journal-title":"Physica D Nonlinear Phenomena"},{"key":"ref48","article-title":"Optimizing performance of recurrent neural networks on gpus","volume":"abs 1604 1946","author":"appleyard","year":"2016","journal-title":"CoRR"},{"key":"ref47","first-page":"4470","author":"lei","year":"2018","journal-title":"Simple recurrent units for highly parallelizable recurrence"},{"journal-title":"Using the Intel optimized Pytorch","year":"2020","key":"ref42"},{"journal-title":"Using the Intel optimized tensroflow","year":"2018","key":"ref41"},{"journal-title":"Recent advances in recurrent neural networks","year":"2017","author":"salehinejad","key":"ref44"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1145\/3431388"}],"event":{"name":"2022 IEEE International Parallel and Distributed Processing Symposium (IPDPS)","start":{"date-parts":[[2022,5,30]]},"location":"Lyon, France","end":{"date-parts":[[2022,6,3]]}},"container-title":["2022 IEEE International Parallel and Distributed Processing Symposium (IPDPS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9820609\/9820610\/09820648.pdf?arnumber=9820648","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,8]],"date-time":"2022-08-08T19:59:48Z","timestamp":1659988788000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9820648\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,5]]},"references-count":58,"URL":"https:\/\/doi.org\/10.1109\/ipdps53621.2022.00096","relation":{},"subject":[],"published":{"date-parts":[[2022,5]]}}}