{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,3]],"date-time":"2024-09-03T05:58:12Z","timestamp":1725343092778},"reference-count":20,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,11,29]],"date-time":"2021-11-29T00:00:00Z","timestamp":1638144000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,11,29]],"date-time":"2021-11-29T00:00:00Z","timestamp":1638144000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100006754","name":"Army Research Laboratory","doi-asserted-by":"publisher","award":["W911NF-17-2-0196"],"id":[{"id":"10.13039\/100006754","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"NSF","doi-asserted-by":"publisher","award":["CPS 20-38817"],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000003","name":"Boeing Company","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000003","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,11,29]]},"DOI":"10.1109\/milcom52596.2021.9652907","type":"proceedings-article","created":{"date-parts":[[2021,12,30]],"date-time":"2021-12-30T21:06:10Z","timestamp":1640898370000},"source":"Crossref","is-referenced-by-count":10,"title":["Towards an Accurate Latency Model for Convolutional Neural Network Layers on GPUs"],"prefix":"10.1109","author":[{"given":"Jinyang","family":"Li","sequence":"first","affiliation":[{"name":"University of Illinois,Urbana-Champaign"}]},{"given":"Runyu","family":"Ma","sequence":"additional","affiliation":[{"name":"George Mason University"}]},{"given":"Vikram Sharma","family":"Mailthody","sequence":"additional","affiliation":[{"name":"University of Illinois,Urbana-Champaign"}]},{"given":"Colin","family":"Samplawski","sequence":"additional","affiliation":[{"name":"University of Massachusetts Amherst"}]},{"given":"Benjamin","family":"Marlin","sequence":"additional","affiliation":[{"name":"University of Massachusetts Amherst"}]},{"given":"Songqing","family":"Chen","sequence":"additional","affiliation":[{"name":"George Mason University"}]},{"given":"Shuochao","family":"Yao","sequence":"additional","affiliation":[{"name":"George Mason University"}]},{"given":"Tarek","family":"Abdelzaher","sequence":"additional","affiliation":[{"name":"University of Illinois,Urbana-Champaign"}]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/3079079.3079083"},{"key":"ref11","year":"2021","journal-title":"CUDA C++ Programming Guide"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2020.2971677"},{"key":"ref13","year":"2020","journal-title":"Nvidia al00 gpu architecture whitepa-per"},{"key":"ref14","year":"2017","journal-title":"Nvidia tesla vl00 gpu architecture whitepa-per"},{"key":"ref15","author":"kirk","year":"2017","journal-title":"Programming Massively Parallel Processors"},{"key":"ref16","article-title":"cuDNN: Efficient Primitives for Deep Learning","author":"chetlur","year":"2014","journal-title":"ArXiv"},{"key":"ref17","author":"corporation","year":"2020","journal-title":"Deep Learning Performance Documentation"},{"key":"ref18","year":"2021","journal-title":"CUTLASS CUDA Templates for Linear Algebra Subroutines"},{"key":"ref19","year":"2020","journal-title":"Matrix Multiplication Background User Guide"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01234-2_48"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/3458864.3467882"},{"key":"ref6","author":"qi","year":"2016","journal-title":"Paleo A performance model for deep neural networks"},{"key":"ref5","article-title":"Pruning Filters for Efficient ConvNets","author":"li","year":"2017","journal-title":"ArXiv"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2019.00041"},{"key":"ref7","article-title":"BRP-NAS: Prediction-based NAS using GCNs","author":"dudziak","year":"2021","journal-title":"ArXiv"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3274783.3274840"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/3131672.3131675"},{"key":"ref9","first-page":"1","article-title":"Performance upper bound analysis and optimization of SGEMM on Fermi and Kepler GPUs","author":"lai","year":"2013","journal-title":"Proceedings of the 2013 IEEE\/ACM International Symposium on Code Generation and Optimization (CGO)"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS47924.2020.00053"}],"event":{"name":"MILCOM 2021 - 2021 IEEE Military Communications Conference (MILCOM)","location":"San Diego, CA, USA","start":{"date-parts":[[2021,11,29]]},"end":{"date-parts":[[2021,12,2]]}},"container-title":["MILCOM 2021 - 2021 IEEE Military Communications Conference (MILCOM)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9652874\/9652880\/09652907.pdf?arnumber=9652907","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,3]],"date-time":"2022-08-03T00:09:29Z","timestamp":1659485369000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9652907\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,11,29]]},"references-count":20,"URL":"https:\/\/doi.org\/10.1109\/milcom52596.2021.9652907","relation":{},"subject":[],"published":{"date-parts":[[2021,11,29]]}}}