{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T00:18:44Z","timestamp":1740097124620,"version":"3.37.3"},"publisher-location":"Cham","reference-count":18,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319143125"},{"type":"electronic","value":"9783319143132"}],"license":[{"start":{"date-parts":[[2014,1,1]],"date-time":"2014-01-01T00:00:00Z","timestamp":1388534400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014]]},"DOI":"10.1007\/978-3-319-14313-2_14","type":"book-chapter","created":{"date-parts":[[2014,12,11]],"date-time":"2014-12-11T01:56:46Z","timestamp":1418263006000},"page":"158-169","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["OpenCL Performance Portability for Xeon Phi Coprocessor and NVIDIA GPUs: A Case Study of Finite Element Numerical Integration"],"prefix":"10.1007","author":[{"given":"Krzysztof","family":"Bana\u015b","sequence":"first","affiliation":[]},{"given":"Filip","family":"Kru\u017cel","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"issue":"6","key":"14_CR1","doi-asserted-by":"publisher","first-page":"1319","DOI":"10.1016\/j.camwa.2014.01.021","volume":"67","author":"K. Bana\u015b","year":"2014","unstructured":"Bana\u015b, K., P\u0142aszewski, P., Macio\u0142, P.: Numerical integration on GPUs for higher order finite elements. Computers and Mathematics with Applications\u00a067(6), 1319\u20131344 (2014)","journal-title":"Computers and Mathematics with Applications"},{"key":"14_CR2","series-title":"An Introduction","volume-title":"Finite Elements","author":"E. Becker","year":"1981","unstructured":"Becker, E., Carey, G., Oden, J.: Finite Elements. An Introduction. Prentice Hall, Englewood Cliffs (1981)"},{"issue":"5","key":"14_CR3","doi-asserted-by":"publisher","first-page":"28","DOI":"10.1109\/MM.2011.67","volume":"31","author":"S. Benkner","year":"2011","unstructured":"Benkner, S., Pllana, S., Traff, J., Tsigas, P., Dolinsky, U., Augonnet, C., Bachmayer, B., Kessler, C., Moloney, D., Osipov, V.: Peppher: Efficient and productive usage of hybrid computing systems. IEEE Micro\u00a031(5), 28\u201341 (2011)","journal-title":"IEEE Micro"},{"issue":"5","key":"14_CR4","doi-asserted-by":"publisher","first-page":"640","DOI":"10.1002\/nme.2989","volume":"85","author":"C. Cecka","year":"2011","unstructured":"Cecka, C., Lew, A.J., Darve, E.: Assembly of finite element methods on graphics processors. International Journal for Numerical Methods in Engineering\u00a085(5), 640\u2013669 (2011), \n \n http:\/\/dx.doi.org\/10.1002\/nme.2989","journal-title":"International Journal for Numerical Methods in Engineering"},{"key":"14_CR5","unstructured":"Goto, K., van de Geijn, R.A.: Anatomy of high-performance matrix multiplication. ACM Trans. Math. Softw.\u00a034(3), 12:1\u201312:25 (2008), \n \n http:\/\/doi.acm.org\/10.1145\/1356052.1356053"},{"key":"14_CR6","unstructured":"Group, K.O.W.: The OpenCL Specification, version 1.1 (2010), \n \n http:\/\/www.khronos.org\/registry\/cl\/specs\/opencl-1.1.pdf"},{"key":"14_CR7","unstructured":"Intel: Intel SDK for OpenCL Applications XE 2013 R3. User\u2019s Guide (2013)"},{"key":"14_CR8","doi-asserted-by":"crossref","unstructured":"Jeffers, J., Reinders, J.: Intel Xeon Phi Coprocessor High Performance Programming, 1st edn. Morgan Kaufmann (2013)","DOI":"10.1016\/B978-0-12-410414-3.00001-3"},{"issue":"10","key":"14_CR9","doi-asserted-by":"publisher","first-page":"2030","DOI":"10.1016\/j.camwa.2013.08.026","volume":"66","author":"F. Kru\u017cel","year":"2013","unstructured":"Kru\u017cel, F., Bana\u015b, K.: Vectorized OpenCL implementation of numerical integration for higher order finite elements. Computers and Mathematics with Applications\u00a066(10), 2030\u20132044 (2013)","journal-title":"Computers and Mathematics with Applications"},{"issue":"1","key":"14_CR10","doi-asserted-by":"publisher","first-page":"1815","DOI":"10.1016\/j.procs.2010.04.203","volume":"1","author":"G.R. Markall","year":"2010","unstructured":"Markall, G.R., Ham, D.A., Kelly, P.H.: Towards generating optimised finite element solvers for gpus from high-level specifications. Procedia Computer Science\u00a01(1), 1815\u20131823 (2010); iCCS 2010","journal-title":"Procedia Computer Science"},{"issue":"1","key":"14_CR11","first-page":"4","volume":"6","author":"D.T. Marr","year":"2002","unstructured":"Marr, D.T., Binns, F., Hill, D.L., Hinton, G., Koufaty, D.A., Miller, A.J., Upton, M.: Hyper-Threading Technology Architecture and Microarchitecture. Intel Technology Journal\u00a06(1), 4\u201315 (2002)","journal-title":"Intel Technology Journal"},{"key":"14_CR12","unstructured":"NVIDIA: NVIDIA CUDA C Programming Guide Version 5.0 (2012)"},{"key":"14_CR13","doi-asserted-by":"crossref","unstructured":"Reguly, I., Giles, M.: Finite element algorithms and data structures on graphical processing units. International Journal of Parallel Programming, 1\u201337 (2013), \n \n http:\/\/dx.doi.org\/10.1007\/s10766-013-0301-6","DOI":"10.1007\/s10766-013-0301-6"},{"key":"14_CR14","unstructured":"Rul, S., Vandierendonck, H., D\u2019Haene, J., De Bosschere, K.: An experimental study on performance portability of opencl kernels. In: Application Accelerators in High Performance Computing, 2010 Symposium, Papers, Knoxville, TN, USA, p. 3 (2010)"},{"key":"14_CR15","unstructured":"Top500, \n \n http:\/\/www.top500.org"},{"key":"14_CR16","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"330","DOI":"10.1007\/978-3-642-38750-0_25","volume-title":"Supercomputing","author":"S. Wienke","year":"2013","unstructured":"Wienke, S., an Mey, D., M\u00fcller, M.S.: Accelerators for technical computing: Is it worth the pain? A TCO perspective. In: Kunkel, J.M., Ludwig, T., Meuer, H.W. (eds.) ISC 2013. LNCS, vol.\u00a07905, pp. 330\u2013342. Springer, Heidelberg (2013)"},{"issue":"4","key":"14_CR17","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1145\/1498765.1498785","volume":"52","author":"S. Williams","year":"2009","unstructured":"Williams, S., Waterman, A., Patterson, D.: Roofline: An insightful visual performance model for multicore architectures. Commun. ACM\u00a052(4), 65\u201376 (2009), \n \n http:\/\/doi.acm.org\/10.1145\/1498765.1498785","journal-title":"Commun. ACM"},{"key":"14_CR18","doi-asserted-by":"crossref","unstructured":"Yuen, D., Wang, L., Chi, X., Johnsson, L., Ge, W., Shi, Y. (eds.): GPU Solutions to Multi-scale Problems in Science and Engineering. Springer (2013)","DOI":"10.1007\/978-3-642-16405-7"}],"container-title":["Lecture Notes in Computer Science","Euro-Par 2014: Parallel Processing Workshops"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-14313-2_14","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,4,20]],"date-time":"2020-04-20T00:24:42Z","timestamp":1587342282000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-14313-2_14"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014]]},"ISBN":["9783319143125","9783319143132"],"references-count":18,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-14313-2_14","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2014]]},"assertion":[{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}