{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T05:29:51Z","timestamp":1730266191106,"version":"3.28.0"},"reference-count":33,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,6,18]],"date-time":"2023-06-18T00:00:00Z","timestamp":1687046400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,6,18]],"date-time":"2023-06-18T00:00:00Z","timestamp":1687046400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,6,18]]},"DOI":"10.1109\/ijcnn54540.2023.10191680","type":"proceedings-article","created":{"date-parts":[[2023,8,2]],"date-time":"2023-08-02T17:30:03Z","timestamp":1690997403000},"page":"01-07","source":"Crossref","is-referenced-by-count":0,"title":["Linguistic Feature Injection for Efficient Natural Language Processing"],"prefix":"10.1109","author":[{"given":"Stefano","family":"Fioravanti","sequence":"first","affiliation":[{"name":"DIISM, University of Siena,Siena,Italy"}]},{"given":"Andrea","family":"Zugarini","sequence":"additional","affiliation":[{"name":"R&D, expert.ai,Siena,Italy"}]},{"given":"Francesco","family":"Giannini","sequence":"additional","affiliation":[{"name":"DIISM, University of Siena,Siena,Italy"}]},{"given":"Leonardo","family":"Rigutini","sequence":"additional","affiliation":[{"name":"R&D, expert.ai,Siena,Italy"}]},{"given":"Marco","family":"Maggini","sequence":"additional","affiliation":[{"name":"DIISM, University of Siena,Siena,Italy"}]},{"given":"Michelangelo","family":"Diligenti","sequence":"additional","affiliation":[{"name":"DIISM, University of Siena,Siena,Italy"}]}],"member":"263","reference":[{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.findings-emnlp.372"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6409"},{"key":"ref15","article-title":"To prune, or not to prune: exploring the efficacy of pruning for model compression","author":"zhu","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref14","first-page":"5776","article-title":"Minilm: Deep self-attention distillation for task-agnostic compression of pretrained transformers","volume":"33","author":"wang","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W17-4418"},{"key":"ref30","first-page":"138","article-title":"Results of the WNUT16 named entity recognition shared task","author":"strauss","year":"2016","journal-title":"Proceedings of the 2nd Workshop on Noisy User-generated Text (WNUT)"},{"key":"ref11","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"brown","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref33","article-title":"Quantifying the carbon emissions of machine learning","volume":"abs 1910 9700","author":"lacoste","year":"2019","journal-title":"ArXiv"},{"key":"ref10","article-title":"DeBERTa: Decoding-enhanced BERT with disentangled attention","author":"he","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1093\/database\/baw068"},{"key":"ref2","first-page":"4171","article-title":"BERT: Pretraining of deep bidirectional transformers for language understanding","author":"devlin","year":"2019","journal-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics Human Language Technologies Volume 1 (Long and Short Papers)"},{"key":"ref1","article-title":"Attention is all you need","volume":"30","author":"vaswani","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref17","article-title":"Fast vocabulary transfer for language model compression","author":"gee","year":"0","journal-title":"Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing"},{"key":"ref16","article-title":"Are sixteen heads really better than one?","volume":"32","author":"michel","year":"2019","journal-title":"Advances in neural information processing systems"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D16-1007"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/P15-1150"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.18"},{"key":"ref23","first-page":"4129","article-title":"A structural probe for finding syntax in word representations","author":"hewitt","year":"2019","journal-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics Human Language Technologies Volume 1 (Long and Short Papers)"},{"key":"ref26","article-title":"Tree-structured attention with hierarchical accumulation","author":"nguyen","year":"2020","journal-title":"ICLRE"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.eacl-main.228"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D15-1206"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1548"},{"key":"ref21","article-title":"Neural language modeling by jointly learning syntax and lexicon","volume":"abs 1711 2013","author":"shen","year":"2017","journal-title":"ICLRE"},{"key":"ref28","first-page":"3210","article-title":"Cogbert: Cognition-guided pre-trained language models","author":"ding","year":"0","journal-title":"Proceedings of the 29th International Conference on Computational Linguistics"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.eacl-main.262"},{"key":"ref29","article-title":"Distilling the knowledge in a neural network","volume":"2","author":"hinton","year":"2015","journal-title":"ArXiv Preprint"},{"journal-title":"spaCy 2 Natural language understanding with Bloom embeddings convolutional neural networks and incremental parsing","year":"2017","author":"honnibal","key":"ref8"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.534"},{"key":"ref9","article-title":"Longformer: The long-document transformer","author":"beltagy","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref4","article-title":"Distilbert, a distilled version of bert: smaller, faster, cheaper and lighter","author":"sanh","year":"2019","journal-title":"ArXiv Preprint"},{"journal-title":"RoBERTa A Robustly optimized BERT Pretraining Approach","year":"2020","author":"liu","key":"ref3"},{"key":"ref6","article-title":"Energy usage reports: Environmental awareness as part of algorithmic accountability","author":"lottick","year":"0","journal-title":"NeurIPS 2019 Workshop on Tackling Climate Change with Machine Learning"},{"key":"ref5","article-title":"Exploring the limits of transfer learning with a unified text-to-text transformer","author":"roberts","year":"2019","journal-title":"Google Tech Rep"}],"event":{"name":"2023 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2023,6,18]]},"location":"Gold Coast, Australia","end":{"date-parts":[[2023,6,23]]}},"container-title":["2023 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10190990\/10190992\/10191680.pdf?arnumber=10191680","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,21]],"date-time":"2023-08-21T17:46:55Z","timestamp":1692640015000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10191680\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,18]]},"references-count":33,"URL":"https:\/\/doi.org\/10.1109\/ijcnn54540.2023.10191680","relation":{},"subject":[],"published":{"date-parts":[[2023,6,18]]}}}