{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,2,13]],"date-time":"2024-02-13T08:49:09Z","timestamp":1707814149632},"reference-count":45,"publisher":"Elsevier BV","issue":"1","license":[{"start":{"date-parts":[[2019,3,1]],"date-time":"2019-03-01T00:00:00Z","timestamp":1551398400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2019,3,19]],"date-time":"2019-03-19T00:00:00Z","timestamp":1552953600000},"content-version":"vor","delay-in-days":18,"URL":"http:\/\/creativecommons.org\/licenses\/by-nc-nd\/3.0\/"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Data and Information Management"],"published-print":{"date-parts":[[2019,3]]},"DOI":"10.2478\/dim-2019-0004","type":"journal-article","created":{"date-parts":[[2019,5,27]],"date-time":"2019-05-27T17:32:00Z","timestamp":1558978320000},"page":"18-25","source":"Crossref","is-referenced-by-count":1,"title":["Petabytes in Practice: Working with Collections as Data at Scale"],"prefix":"10.1016","volume":"3","author":[{"given":"Will R.","family":"Thomas","sequence":"first","affiliation":[]},{"given":"Benjamin","family":"Galewsky","sequence":"additional","affiliation":[]},{"given":"Sandeep Puthanveetil","family":"Satheesan","sequence":"additional","affiliation":[]},{"given":"Gregory","family":"Jansen","sequence":"additional","affiliation":[]},{"given":"Richard","family":"Marciano","sequence":"additional","affiliation":[]},{"given":"Shannon","family":"Bradley","sequence":"additional","affiliation":[]},{"given":"Jong","family":"Lee","sequence":"additional","affiliation":[]},{"given":"Luigi","family":"Marini","sequence":"additional","affiliation":[]},{"given":"Kenton","family":"McHenry","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.2478\/dim-2019-0004_bib001","author":"Amodei"},{"key":"10.2478\/dim-2019-0004_bib002","series-title":"Proceedings of the Twenty-Fifth ACM SIGMOD-SIGACT-SIGART Symposium on Principles of Database Systems","first-page":"338","article-title":"Relational Lenses: A Language for Updatable Views","author":"Bohannon","year":"2006"},{"key":"10.2478\/dim-2019-0004_bib003","doi-asserted-by":"crossref","first-page":"357","DOI":"10.1162\/tacl_a_00104","article-title":"Named entity recognition with bidirectional LSTM-CNNs","volume":"4","author":"Chiu","year":"2016","journal-title":"Transactions of the Association for Computational Linguistics"},{"key":"10.2478\/dim-2019-0004_bib004","series-title":"Algorithms for Closest-Point Problems (Computational Geometry)","author":"Clarkson","year":"1984"},{"key":"10.2478\/dim-2019-0004_bib005","series-title":"Proceedings of the Seventh International Conference on Intelligent Systems for Molecular Biology","first-page":"77","article-title":"Constructing biological knowledge bases by extracting information from text sources","author":"Craven","year":"1999"},{"key":"10.2478\/dim-2019-0004_bib006","author":"Dhingra"},{"key":"10.2478\/dim-2019-0004_bib007","series-title":"International semantic web conference","first-page":"135","article-title":"Real-time RDF extraction from unstructured data streams","author":"Gerber","year":"2013"},{"key":"10.2478\/dim-2019-0004_bib008","series-title":"Proceedings of the 23rd international conference on machine learning","first-page":"369","article-title":"Connectionist temporal classification: Labelling unsegmented sequence data with recurrent neural networks","author":"Graves","year":"2006"},{"key":"10.2478\/dim-2019-0004_bib009","series-title":"DRAS-TIC Measures: Digital Repository at Scale that Invites Computation (To Improve Collections)","author":"Jansen","year":"2016"},{"key":"10.2478\/dim-2019-0004_bib010","series-title":"iPres 2016 13th International Conference on Digital Preservation","first-page":"117","article-title":"Designing Scalable Cyberinfrastructure for Metadata Extraction in Billion-Record Archives","author":"Jansen","year":"2016"},{"issue":"4","key":"10.2478\/dim-2019-0004_bib011","doi-asserted-by":"crossref","first-page":"1233","DOI":"10.2307\/3660172","article-title":"The long civil rights movement and the political uses of the past","volume":"91","author":"Hall","year":"2005","journal-title":"The Journal of American History"},{"issue":"8","key":"10.2478\/dim-2019-0004_bib012","doi-asserted-by":"crossref","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","article-title":"Long short-term memory","volume":"9","author":"Hochreiter","year":"1997","journal-title":"Neural Computation"},{"key":"10.2478\/dim-2019-0004_bib013","author":"IMLS"},{"issue":"5","key":"10.2478\/dim-2019-0004_bib014","doi-asserted-by":"crossref","first-page":"722","DOI":"10.2307\/3088915","article-title":"How the civil rights movement revitalized labor militancy","volume":"67","author":"Isaac","year":"2002","journal-title":"American Sociological Review"},{"key":"10.2478\/dim-2019-0004_bib015","series-title":"Proceedings of the 22nd ACM international conference on Multimedia","first-page":"675","article-title":"Caffe: Convolutional architecture for fast feature embedding","author":"Jia","year":"2014"},{"key":"10.2478\/dim-2019-0004_bib016","series-title":"2016 12th IAPR Workshop on Document Analysis Systems (DAS)","first-page":"198","article-title":"OCR error correction using character correction and feature-based word classification","author":"Kissos","year":"2016"},{"key":"10.2478\/dim-2019-0004_bib017","series-title":"2007 Future of Software Engineering","first-page":"259","article-title":"Self-managed systems: An architectural challenge","author":"Kramer","year":"2007"},{"issue":"2","key":"10.2478\/dim-2019-0004_bib018","doi-asserted-by":"crossref","first-page":"35","DOI":"10.1145\/1773912.1773922","article-title":"Cassandra- A decentralized structured storage system","volume":"44","author":"Lakshman","year":"2010","journal-title":"ACM SIGOPS Operating Systems Review"},{"key":"10.2478\/dim-2019-0004_bib019","series-title":"Proceedings of 2010 IEEE International Symposium on Circuits and Systems","first-page":"253","article-title":"Convolutional networks and applications in vision","author":"LeCun","year":"2010"},{"key":"10.2478\/dim-2019-0004_bib020","author":"Loper"},{"issue":"3","key":"10.2478\/dim-2019-0004_bib021","doi-asserted-by":"crossref","first-page":"54","DOI":"10.1145\/603867.603876","article-title":"Preservation of digital data with self-validating, self-instantiating knowledge-based archives","volume":"30","author":"Lud\u00e4scher","year":"2001","journal-title":"SIGMOD Record"},{"key":"10.2478\/dim-2019-0004_bib022","series-title":"Re-Envisioning the MLS: Perspectives on the future of library and information science education","first-page":"179","article-title":"Archival records and training in the age of big data","author":"Marciano","year":"2018"},{"issue":"2","key":"10.2478\/dim-2019-0004_bib023","first-page":"313","article-title":"Building a large annotated corpus of English: The Penn Treebank (No. MS-CIS-93-87)","volume":"19","author":"Marcus","year":"1993","journal-title":"Computational Linguistics"},{"key":"10.2478\/dim-2019-0004_bib024","series-title":"Proceedings of the Practice and Experience on Advanced Research Computing (PEARC '18)","article-title":"Clowder: Open Source Data Management for Long Tail Data","author":"Marini","year":"2018"},{"key":"10.2478\/dim-2019-0004_bib025","author":"Mart\u00edn"},{"key":"10.2478\/dim-2019-0004_sbref260","series-title":"Language by ear and by eye: The relationship between speech and reading","article-title":"Reading, the linguistic process, and linguistic awareness","author":"Mattingly","year":"1972"},{"key":"10.2478\/dim-2019-0004_sbref270","author":"McHenry"},{"issue":"1","key":"10.2478\/dim-2019-0004_bib027","first-page":"1235","article-title":"Mllib: Machine learning in apache spark","volume":"17","author":"Meng","year":"2016","journal-title":"Journal of Machine Learning Research"},{"key":"10.2478\/dim-2019-0004_bib028","author":"Miwa"},{"key":"10.2478\/dim-2019-0004_bib029","author":"Moritz"},{"key":"10.2478\/dim-2019-0004_bib030","author":"Neuberg"},{"key":"10.2478\/dim-2019-0004_bib031","series-title":"Proceedings of the 28th International Conference on Machine Learning (ICML-11)","first-page":"689","article-title":"Multimodal deep learning","author":"Ngiam","year":"2011"},{"key":"10.2478\/dim-2019-0004_bib032","series-title":"2015 IEEE International Conference on Big Data (Big Data)","first-page":"493","article-title":"Brown Dog: Leveraging everything towards autocuration","author":"Padhy","year":"2015"},{"key":"10.2478\/dim-2019-0004_bib033","author":"Padilla"},{"issue":"1","key":"10.2478\/dim-2019-0004_bib034","doi-asserted-by":"crossref","first-page":"86","DOI":"10.1093\/llc\/fqt037","article-title":"Factoid-based prosopography and computer ontologies: Towards an integrated approach","volume":"30","author":"Pasin","year":"2015","journal-title":"Literary and Linguistic Computing"},{"key":"10.2478\/dim-2019-0004_bib035","series-title":"Proceedings of the Workshop on Speech and Natural Language","first-page":"357","article-title":"The Design for The Wall Street Journal-based CSR Corpus","author":"Paul","year":"1992"},{"key":"10.2478\/dim-2019-0004_bib036","series-title":"Thirtieth AAAI Conference on Artificial Intelligence","article-title":"Look, Listen and Learn\u2013a Multimodal LSTM for Speaker Identification","author":"Ren","year":"2016"},{"key":"10.2478\/dim-2019-0004_bib037","series-title":"The Semantic Web \u2013 ISWC 2016","first-page":"498","article-title":"RDF2Vec: RDF graph embeddings for data mining","author":"Ristoski","year":"2016"},{"key":"10.2478\/dim-2019-0004_bib038","series-title":"International Conference on Medical image computing and computer-assisted intervention","first-page":"234","article-title":"U-net: Convolutional networks for biomedical image segmentation","author":"Ronneberger","year":"2015"},{"key":"10.2478\/dim-2019-0004_bib039","series-title":"Proceedings of the Practice and Experience on Advanced Research Computing","first-page":"1","article-title":"Brown dog: Making the digital world a better place, a few files at a time","author":"Satheesan","year":"2018"},{"key":"10.2478\/dim-2019-0004_bib040","series-title":"Collaborative learning: Cognitive and Computational Approaches. Advances in Learning and Instruction Series","first-page":"197","article-title":"The productive agency that drives collaborative learning","author":"Schwartz","year":"1999"},{"issue":"11","key":"10.2478\/dim-2019-0004_bib041","doi-asserted-by":"crossref","first-page":"2298","DOI":"10.1109\/TPAMI.2016.2646371","article-title":"An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition","volume":"39","author":"Shi","year":"2017","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"10.2478\/dim-2019-0004_bib042","author":"University of Maryland"},{"key":"10.2478\/dim-2019-0004_bib043","author":"W3C"},{"issue":"11","key":"10.2478\/dim-2019-0004_bib044","doi-asserted-by":"crossref","first-page":"56","DOI":"10.1145\/2934664","article-title":"Apache Spark: A unified engine for big data processing","volume":"59","author":"Zaharia","year":"2016","journal-title":"Communications of the ACM"}],"container-title":["Data and Information Management"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/content.sciendo.com\/view\/journals\/dim\/3\/1\/article-p18.xml","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S2543925122000717?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S2543925122000717?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.sciendo.com\/pdf\/10.2478\/dim-2019-0004","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,4,28]],"date-time":"2022-04-28T03:02:33Z","timestamp":1651114953000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S2543925122000717"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,3]]},"references-count":45,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2019,3]]}},"alternative-id":["S2543925122000717"],"URL":"https:\/\/doi.org\/10.2478\/dim-2019-0004","relation":{},"ISSN":["2543-9251"],"issn-type":[{"value":"2543-9251","type":"print"}],"subject":[],"published":{"date-parts":[[2019,3]]}}}