{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,8,19]],"date-time":"2024-08-19T11:50:53Z","timestamp":1724068253913},"reference-count":6,"publisher":"MIT Press - Journals","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["TACL"],"published-print":{"date-parts":[[2014,12]]},"abstract":" Language identification is the task of automatically detecting the language(s) present in a document based on the content of the document. In this work, we address the problem of detecting documents that contain text from more than one language ( multilingual documents). We introduce a method that is able to detect that a document is multilingual, identify the languages present, and estimate their relative proportions. We demonstrate the effectiveness of our method over synthetic data, as well as real-world multilingual documents collected from the web. <\/jats:p>","DOI":"10.1162\/tacl_a_00163","type":"journal-article","created":{"date-parts":[[2018,12,28]],"date-time":"2018-12-28T15:43:26Z","timestamp":1546011806000},"page":"27-40","source":"Crossref","is-referenced-by-count":60,"title":["Automatic Detection and Language Identification of Multilingual Documents"],"prefix":"10.1162","volume":"2","author":[{"given":"Marco","family":"Lui","sequence":"first","affiliation":[{"name":"Department of Computing and Information Systems, The University of Melbourne"},{"name":"NICTA Victoria Research Laboratory,"}]},{"given":"Jey Han","family":"Lau","sequence":"additional","affiliation":[{"name":"Department of Philosophy, King\u2019s College London,"}]},{"given":"Timothy","family":"Baldwin","sequence":"additional","affiliation":[{"name":"Department of Computing and Information Systems, The University of Melbourne"},{"name":"NICTA Victoria Research Laboratory,"}]}],"member":"281","reference":[{"key":"p_6","first-page":"993","volume":"3","author":"Blei David M.","year":"2003","journal-title":"Journal of Machine Learning Research"},{"issue":"1","key":"p_14","doi-asserted-by":"crossref","first-page":"56","DOI":"10.1007\/s10115-003-0121-x","volume":"7","author":"Ghani Rayid","year":"2004","journal-title":"Knowledge and Information Systems"},{"key":"p_17","doi-asserted-by":"crossref","first-page":"5228","DOI":"10.1073\/pnas.0307752101","volume":"101","author":"Griffiths Thomas L.","year":"2004","journal-title":"Proceedings of the National Academy of Sciences"},{"issue":"3","key":"p_30","first-page":"94","volume":"20","author":"McNamee Paul","year":"2005","journal-title":"Journal of Computing Sciences in Colleges"},{"issue":"3","key":"p_33","doi-asserted-by":"crossref","first-page":"71","DOI":"10.1080\/07421222.1999.11518257","volume":"16","author":"Prager John M.","year":"1999","journal-title":"Journal of Management Information Systems"},{"key":"p_40","doi-asserted-by":"crossref","first-page":"1566","DOI":"10.1198\/016214506000000302","volume":"101","author":"Teh Yee Whye","year":"2006","journal-title":"Journal of the American Statistical Association"}],"container-title":["Transactions of the Association for Computational Linguistics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.mitpressjournals.org\/doi\/pdf\/10.1162\/tacl_a_00163","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,3,12]],"date-time":"2021-03-12T21:38:50Z","timestamp":1615585130000},"score":1,"resource":{"primary":{"URL":"https:\/\/direct.mit.edu\/tacl\/article\/43314"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,12]]},"references-count":6,"alternative-id":["10.1162\/tacl_a_00163"],"URL":"https:\/\/doi.org\/10.1162\/tacl_a_00163","relation":{},"ISSN":["2307-387X"],"issn-type":[{"value":"2307-387X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2014,12]]}}}