{"id":"https://openalex.org/W2059719708","doi":"https://doi.org/10.1145/1835449.1835529","title":"Combining coregularization and consensus-based self-training for multilingual text categorization","display_name":"Combining coregularization and consensus-based self-training for multilingual text categorization","publication_year":2010,"publication_date":"2010-07-19","ids":{"openalex":"https://openalex.org/W2059719708","doi":"https://doi.org/10.1145/1835449.1835529","mag":"2059719708"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/1835449.1835529","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111528497","display_name":"Massih R. Amini","orcid":null},"institutions":[{"id":"https://openalex.org/I197604219","display_name":"National Academies of Sciences, Engineering, and Medicine","ror":"https://ror.org/02eq2w707","country_code":"US","type":"government","lineage":["https://openalex.org/I197604219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Massih R. Amini","raw_affiliation_strings":["National Research Council Canada, Gatineau, PQ, Canada#TAB#"],"affiliations":[{"raw_affiliation_string":"National Research Council Canada, Gatineau, PQ, Canada#TAB#","institution_ids":["https://openalex.org/I197604219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065257553","display_name":"Cyril Goutte","orcid":"https://orcid.org/0000-0003-4939-6555"},"institutions":[{"id":"https://openalex.org/I197604219","display_name":"National Academies of Sciences, Engineering, and Medicine","ror":"https://ror.org/02eq2w707","country_code":"US","type":"government","lineage":["https://openalex.org/I197604219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Cyril Goutte","raw_affiliation_strings":["National Research Council Canada, Gatineau, PQ, Canada#TAB#"],"affiliations":[{"raw_affiliation_string":"National Research Council Canada, Gatineau, PQ, Canada#TAB#","institution_ids":["https://openalex.org/I197604219"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5084360449","display_name":"Nicolas Usunier","orcid":"https://orcid.org/0000-0002-9324-1457"},"institutions":[{"id":"https://openalex.org/I39804081","display_name":"Sorbonne Universit\u00e9","ror":"https://ror.org/02en5vm52","country_code":"FR","type":"funder","lineage":["https://openalex.org/I39804081"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Nicolas Usunier","raw_affiliation_strings":["Universit\u00e9 Pierre et Marie Curie (Paris 6), Paris, France"],"affiliations":[{"raw_affiliation_string":"Universit\u00e9 Pierre et Marie Curie (Paris 6), Paris, France","institution_ids":["https://openalex.org/I39804081"]}]}],"institution_assertions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":16,"citation_normalized_percentile":{"value":0.951182,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":87,"max":88},"biblio":{"volume":null,"issue":null,"first_page":"475","last_page":"482"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9906,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9884,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/boosting","display_name":"Boosting","score":0.65896636},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.5386239},{"id":"https://openalex.org/keywords/co-training","display_name":"Co-training","score":0.5008397},{"id":"https://openalex.org/keywords/text-categorization","display_name":"Text Categorization","score":0.493585},{"id":"https://openalex.org/keywords/labeled-data","display_name":"Labeled data","score":0.48578003},{"id":"https://openalex.org/keywords/supervised-learning","display_name":"Supervised Learning","score":0.47113556}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.78504205},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7007272},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.6598015},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.65896636},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5593728},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.5386239},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.510312},{"id":"https://openalex.org/C2776959682","wikidata":"https://www.wikidata.org/wiki/Q17005296","display_name":"Co-training","level":3,"score":0.5008397},{"id":"https://openalex.org/C2986744138","wikidata":"https://www.wikidata.org/wiki/Q302088","display_name":"Text categorization","level":3,"score":0.493585},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.48578003},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.482552},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.47113556},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4488004},{"id":"https://openalex.org/C58973888","wikidata":"https://www.wikidata.org/wiki/Q1041418","display_name":"Semi-supervised learning","level":2,"score":0.34330153},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.10173595},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/1835449.1835529","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://hal.archives-ouvertes.fr/hal-01291883","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":["Centre National de la Recherche Scientifique"],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://hal.science/hal-01291883","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.81,"display_name":"Quality education","id":"https://metadata.un.org/sdg/4"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":21,"referenced_works":["https://openalex.org/W118545087","https://openalex.org/W1479807131","https://openalex.org/W1482214997","https://openalex.org/W1489959797","https://openalex.org/W1546961578","https://openalex.org/W1560143607","https://openalex.org/W1987756646","https://openalex.org/W2031823405","https://openalex.org/W2035720976","https://openalex.org/W2048679005","https://openalex.org/W2107008379","https://openalex.org/W2133348086","https://openalex.org/W2136504847","https://openalex.org/W2139578439","https://openalex.org/W2140676093","https://openalex.org/W2142742813","https://openalex.org/W2145234365","https://openalex.org/W2145765191","https://openalex.org/W2597289420","https://openalex.org/W2798766386","https://openalex.org/W4244633107"],"related_works":["https://openalex.org/W60792937","https://openalex.org/W4312414840","https://openalex.org/W34092691","https://openalex.org/W2891078859","https://openalex.org/W2186473728","https://openalex.org/W2133556223","https://openalex.org/W2131153761","https://openalex.org/W2059598258","https://openalex.org/W192740413","https://openalex.org/W1520691178"],"abstract_inverted_index":{"We":[0,20,52,116],"investigate":[1],"the":[2,25,38,79,83,127,130,135,144,170],"problem":[3,23],"of":[4,27,37,78,89,129,132,143],"learning":[5,44,66,137],"document":[6],"classifiers":[7,75],"in":[8,24,45,67,93,169],"a":[9,87,111,118,140],"multilingual":[10,141],"setting,":[11],"from":[12,49,104],"collections":[13],"where":[14,30,176],"labels":[15,101],"are":[16,92,160,178],"only":[17],"partially":[18],"available.":[19,186],"address":[21],"this":[22,123],"framework":[26],"multiview":[28,63],"learning,":[29],"different":[31,35,68,73,150],"languages":[32],"correspond":[33],"to":[34,47],"views":[36,133,180],"same":[39],"document,":[40],"combined":[41],"with":[42],"semi-supervised":[43,65,136],"order":[46],"benefit":[48],"unlabeled":[50,90,106],"documents.":[51],"rely":[53],"on":[54,76,110,134,139],"two":[55],"techniques,":[56],"coregularization":[57,156],"and":[58,64,99,125,157,162,172,182],"consensus-based":[59,158],"self-training,":[60],"that":[61,82,155,163],"combine":[62],"ways.":[69],"Our":[70,152],"approach":[71],"trains":[72],"monolingual":[74],"each":[77],"views,":[80],"such":[81],"classifiers'":[84],"decisions":[85],"over":[86],"set":[88,108],"examples":[91,103],"agreement":[94],"as":[95,97],"much":[96],"possible,":[98],"iteratively":[100],"new":[102],"another":[105],"training":[107,120],"based":[109],"consensus":[112],"across":[113],"language-specific":[114],"classifiers.":[115],"derive":[117],"boosting-based":[119],"algorithm":[121],"for":[122],"task,":[124],"analyze":[126],"impact":[128],"number":[131],"results":[138],"extension":[142],"Reuters":[145],"RCV1/RCV2":[146],"corpus":[147],"using":[148],"five":[149],"languages.":[151],"experiments":[153],"show":[154],"self-training":[159],"complementary":[161],"their":[164],"combination":[165],"is":[166],"especially":[167],"effective":[168],"interesting":[171],"very":[173],"common":[174],"situation":[175],"there":[177],"few":[179,183],"(languages)":[181],"labeled":[184],"documents":[185]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2059719708","counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":2},{"year":2014,"cited_by_count":1},{"year":2012,"cited_by_count":2}],"updated_date":"2025-04-17T06:24:55.849476","created_date":"2016-06-24"}