{"id":"https://openalex.org/W2078693345","doi":"https://doi.org/10.1145/345508.345582","title":"An investigation of linguistic features and clustering algorithms for topical document clustering","display_name":"An investigation of linguistic features and clustering algorithms for topical document clustering","publication_year":2000,"publication_date":"2000-07-01","ids":{"openalex":"https://openalex.org/W2078693345","doi":"https://doi.org/10.1145/345508.345582","mag":"2078693345"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1145/345508.345582","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/345508.345582","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/345508.345582","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5008449211","display_name":"Vasileios Hatzivassiloglou","orcid":null},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vasileios Hatzivassiloglou","raw_affiliation_strings":["Department of Computer Science, Columbia Unwersity, 1214 Amsterdam Avenue, New York, NY"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Columbia Unwersity, 1214 Amsterdam Avenue, New York, NY","institution_ids":["https://openalex.org/I78577930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080063580","display_name":"Luis Gravano","orcid":null},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Luis Gravano","raw_affiliation_strings":["Department of Computer Science, Columbia Unwersity, 1214 Amsterdam Avenue, New York, NY"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Columbia Unwersity, 1214 Amsterdam Avenue, New York, NY","institution_ids":["https://openalex.org/I78577930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5044810123","display_name":"Ankineedu Maganti","orcid":null},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ankineedu Maganti","raw_affiliation_strings":["Department of Computer Science, Columbia Unwersity, 1214 Amsterdam Avenue, New York, NY"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Columbia Unwersity, 1214 Amsterdam Avenue, New York, NY","institution_ids":["https://openalex.org/I78577930"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":3.156,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":134,"citation_normalized_percentile":{"value":0.951487,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"224","last_page":"231"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9993,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9993,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.999,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9988,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/document-clustering","display_name":"Document Clustering","score":0.5037276}],"concepts":[{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.82536536},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7181419},{"id":"https://openalex.org/C177937566","wikidata":"https://www.wikidata.org/wiki/Q4223102","display_name":"Document clustering","level":3,"score":0.5037276},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.48248008},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4619679},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.34194142}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1145/345508.345582","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/345508.345582","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1145/345508.345582","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/345508.345582","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"score":0.76,"display_name":"Quality education","id":"https://metadata.un.org/sdg/4"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":20,"referenced_works":["https://openalex.org/W1493454437","https://openalex.org/W1537217019","https://openalex.org/W1586171079","https://openalex.org/W1857571498","https://openalex.org/W1971763646","https://openalex.org/W1978394996","https://openalex.org/W1991695254","https://openalex.org/W1997841190","https://openalex.org/W1998224037","https://openalex.org/W2005422315","https://openalex.org/W2029433174","https://openalex.org/W2038281398","https://openalex.org/W2039748601","https://openalex.org/W2070779353","https://openalex.org/W2074449313","https://openalex.org/W2136987143","https://openalex.org/W2608050011","https://openalex.org/W2797692640","https://openalex.org/W2999729612","https://openalex.org/W4296980202"],"related_works":["https://openalex.org/W4254379378","https://openalex.org/W4237592971","https://openalex.org/W4206655101","https://openalex.org/W3204019825","https://openalex.org/W3015674157","https://openalex.org/W2899601636","https://openalex.org/W2387982377","https://openalex.org/W2073681303","https://openalex.org/W2051487156","https://openalex.org/W2019737068"],"abstract_inverted_index":{"We":[0,114],"investigate":[1],"four":[2],"hierarchical":[3],"clustering":[4,99,112],"methods":[5],"(single-link,":[6],"complete-link,":[7],"groupwise-average,":[8],"and":[9,11,20,41,47,90,101],"single-pass)":[10],"two":[12],"linguistically":[13],"motivated":[14],"text":[15],"features":[16,107,120],"(noun":[17],"phrase":[18],"heads":[19],"proper":[21],"names)":[22],"in":[23,65,140],"the":[24,60,96,102,105,122,129,135,141],"context":[25],"of":[26,79,98,104,119,128],"document":[27],"clustering.":[28],"A":[29],"statistical":[30],"model":[31],"for":[32,62,71],"combining":[33,72],"similarity":[34],"information":[35],"from":[36,86],"multiple":[37,87],"sources":[38,89],"is":[39],"described":[40],"applied":[42],"to":[43,67,121],"DARPA's":[44],"Topic":[45],"Detection":[46],"Tracking":[48],"phase":[49],"2":[50],"(TDT2)":[51],"data.":[52],"This":[53],"model,":[54],"based":[55],"on":[56,111],"log-linear":[57],"regression,":[58],"alleviates":[59],"need":[61],"extensive":[63,77],"search":[64],"order":[66],"determine":[68],"optimal":[69,117],"weights":[70],"input":[73],"features.":[74],"Through":[75],"an":[76,109],"series":[78],"experiments":[80],"with":[81,134],"more":[82],"than":[83],"40,000":[84],"documents":[85,130],"news":[88],"modalities,":[91],"we":[92],"establish":[93],"that":[94,131],"both":[95],"choice":[97],"algorithm":[100],"introduction":[103],"additional":[106],"have":[108],"impact":[110],"performance.":[113],"apply":[115],"our":[116],"combination":[118],"TDT2":[123,143],"test":[124],"data,":[125],"obtaining":[126],"partitions":[127],"compare":[132],"favorably":[133],"results":[136],"obtained":[137],"by":[138],"participants":[139],"official":[142],"competition.":[144]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2078693345","counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":4},{"year":2019,"cited_by_count":5},{"year":2018,"cited_by_count":4},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":8},{"year":2013,"cited_by_count":8},{"year":2012,"cited_by_count":4}],"updated_date":"2025-01-19T11:48:07.410220","created_date":"2016-06-24"}