{"id":"https://openalex.org/W2012610259","doi":"https://doi.org/10.1109/icdar.2011.268","title":"Facilitating Understanding of Large Document Collections","display_name":"Facilitating Understanding of Large Document Collections","publication_year":2011,"publication_date":"2011-09-01","ids":{"openalex":"https://openalex.org/W2012610259","doi":"https://doi.org/10.1109/icdar.2011.268","mag":"2012610259"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/icdar.2011.268","pdf_url":null,"source":{"id":"https://openalex.org/S4306419356","display_name":"International Conference on Document Analysis and Recognition","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034639113","display_name":"Jae Hyeon Bae","orcid":null},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"funder","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jae Hyeon Bae","raw_affiliation_strings":["University of Texas at Austin#TAB#"],"affiliations":[{"raw_affiliation_string":"University of Texas at Austin#TAB#","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101883722","display_name":"Weijia Xu","orcid":"https://orcid.org/0000-0002-5134-6381"},"institutions":[{"id":"https://openalex.org/I4388891828","display_name":"Texas Advanced Computing Center","ror":"https://ror.org/00xg4bh43","country_code":null,"type":"facility","lineage":["https://openalex.org/I4388891828","https://openalex.org/I86519309"]}],"countries":[],"is_corresponding":false,"raw_author_name":"Weijia Xu","raw_affiliation_strings":["Texas Advanced Computing Center (TACC)"],"affiliations":[{"raw_affiliation_string":"Texas Advanced Computing Center (TACC)","institution_ids":["https://openalex.org/I4388891828"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5078797277","display_name":"Mar\u00eda Esteva","orcid":"https://orcid.org/0000-0001-6204-4517"},"institutions":[{"id":"https://openalex.org/I4388891828","display_name":"Texas Advanced Computing Center","ror":"https://ror.org/00xg4bh43","country_code":null,"type":"facility","lineage":["https://openalex.org/I4388891828","https://openalex.org/I86519309"]}],"countries":[],"is_corresponding":false,"raw_author_name":"Maria Esteva","raw_affiliation_strings":["Texas Advanced Computing Center (TACC)"],"affiliations":[{"raw_affiliation_string":"Texas Advanced Computing Center (TACC)","institution_ids":["https://openalex.org/I4388891828"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.206,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":2,"citation_normalized_percentile":{"value":0.374637,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":72,"max":76},"biblio":{"volume":null,"issue":null,"first_page":"1334","last_page":"1338"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9973,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9973,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.997,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9957,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/document-clustering","display_name":"Document Clustering","score":0.744036}],"concepts":[{"id":"https://openalex.org/C500882744","wikidata":"https://www.wikidata.org/wiki/Q269236","display_name":"Latent Dirichlet allocation","level":3,"score":0.9345293},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7976221},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.7819303},{"id":"https://openalex.org/C177937566","wikidata":"https://www.wikidata.org/wiki/Q4223102","display_name":"Document clustering","level":3,"score":0.744036},{"id":"https://openalex.org/C171686336","wikidata":"https://www.wikidata.org/wiki/Q3532085","display_name":"Topic model","level":2,"score":0.639048},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5901049},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.57805395},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.44626963},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.35000277},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.23981532},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/icdar.2011.268","pdf_url":null,"source":{"id":"https://openalex.org/S4306419356","display_name":"International Conference on Document Analysis and Recognition","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality education","score":0.64}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":13,"referenced_works":["https://openalex.org/W1673310716","https://openalex.org/W1880262756","https://openalex.org/W2062028408","https://openalex.org/W2075211019","https://openalex.org/W2086485446","https://openalex.org/W2113671562","https://openalex.org/W2116919243","https://openalex.org/W2122391756","https://openalex.org/W2126427322","https://openalex.org/W2325227998","https://openalex.org/W2894073262","https://openalex.org/W2918757710","https://openalex.org/W4231510805"],"related_works":["https://openalex.org/W4315588616","https://openalex.org/W4312773271","https://openalex.org/W4309228610","https://openalex.org/W4294597112","https://openalex.org/W3159709618","https://openalex.org/W3005513013","https://openalex.org/W2962686197","https://openalex.org/W2888805565","https://openalex.org/W2769501189","https://openalex.org/W2207653751"],"abstract_inverted_index":{"Large":[0],"document":[1,50,106],"collections":[2],"containing":[3],"multiple":[4],"topics":[5],"can":[6,26,36,72],"be":[7,37,73],"overwhelming":[8],"to":[9,19,52,102],"understand,":[10],"requiring":[11],"librarians":[12],"and":[13,17,66,91],"archivists":[14],"significant":[15],"time":[16],"efforts":[18],"develop":[20],"access":[21,56],"points.":[22],"Efficient":[23],"computational":[24],"methods":[25],"aid":[27],"this":[28],"process":[29,103],"by":[30],"uncovering":[31],"groups":[32],"of":[33,45,55,60,68,78],"documents":[34],"that":[35,71,82],"described":[38,74],"for":[39],"access.":[40,79],"We":[41,80,99],"investigate":[42],"the":[43],"use":[44,100],"density":[46],"based":[47],"clustering":[48,90],"with":[49],"segmentation":[51],"identify":[53],"points":[54,77],"as":[57,75],"dense":[58],"clusters":[59,70],"information.":[61],"The":[62],"method":[63,84],"returns":[64],"stories":[65],"classes":[67],"cohesive":[69],"precise":[76],"found":[81],"our":[83],"performs":[85],"more":[86],"efficiently":[87],"than":[88],"K-means":[89],"topic":[92],"model":[93],"using":[94],"Latent":[95],"Dirichlet":[96],"Allocation":[97],"(LDA).":[98],"Hadoop":[101],"a":[104],"large":[105],"collection.":[107]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2012610259","counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2013,"cited_by_count":1}],"updated_date":"2025-03-23T08:43:08.508201","created_date":"2016-06-24"}