{"id":"https://openalex.org/W1986637734","doi":"https://doi.org/10.1142/s0219649214500269","title":"Improved Text Clustering Using k-Mean Bayesian Vectoriser","display_name":"Improved Text Clustering Using k-Mean Bayesian Vectoriser","publication_year":2014,"publication_date":"2014-09-01","ids":{"openalex":"https://openalex.org/W1986637734","doi":"https://doi.org/10.1142/s0219649214500269","mag":"1986637734"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1142/s0219649214500269","pdf_url":null,"source":{"id":"https://openalex.org/S30163770","display_name":"Journal of Information & Knowledge Management","issn_l":"0219-6492","issn":["0219-6492","1793-6926"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5043072644","display_name":"Hanan M. Alghamdi","orcid":"https://orcid.org/0000-0001-5951-2498"},"institutions":[{"id":"https://openalex.org/I199693650","display_name":"Umm al-Qura University","ror":"https://ror.org/01xjqrm90","country_code":"SA","type":"education","lineage":["https://openalex.org/I199693650"]},{"id":"https://openalex.org/I4576418","display_name":"University of Technology Malaysia","ror":"https://ror.org/026w31v75","country_code":"MY","type":"education","lineage":["https://openalex.org/I4576418"]}],"countries":["MY","SA"],"is_corresponding":false,"raw_author_name":"Hanan M. Alghamdi","raw_affiliation_strings":["Faculty of Computer Science, Umm Al-Qura University, Al-Gunfdh, Saudi Arabia","Faculty of Computing, Universiti Teknologi Malaysia, UTM, Johor Bahru, Johor 81310, Malaysia"],"affiliations":[{"raw_affiliation_string":"Faculty of Computer Science, Umm Al-Qura University, Al-Gunfdh, Saudi Arabia","institution_ids":["https://openalex.org/I199693650"]},{"raw_affiliation_string":"Faculty of Computing, Universiti Teknologi Malaysia, UTM, Johor Bahru, Johor 81310, Malaysia","institution_ids":["https://openalex.org/I4576418"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089742876","display_name":"Ali Selamat","orcid":"https://orcid.org/0000-0001-9746-8459"},"institutions":[{"id":"https://openalex.org/I4576418","display_name":"University of Technology Malaysia","ror":"https://ror.org/026w31v75","country_code":"MY","type":"education","lineage":["https://openalex.org/I4576418"]}],"countries":["MY"],"is_corresponding":false,"raw_author_name":"Ali Selamat","raw_affiliation_strings":["Faculty of Computing, Universiti Teknologi Malaysia, UTM Johor Bahru, Johor, 81310, Malaysia"],"affiliations":[{"raw_affiliation_string":"Faculty of Computing, Universiti Teknologi Malaysia, UTM Johor Bahru, Johor, 81310, Malaysia","institution_ids":["https://openalex.org/I4576418"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5061067183","display_name":"Nor Shahriza Abdul Karim","orcid":"https://orcid.org/0000-0002-9433-9229"},"institutions":[{"id":"https://openalex.org/I142024983","display_name":"Prince Sultan University","ror":"https://ror.org/053mqrf26","country_code":"SA","type":"education","lineage":["https://openalex.org/I142024983"]}],"countries":["SA"],"is_corresponding":false,"raw_author_name":"Nor Shahriza Abdul Karim","raw_affiliation_strings":["Computer & Information Science Department, Prince Sultan University, 66833 Rafha Street, Riyadh 11586, Saudi Arabia#TAB#"],"affiliations":[{"raw_affiliation_string":"Computer & Information Science Department, Prince Sultan University, 66833 Rafha Street, Riyadh 11586, Saudi Arabia#TAB#","institution_ids":["https://openalex.org/I142024983"]}]}],"institution_assertions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.33,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":7,"citation_normalized_percentile":{"value":0.817832,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":83,"max":84},"biblio":{"volume":"13","issue":"03","first_page":"1450026","last_page":"1450026"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9986,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9986,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10637","display_name":"Advanced Clustering Algorithms Research","score":0.9981,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9919,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/k-means-clustering","display_name":"K-Means Clustering","score":0.6125195},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.50681216},{"id":"https://openalex.org/keywords/divergence","display_name":"Divergence (linguistics)","score":0.48630914},{"id":"https://openalex.org/keywords/document-clustering","display_name":"Document Clustering","score":0.48496962},{"id":"https://openalex.org/keywords/representation","display_name":"Representation","score":0.41792372}],"concepts":[{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.7991351},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.72427},{"id":"https://openalex.org/C207968372","wikidata":"https://www.wikidata.org/wiki/Q310401","display_name":"k-means clustering","level":3,"score":0.6125195},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5759768},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.5566239},{"id":"https://openalex.org/C120174047","wikidata":"https://www.wikidata.org/wiki/Q847073","display_name":"Euclidean distance","level":2,"score":0.5217779},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.50681216},{"id":"https://openalex.org/C207390915","wikidata":"https://www.wikidata.org/wiki/Q1230525","display_name":"Divergence (linguistics)","level":2,"score":0.48630914},{"id":"https://openalex.org/C177937566","wikidata":"https://www.wikidata.org/wiki/Q4223102","display_name":"Document clustering","level":3,"score":0.48496962},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.45186925},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.41792372},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.25839728},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.117176116},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1142/s0219649214500269","pdf_url":null,"source":{"id":"https://openalex.org/S30163770","display_name":"Journal of Information & Knowledge Management","issn_l":"0219-6492","issn":["0219-6492","1793-6926"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.64,"display_name":"Quality education"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":33,"referenced_works":["https://openalex.org/W146434704","https://openalex.org/W1599032155","https://openalex.org/W1600097617","https://openalex.org/W195533127","https://openalex.org/W1984545377","https://openalex.org/W1998773749","https://openalex.org/W2009988448","https://openalex.org/W2010965922","https://openalex.org/W2018651639","https://openalex.org/W2024915064","https://openalex.org/W2059887434","https://openalex.org/W2066293098","https://openalex.org/W2107134090","https://openalex.org/W2114080886","https://openalex.org/W2118013824","https://openalex.org/W2129974037","https://openalex.org/W2146327010","https://openalex.org/W2157361576","https://openalex.org/W2165182095","https://openalex.org/W2169738360","https://openalex.org/W2182274314","https://openalex.org/W2184253191","https://openalex.org/W2186700860","https://openalex.org/W2293579159","https://openalex.org/W2405173731","https://openalex.org/W2918142713","https://openalex.org/W2963054932","https://openalex.org/W2963486821","https://openalex.org/W3021638795","https://openalex.org/W4240144763","https://openalex.org/W4250042253","https://openalex.org/W4256207200","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W4385515363","https://openalex.org/W3208095355","https://openalex.org/W3139002074","https://openalex.org/W3049054441","https://openalex.org/W2946762040","https://openalex.org/W2904779692","https://openalex.org/W2841402245","https://openalex.org/W2788671511","https://openalex.org/W2185976384","https://openalex.org/W1990140521"],"abstract_inverted_index":{"In":[0],"literature":[1],"studies,":[2],"high-dimensional":[3,76],"data":[4,77],"reduces":[5],"the":[6,46,51,59,63,66,73,80,106,111,116,122,141,146,165,175,178,185],"efficiency":[7],"of":[8,44,62,75,128],"clustering":[9,49],"algorithms":[10],"and":[11,78,97,169],"maximises":[12],"execution":[13],"time.":[14,82],"Therefore,":[15],"in":[16,65,69,102,162,173],"this":[17],"paper,":[18],"we":[19],"propose":[20],"an":[21],"approach":[22,42,124,143,180],"called":[23],"a":[24,126,150,155],"BV-kmeans":[25],"(Bayesian":[26],"Vectorisation":[27],"along":[28],"with":[29,50,115,164],"k-means)":[30],"that":[31,54,132,140],"aims":[32],"to":[33,57,71,104,148,184],"improve":[34],"document":[35,48],"representation":[36],"models":[37],"for":[38,109],"text":[39],"clustering.":[40],"This":[41,160],"consists":[43],"integrating":[45],"k-means":[47,167,187],"Bayesian":[52],"Vectoriser":[53],"is":[55,161],"used":[56,85],"compute":[58],"probability":[60],"distribution":[61],"documents":[64,114,153],"vector":[67],"space":[68],"order":[70,103],"overcome":[72],"problems":[74],"lower":[79],"consumption":[81],"We":[83,119],"have":[84,120,133],"various":[86],"similarity":[87,112],"measures":[88],"which":[89,149],"are":[90],"namely:":[91],"K":[92],"divergence,":[93],"Squared":[94,98],"Euclidean":[95],"distance":[96,101],"\u03c7":[99],"2":[100],"determine":[105],"effective":[107],"metrics":[108],"modelling":[110],"between":[113],"proposed":[117,123,142,179],"approach.":[118],"evaluated":[121],"on":[125],"set":[127],"common":[129],"newspaper":[130],"websites":[131],"highly":[134],"dimensional":[135],"data.":[136],"Experimental":[137],"results":[138],"show":[139],"can":[144],"increase":[145],"degree":[147],"cluster":[151],"encases":[152],"from":[154],"specific":[156],"category":[157],"by":[158,181],"85%.":[159],"comparison":[163],"standard":[166,186],"algorithm":[168],"it":[170],"has":[171],"succeeded":[172],"lowering":[174],"runtime":[176],"using":[177],"95%":[182],"compared":[183],"algorithm.":[188]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W1986637734","counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2018,"cited_by_count":3},{"year":2016,"cited_by_count":1}],"updated_date":"2025-01-07T08:51:13.492069","created_date":"2016-06-24"}