{"id":"https://openalex.org/W2853260843","doi":"https://doi.org/10.1142/s1793351x18400123","title":"Statistical Unigram Analysis for Source Code Repository","display_name":"Statistical Unigram Analysis for Source Code Repository","publication_year":2018,"publication_date":"2018-06-01","ids":{"openalex":"https://openalex.org/W2853260843","doi":"https://doi.org/10.1142/s1793351x18400123","mag":"2853260843"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1142/s1793351x18400123","pdf_url":null,"source":{"id":"https://openalex.org/S4210201727","display_name":"International Journal of Semantic Computing","issn_l":"1793-7108","issn":["1793-7108","1793-351X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://mdsoar.org/bitstreams/dc03092e-06be-4f8a-9131-93625c6fb5ba/download","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031477814","display_name":"Weifeng Xu","orcid":"https://orcid.org/0000-0002-1313-1136"},"institutions":[{"id":"https://openalex.org/I324100","display_name":"Bowie State University","ror":"https://ror.org/0567w8j84","country_code":"US","type":"education","lineage":["https://openalex.org/I324100"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Weifeng Xu","raw_affiliation_strings":["Department of Computer Science, Bowie State University, Bowie, Maryland, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Bowie State University, Bowie, Maryland, USA","institution_ids":["https://openalex.org/I324100"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080541939","display_name":"Dianxiang Xu","orcid":"https://orcid.org/0000-0003-4529-3540"},"institutions":[{"id":"https://openalex.org/I120156002","display_name":"Boise State University","ror":"https://ror.org/02e3zdp86","country_code":"US","type":"education","lineage":["https://openalex.org/I120156002"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dianxiang Xu","raw_affiliation_strings":["Department of Computer Science, Boise State University, Boise, Idaho, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Boise State University, Boise, Idaho, USA","institution_ids":["https://openalex.org/I120156002"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008102329","display_name":"Abdulrahman Alatawi","orcid":null},"institutions":[{"id":"https://openalex.org/I324100","display_name":"Bowie State University","ror":"https://ror.org/0567w8j84","country_code":"US","type":"education","lineage":["https://openalex.org/I324100"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Abdulrahman Alatawi","raw_affiliation_strings":["Department of Computer Science, Bowie State University, Bowie, Maryland, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Bowie State University, Bowie, Maryland, USA","institution_ids":["https://openalex.org/I324100"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081749732","display_name":"Omar El Ariss","orcid":null},"institutions":[{"id":"https://openalex.org/I206651237","display_name":"Texas A&M University \u2013 Commerce","ror":"https://ror.org/01red3556","country_code":"US","type":"education","lineage":["https://openalex.org/I206651237"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Omar El Ariss","raw_affiliation_strings":["Department of Computer Science, Texas A&M University, Commerce, TX, 75428, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Texas A&M University, Commerce, TX, 75428, USA","institution_ids":["https://openalex.org/I206651237"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5008545743","display_name":"Yunkai Liu","orcid":"https://orcid.org/0000-0002-8544-4051"},"institutions":[{"id":"https://openalex.org/I967637","display_name":"Gannon University","ror":"https://ror.org/02y041669","country_code":"US","type":"education","lineage":["https://openalex.org/I967637"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yunkai Liu","raw_affiliation_strings":["Department of Computer & Information Science, Gannon University, Erie, Pennsylvania, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer & Information Science, Gannon University, Erie, Pennsylvania, USA","institution_ids":["https://openalex.org/I967637"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":1,"citation_normalized_percentile":{"value":0.622625,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":62,"max":70},"biblio":{"volume":"12","issue":"02","first_page":"237","last_page":"260"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12423","display_name":"Software Reliability and Analysis Research","score":0.9892,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10430","display_name":"Software Engineering Techniques and Practices","score":0.984,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5143872},{"id":"https://openalex.org/keywords/open-domain","display_name":"Open domain","score":0.47026214}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.92135227},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.7488048},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.61112136},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.57942945},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5527222},{"id":"https://openalex.org/C3018397939","wikidata":"https://www.wikidata.org/wiki/Q3644502","display_name":"Open source","level":3,"score":0.5192961},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5143872},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.50570893},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.48118502},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.47360396},{"id":"https://openalex.org/C2993776861","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Open domain","level":3,"score":0.47026214},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.45777887},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.33205843},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.23881328},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.15079698},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1142/s1793351x18400123","pdf_url":null,"source":{"id":"https://openalex.org/S4210201727","display_name":"International Journal of Semantic Computing","issn_l":"1793-7108","issn":["1793-7108","1793-351X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"https://hdl.handle.net/11603/18741","pdf_url":"https://mdsoar.org/bitstreams/dc03092e-06be-4f8a-9131-93625c6fb5ba/download","source":{"id":"https://openalex.org/S4306402556","display_name":"Maryland Shared Open Access Repository (USMAI Consortium)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://hdl.handle.net/11603/18741","pdf_url":"https://mdsoar.org/bitstreams/dc03092e-06be-4f8a-9131-93625c6fb5ba/download","source":{"id":"https://openalex.org/S4306402556","display_name":"Maryland Shared Open Access Repository (USMAI Consortium)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"display_name":"Quality education","score":0.52,"id":"https://metadata.un.org/sdg/4"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":4,"referenced_works":["https://openalex.org/W2040713190","https://openalex.org/W2085574295","https://openalex.org/W2159739762","https://openalex.org/W2886313391"],"related_works":["https://openalex.org/W632256878","https://openalex.org/W4311938462","https://openalex.org/W4224861681","https://openalex.org/W4211197663","https://openalex.org/W3123068371","https://openalex.org/W2491403535","https://openalex.org/W2479811461","https://openalex.org/W2355429491","https://openalex.org/W2113128227","https://openalex.org/W2104915799"],"abstract_inverted_index":{"Unigram":[0],"is":[1],"a":[2,17,91,101,111],"fundamental":[3],"element":[4],"of":[5,29,40,135],"[Formula:":[6],"see":[7],"text]-gram":[8],"in":[9,26,104],"natural":[10,18,137],"language":[11,19,138],"processing.":[12],"However,":[13],"unigrams":[14,41,55,126],"collected":[15,42,52],"from":[16,43,56,128],"corpus":[20,139],"are":[21],"unsuitable":[22],"for":[23,99],"solving":[24,100,143],"problems":[25],"the":[27,38,125,133,136,144],"domain":[28,145],"computer":[30],"programming":[31],"languages.":[32],"In":[33],"this":[34],"paper,":[35],"we":[36,50,69],"analyze":[37],"properties":[39,73,98],"an":[44],"ultra-large":[45],"source":[46,60,105,129],"code":[47,106,130],"repository.":[48],"Specifically,":[49],"have":[51,70],"1.01":[53],"billion":[54],"0.7":[57],"million":[58],"open":[59],"projects":[61],"hosted":[62],"at":[63],"GitHub.com.":[64],"By":[65],"analyzing":[66],"these":[67,97],"unigrams,":[68],"discovered":[71],"statistical":[72],"regarding":[74],"(1)":[75],"how":[76,85,108],"developers":[77,86],"name":[78],"variables,":[79],"methods,":[80],"and":[81,83],"classes,":[82],"(2)":[84],"choose":[87],"abbreviations.":[88],"We":[89],"describe":[90],"probabilistic":[92],"model":[93],"which":[94],"relies":[95],"on":[96],"well-known":[102],"problem":[103],"analysis:":[107],"to":[109,114],"expand":[110],"given":[112],"abbreviation":[113],"its":[115],"original":[116],"indented":[117],"word.":[118],"Our":[119],"empirical":[120],"study":[121],"shows":[122],"that":[123],"using":[124,134],"extracted":[127],"repository":[131],"outperforms":[132],"by":[140],"21%":[141],"when":[142],"specific":[146],"problems.":[147]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2853260843","counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2024-12-24T08:50:20.741557","created_date":"2018-07-19"}