{"id":"https://openalex.org/W3175171031","doi":"https://doi.org/10.1142/s0218194021500273","title":"Semi-supervised Heterogeneous Defect Prediction with Open-source Projects on GitHub","display_name":"Semi-supervised Heterogeneous Defect Prediction with Open-source Projects on GitHub","publication_year":2021,"publication_date":"2021-06-01","ids":{"openalex":"https://openalex.org/W3175171031","doi":"https://doi.org/10.1142/s0218194021500273","mag":"3175171031"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1142/s0218194021500273","pdf_url":null,"source":{"id":"https://openalex.org/S131442419","display_name":"International Journal of Software Engineering and Knowledge Engineering","issn_l":"0218-1940","issn":["0218-1940","1793-6403"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101718114","display_name":"Ying Sun","orcid":null},"institutions":[{"id":"https://openalex.org/I41198531","display_name":"Nanjing University of Posts and Telecommunications","ror":"https://ror.org/043bpky34","country_code":"CN","type":"education","lineage":["https://openalex.org/I41198531"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ying Sun","raw_affiliation_strings":["School of Computer Science, Nanjing University of Posts and Telecommunications, Nanjing 210023, P.\u00a0R.\u00a0China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Nanjing University of Posts and Telecommunications, Nanjing 210023, P.\u00a0R.\u00a0China","institution_ids":["https://openalex.org/I41198531"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029691902","display_name":"Xiao\u2010Yuan Jing","orcid":"https://orcid.org/0000-0002-0392-8475"},"institutions":[{"id":"https://openalex.org/I41198531","display_name":"Nanjing University of Posts and Telecommunications","ror":"https://ror.org/043bpky34","country_code":"CN","type":"education","lineage":["https://openalex.org/I41198531"]},{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xiao-Yuan Jing","raw_affiliation_strings":["School of Computer Science, Nanjing University of Posts and Telecommunications, Nanjing 210023, P.\u00a0R.\u00a0China","School of Computer Science, Wuhan University, Wuhan 430072, P.\u00a0R.\u00a0China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Nanjing University of Posts and Telecommunications, Nanjing 210023, P.\u00a0R.\u00a0China","institution_ids":["https://openalex.org/I41198531"]},{"raw_affiliation_string":"School of Computer Science, Wuhan University, Wuhan 430072, P.\u00a0R.\u00a0China","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039792198","display_name":"Fei Wu","orcid":"https://orcid.org/0000-0001-5498-4947"},"institutions":[{"id":"https://openalex.org/I41198531","display_name":"Nanjing University of Posts and Telecommunications","ror":"https://ror.org/043bpky34","country_code":"CN","type":"education","lineage":["https://openalex.org/I41198531"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Fei Wu","raw_affiliation_strings":["School of Computer Science, Nanjing University of Posts and Telecommunications, Nanjing 210023, P.\u00a0R.\u00a0China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Nanjing University of Posts and Telecommunications, Nanjing 210023, P.\u00a0R.\u00a0China","institution_ids":["https://openalex.org/I41198531"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008307487","display_name":"Xiwei Dong","orcid":"https://orcid.org/0000-0001-5013-673X"},"institutions":[{"id":"https://openalex.org/I41198531","display_name":"Nanjing University of Posts and Telecommunications","ror":"https://ror.org/043bpky34","country_code":"CN","type":"education","lineage":["https://openalex.org/I41198531"]},{"id":"https://openalex.org/I134626604","display_name":"Jiujiang University","ror":"https://ror.org/0066vpg85","country_code":"CN","type":"education","lineage":["https://openalex.org/I134626604"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiwei Dong","raw_affiliation_strings":["School of Computer Science, Nanjing University of Posts and Telecommunications, Nanjing 210023, P.\u00a0R.\u00a0China","School of Computer and Big Data Science, Jiujiang University, Jiujiang 332005, P.\u00a0R.\u00a0China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Nanjing University of Posts and Telecommunications, Nanjing 210023, P.\u00a0R.\u00a0China","institution_ids":["https://openalex.org/I41198531"]},{"raw_affiliation_string":"School of Computer and Big Data Science, Jiujiang University, Jiujiang 332005, P.\u00a0R.\u00a0China","institution_ids":["https://openalex.org/I134626604"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100629960","display_name":"Yanfei Sun","orcid":"https://orcid.org/0000-0003-0085-1545"},"institutions":[{"id":"https://openalex.org/I41198531","display_name":"Nanjing University of Posts and Telecommunications","ror":"https://ror.org/043bpky34","country_code":"CN","type":"education","lineage":["https://openalex.org/I41198531"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanfei Sun","raw_affiliation_strings":["Jiangsu Engineering Research Center of HPC and Intelligent Processing, Nanjing 210003, P.\u00a0R.\u00a0China","School of Internet of Things, Nanjing University of Posts and Telecommunications, Nanjing 210023, P.\u00a0R.\u00a0China"],"affiliations":[{"raw_affiliation_string":"School of Internet of Things, Nanjing University of Posts and Telecommunications, Nanjing 210023, P.\u00a0R.\u00a0China","institution_ids":["https://openalex.org/I41198531"]},{"raw_affiliation_string":"Jiangsu Engineering Research Center of HPC and Intelligent Processing, Nanjing 210003, P.\u00a0R.\u00a0China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5109232005","display_name":"Ruchuan Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I41198531","display_name":"Nanjing University of Posts and Telecommunications","ror":"https://ror.org/043bpky34","country_code":"CN","type":"education","lineage":["https://openalex.org/I41198531"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ruchuan Wang","raw_affiliation_strings":["School of Computer Science, Nanjing University of Posts and Telecommunications, Nanjing 210023, P.\u00a0R.\u00a0China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Nanjing University of Posts and Telecommunications, Nanjing 210023, P.\u00a0R.\u00a0China","institution_ids":["https://openalex.org/I41198531"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5029691902","https://openalex.org/A5039792198"],"corresponding_institution_ids":["https://openalex.org/I41198531","https://openalex.org/I37461747","https://openalex.org/I41198531"],"apc_list":null,"apc_paid":null,"fwci":1.367,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.935872,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":78,"max":81},"biblio":{"volume":"31","issue":"06","first_page":"889","last_page":"916"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9998,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9998,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12423","display_name":"Software Reliability and Analysis Research","score":0.9829,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10430","display_name":"Software Engineering Techniques and Practices","score":0.9784,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.6084082},{"id":"https://openalex.org/keywords/labeled-data","display_name":"Labeled data","score":0.5613146},{"id":"https://openalex.org/keywords/supervised-learning","display_name":"Supervised Learning","score":0.50145364},{"id":"https://openalex.org/keywords/predictive-modelling","display_name":"Predictive modelling","score":0.4408978},{"id":"https://openalex.org/keywords/representation","display_name":"Representation","score":0.43126932}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.79564184},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.63872874},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.6084082},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.56597006},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.5613146},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.55335},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.51043266},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.50145364},{"id":"https://openalex.org/C58973888","wikidata":"https://www.wikidata.org/wiki/Q1041418","display_name":"Semi-supervised learning","level":2,"score":0.47385553},{"id":"https://openalex.org/C69738355","wikidata":"https://www.wikidata.org/wiki/Q1228929","display_name":"Linear discriminant analysis","level":2,"score":0.44600737},{"id":"https://openalex.org/C45804977","wikidata":"https://www.wikidata.org/wiki/Q7239673","display_name":"Predictive modelling","level":2,"score":0.4408978},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.43126932},{"id":"https://openalex.org/C78397625","wikidata":"https://www.wikidata.org/wiki/Q192487","display_name":"Discriminant","level":2,"score":0.41880786},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4165032},{"id":"https://openalex.org/C3018397939","wikidata":"https://www.wikidata.org/wiki/Q3644502","display_name":"Open source","level":3,"score":0.4109622},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.21106154},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.08720103},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1142/s0218194021500273","pdf_url":null,"source":{"id":"https://openalex.org/S131442419","display_name":"International Journal of Software Engineering and Knowledge Engineering","issn_l":"0218-1940","issn":["0218-1940","1793-6403"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.63}],"grants":[{"funder":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China","award_id":"61933013"},{"funder":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China","award_id":"62076139"},{"funder":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China","award_id":"61702280"},{"funder":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China","award_id":"62041603"},{"funder":"https://openalex.org/F4320321543","funder_display_name":"China Postdoctoral Science Foundation","award_id":"2019M661901"},{"funder":"https://openalex.org/F4320321921","funder_display_name":"Natural Science Foundation of Guangdong Province","award_id":"2019A1515011076"},{"funder":"https://openalex.org/F4320322665","funder_display_name":"Natural Science Foundation of Jiangxi Province","award_id":"20202BABL202036"},{"funder":"https://openalex.org/F4320335471","funder_display_name":"Jiangsu Planned Projects for Postdoctoral Research Funds","award_id":"2019K024"},{"funder":"https://openalex.org/F4320335471","funder_display_name":"Jiangsu Planned Projects for Postdoctoral Research Funds","award_id":"2020M671678"},{"funder":"https://openalex.org/F4320335768","funder_display_name":"National Postdoctoral Program for Innovative Talents","award_id":"BX20180146"}],"datasets":[],"versions":[],"referenced_works_count":70,"referenced_works":["https://openalex.org/W1498467937","https://openalex.org/W1505648523","https://openalex.org/W1565746575","https://openalex.org/W1656679015","https://openalex.org/W1857789879","https://openalex.org/W1964962870","https://openalex.org/W1976096185","https://openalex.org/W1978813754","https://openalex.org/W1979360936","https://openalex.org/W1983320747","https://openalex.org/W1994719586","https://openalex.org/W2008596407","https://openalex.org/W2019288156","https://openalex.org/W2036580571","https://openalex.org/W2041496338","https://openalex.org/W2046830558","https://openalex.org/W2055949897","https://openalex.org/W2058207467","https://openalex.org/W2064330644","https://openalex.org/W2066563654","https://openalex.org/W2068430427","https://openalex.org/W2069735839","https://openalex.org/W2079057609","https://openalex.org/W2097283422","https://openalex.org/W2097883090","https://openalex.org/W2098136027","https://openalex.org/W2101227285","https://openalex.org/W2104290444","https://openalex.org/W2105776892","https://openalex.org/W2109362066","https://openalex.org/W2118283821","https://openalex.org/W2120457925","https://openalex.org/W2126626812","https://openalex.org/W2146338950","https://openalex.org/W2151666086","https://openalex.org/W2163732854","https://openalex.org/W2167999447","https://openalex.org/W2304692780","https://openalex.org/W2395429379","https://openalex.org/W2474835145","https://openalex.org/W2476464413","https://openalex.org/W2510885373","https://openalex.org/W2612972698","https://openalex.org/W2731935965","https://openalex.org/W2735995639","https://openalex.org/W2748169822","https://openalex.org/W2753715782","https://openalex.org/W2766521509","https://openalex.org/W2767434420","https://openalex.org/W2783657687","https://openalex.org/W2790699127","https://openalex.org/W2793681838","https://openalex.org/W2795013017","https://openalex.org/W2802138742","https://openalex.org/W2897049944","https://openalex.org/W2911121806","https://openalex.org/W2922408684","https://openalex.org/W2955354980","https://openalex.org/W2955797575","https://openalex.org/W2998024052","https://openalex.org/W3005637815","https://openalex.org/W3009761259","https://openalex.org/W3012871725","https://openalex.org/W3033234605","https://openalex.org/W4206600618","https://openalex.org/W4232767983","https://openalex.org/W4238859253","https://openalex.org/W4240808027","https://openalex.org/W4250023757","https://openalex.org/W4252684946"],"related_works":["https://openalex.org/W4312414840","https://openalex.org/W4206276646","https://openalex.org/W34092691","https://openalex.org/W2952937263","https://openalex.org/W2943467239","https://openalex.org/W2794908468","https://openalex.org/W2531570999","https://openalex.org/W192740413","https://openalex.org/W1571801203","https://openalex.org/W101422005"],"abstract_inverted_index":{"The":[0],"heterogeneous":[1,14],"defect":[2,42,47],"prediction":[3,43,48,189,192],"(HDP)":[4],"technique":[5,44],"can":[6,45,169,187],"predict":[7],"defects":[8],"in":[9],"a":[10,58,71,82,110,143],"target":[11],"company":[12,103],"using":[13],"metric":[15,90],"data":[16,33,38,99,122,127,186],"from":[17,128,138],"external":[18],"company,":[19],"which":[20],"has":[21,164],"received":[22],"substantial":[23],"research":[24],"attention.":[25],"However,":[26],"existing":[27],"HDP":[28,63],"methods":[29],"assume":[30],"that":[31,183],"source":[32],"is":[34,39,156,196],"labeled":[35,51,121],"but":[36],"labeling":[37],"expensive.":[40],"Semi-supervised":[41],"perform":[46],"with":[49],"few":[50],"data.":[52],"In":[53],"this":[54,67],"paper,":[55],"we":[56,69,132],"investigate":[57],"new":[59,72,144],"problem":[60],"\u2014":[61],"semi-supervised":[62,77,113],"(SHDP).":[64],"To":[65],"solve":[66],"problem,":[68],"propose":[70],"approach":[73],"named":[74],"cost-sensitive":[75,111],"kernel":[76,112],"correlation":[78,94],"analysis":[79,95,115],"(CKSCA)":[80],"as":[81],"solution":[83],"of":[84,101,135],"SHDP":[85,202],"problem.":[86,203],"It":[87,151,163],"introduces":[88],"unified":[89],"representation":[91],"and":[92,123,155,168,178,194,198],"canonical":[93],"to":[96,117,141],"make":[97],"the":[98,119,175],"distributions":[100],"different":[102,129],"projects":[104,137],"more":[105],"similar.":[106],"CKSCA":[107,195],"also":[108],"designs":[109],"discriminant":[114],"mechanism":[116],"utilize":[118],"limited":[120],"sufficient":[124],"real-life":[125],"unlabeled":[126,146,184],"companies.":[130],"Besides":[131],"collect":[133],"lots":[134],"open-source":[136],"GitHub":[139],"website":[140],"construct":[142],"large-scale":[145],"dataset":[147,177],"called":[148],"GITHUB":[149,176,185],"dataset.":[150,162],"contains":[152],"26,407":[153],"modules":[154],"greater":[157],"than":[158],"each":[159],"public":[160,166,180],"project":[161],"been":[165],"online":[167],"be":[170],"extended":[171],"continuously.":[172],"Experiments":[173],"on":[174],"other":[179],"datasets":[181],"indicate":[182],"help":[188],"model":[190],"improve":[191],"performance,":[193],"effective":[197],"efficient":[199],"for":[200],"solving":[201]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W3175171031","counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1}],"updated_date":"2025-01-19T21:24:30.331697","created_date":"2021-07-05"}