{"id":"https://openalex.org/W4297899129","doi":"https://doi.org/10.48550/arxiv.2209.09932","title":"Comparative analysis of real bugs in open-source Machine Learning projects -- A Registered Report","display_name":"Comparative analysis of real bugs in open-source Machine Learning projects -- A Registered Report","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4297899129","doi":"https://doi.org/10.48550/arxiv.2209.09932"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2209.09932","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2209.09932","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5055504016","display_name":"Tuan Dung Lai","orcid":"https://orcid.org/0000-0002-1358-3351"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lai, Tuan Dung","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024537636","display_name":"Anj Simmons","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Simmons, Anj","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012088137","display_name":"Scott Barnett","orcid":"https://orcid.org/0000-0002-3187-4937"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Barnett, Scott","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076543742","display_name":"Jean-Guy Schneider","orcid":"https://orcid.org/0000-0002-9827-5496"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Schneider, Jean-Guy","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5030486012","display_name":"Rajesh Vasa","orcid":"https://orcid.org/0000-0003-4805-1467"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vasa, Rajesh","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":60},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9949,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9949,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9938,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9918,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7631403},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.65282524},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.52952904},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.51359487},{"id":"https://openalex.org/C3018397939","wikidata":"https://www.wikidata.org/wiki/Q3644502","display_name":"Open source","level":3,"score":0.50747937},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.49469563},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.4891689},{"id":"https://openalex.org/C138268822","wikidata":"https://www.wikidata.org/wiki/Q1051925","display_name":"Resolution (logic)","level":2,"score":0.46649075},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.4183678},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.41784576},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.38445166},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.32169366},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3070686},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.107141584},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2209.09932","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2209.09932","pdf_url":"http://arxiv.org/pdf/2209.09932","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2209.09932","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2209.09932","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"score":0.42,"id":"https://metadata.un.org/sdg/17","display_name":"Partnerships for the goals"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W632256878","https://openalex.org/W4311938462","https://openalex.org/W4234772502","https://openalex.org/W3037187668","https://openalex.org/W2491403535","https://openalex.org/W2479811461","https://openalex.org/W2380685755","https://openalex.org/W2355429491","https://openalex.org/W2113128227","https://openalex.org/W2104915799"],"abstract_inverted_index":{"Background:":[0],"Machine":[1],"Learning":[2],"(ML)":[3],"systems":[4,12,21,59],"rely":[5],"on":[6,35,143,221],"data":[7,25],"to":[8,18,88,97,111,140,188],"make":[9],"predictions,":[10],"the":[11,24,40,74,91,103,119,155,197,209,227],"have":[13,60],"many":[14],"added":[15],"components":[16],"compared":[17],"traditional":[19,80],"software":[20,36],"such":[22,51],"as":[23,52],"processing":[26],"pipeline,":[27,29],"serving":[28],"and":[30,43,54,65,79,96,126,129,162,172,183,191,212,218,225],"model":[31],"training.":[32],"Existing":[33],"research":[34],"maintenance":[37],"has":[38],"studied":[39],"issue-reporting":[41],"needs":[42,93],"resolution":[44,105,122,210],"process":[45],"for":[46,229],"different":[47,75],"types":[48],"of":[49,63,73,121,133,157,159,203,214,216,232],"issues,":[50],"performance":[53],"security":[55],"issues.":[56,164,193],"However,":[57],"ML":[58,67,78,125,134,150,160,179,190,217],"specific":[61],"classes":[62],"faults,":[64],"reporting":[66,92],"issues":[68,128,135,161,168,198,220],"requires":[69],"domain-specific":[70],"information.":[71],"Because":[72],"characteristics":[76],"between":[77,124],"Software":[81],"Engineering":[82],"systems,":[83],"we":[84],"do":[85],"not":[86],"know":[87],"what":[89,98],"extent":[90,99],"are":[94],"different,":[95],"these":[100],"differences":[101],"impact":[102],"issue":[104,145],"process.":[106],"Objective:":[107],"Our":[108],"objective":[109],"is":[110,115],"investigate":[112,154],"whether":[113,130],"there":[114],"a":[116,137,200,222],"discrepancy":[117],"in":[118,147,175],"distribution":[120],"time":[123,139,211],"non-ML":[127,163,192,219],"certain":[131],"categories":[132],"require":[136],"longer":[138],"resolve":[141],"based":[142],"real":[144],"reports":[146],"open-source":[148],"applied":[149,178],"projects.":[151],"We":[152,166,194,207],"further":[153],"size":[156,213],"fix":[158,215],"Method:":[165],"extract":[167],"reports,":[169],"pull":[170],"requests":[171],"code":[173],"files":[174],"recent":[176],"active":[177],"projects":[180],"from":[181],"Github,":[182],"use":[184],"an":[185],"automatic":[186],"approach":[187],"filter":[189],"manually":[195],"label":[196],"using":[199],"known":[201],"taxonomy":[202],"deep":[204],"learning":[205],"bugs.":[206],"measure":[208],"controlled":[223],"sample":[224],"compare":[226],"distributions":[228],"each":[230],"category":[231],"issue.":[233]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4297899129","counts_by_year":[],"updated_date":"2025-01-08T08:54:23.649467","created_date":"2022-10-01"}