{"id":"https://openalex.org/W4229033775","doi":"https://doi.org/10.1145/3520312.3534868","title":"Automatically debugging AutoML pipelines using maro: ML automated remediation oracle","display_name":"Automatically debugging AutoML pipelines using maro: ML automated remediation oracle","publication_year":2022,"publication_date":"2022-06-10","ids":{"openalex":"https://openalex.org/W4229033775","doi":"https://doi.org/10.1145/3520312.3534868"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/3520312.3534868","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"proceedings-article","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2205.01311","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5012855077","display_name":"Julian Dolby","orcid":"https://orcid.org/0000-0002-6658-2678"},"institutions":[],"countries":["US"],"is_corresponding":false,"raw_author_name":"Julian Dolby","raw_affiliation_strings":["IBM Research, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077279308","display_name":"Jason Tsay","orcid":"https://orcid.org/0000-0002-8085-5708"},"institutions":[],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jason Tsay","raw_affiliation_strings":["IBM Research, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research, USA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5079080602","display_name":"Martin Hirzel","orcid":"https://orcid.org/0009-0006-8840-6065"},"institutions":[],"countries":["US"],"is_corresponding":false,"raw_author_name":"Martin Hirzel","raw_affiliation_strings":["IBM Research, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research, USA","institution_ids":[]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":3,"citation_normalized_percentile":{"value":0.640014,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":76,"max":80},"biblio":{"volume":null,"issue":null,"first_page":"60","last_page":"69"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9988,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9988,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9936,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.9816,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/python","display_name":"Python","score":0.63704574},{"id":"https://openalex.org/keywords/troubleshooting","display_name":"Trouble shooting","score":0.5420386},{"id":"https://openalex.org/keywords/algorithmic-program-debugging","display_name":"Algorithmic program debugging","score":0.5107398},{"id":"https://openalex.org/keywords/feature-engineering","display_name":"Feature Engineering","score":0.46644542},{"id":"https://openalex.org/keywords/hyperparameter","display_name":"Hyperparameter","score":0.43988764},{"id":"https://openalex.org/keywords/data-pre-processing","display_name":"Data pre-processing","score":0.43093133}],"concepts":[{"id":"https://openalex.org/C168065819","wikidata":"https://www.wikidata.org/wiki/Q845566","display_name":"Debugging","level":2,"score":0.84259266},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.74879634},{"id":"https://openalex.org/C519991488","wikidata":"https://www.wikidata.org/wiki/Q28865","display_name":"Python (programming language)","level":2,"score":0.63704574},{"id":"https://openalex.org/C55166926","wikidata":"https://www.wikidata.org/wiki/Q2892946","display_name":"Oracle","level":2,"score":0.6011489},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.5548507},{"id":"https://openalex.org/C175309249","wikidata":"https://www.wikidata.org/wiki/Q725864","display_name":"Pipeline transport","level":2,"score":0.54921967},{"id":"https://openalex.org/C147494362","wikidata":"https://www.wikidata.org/wiki/Q2078905","display_name":"Troubleshooting","level":2,"score":0.5420386},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.5289417},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.5254038},{"id":"https://openalex.org/C136388014","wikidata":"https://www.wikidata.org/wiki/Q17084976","display_name":"Algorithmic program debugging","level":3,"score":0.5107398},{"id":"https://openalex.org/C2778827112","wikidata":"https://www.wikidata.org/wiki/Q22245680","display_name":"Feature engineering","level":3,"score":0.46644542},{"id":"https://openalex.org/C8642999","wikidata":"https://www.wikidata.org/wiki/Q4171168","display_name":"Hyperparameter","level":2,"score":0.43988764},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4330978},{"id":"https://openalex.org/C10551718","wikidata":"https://www.wikidata.org/wiki/Q5227332","display_name":"Data pre-processing","level":2,"score":0.43093133},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3991022},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.3890296},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.3819555},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37245148},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.20171463},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.12159714},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.10609591},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.089604944},{"id":"https://openalex.org/C87717796","wikidata":"https://www.wikidata.org/wiki/Q146326","display_name":"Environmental engineering","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/3520312.3534868","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2205.01311","pdf_url":"https://arxiv.org/pdf/2205.01311","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2205.01311","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2205.01311","pdf_url":"https://arxiv.org/pdf/2205.01311","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":["https://openalex.org/W4229033775"],"referenced_works_count":34,"referenced_works":["https://openalex.org/W1526710119","https://openalex.org/W2050876821","https://openalex.org/W2101234009","https://openalex.org/W2113207845","https://openalex.org/W2163671349","https://openalex.org/W2182361439","https://openalex.org/W2189162242","https://openalex.org/W2552408584","https://openalex.org/W2768348081","https://openalex.org/W2788572835","https://openalex.org/W2899327512","https://openalex.org/W2922234936","https://openalex.org/W2953027140","https://openalex.org/W2964024268","https://openalex.org/W2968594320","https://openalex.org/W2970433196","https://openalex.org/W2974817986","https://openalex.org/W2984109651","https://openalex.org/W2997591727","https://openalex.org/W3032244227","https://openalex.org/W3084564478","https://openalex.org/W3090643686","https://openalex.org/W3098910265","https://openalex.org/W3104332093","https://openalex.org/W3104663419","https://openalex.org/W3158688329","https://openalex.org/W3178517081","https://openalex.org/W3179148751","https://openalex.org/W3181437774","https://openalex.org/W3211385798","https://openalex.org/W398859631","https://openalex.org/W4229033775","https://openalex.org/W4244452926","https://openalex.org/W4287728257"],"related_works":["https://openalex.org/W4318325534","https://openalex.org/W4240398146","https://openalex.org/W4210597238","https://openalex.org/W4206476896","https://openalex.org/W3173704888","https://openalex.org/W3172545305","https://openalex.org/W3125271421","https://openalex.org/W3013479934","https://openalex.org/W2950310564","https://openalex.org/W2128551247"],"abstract_inverted_index":{"Machine":[0],"learning":[1],"in":[2,99],"practice":[3],"often":[4],"involves":[5],"complex":[6],"pipelines":[7,17],"for":[8,40,73,132],"data":[9,65,110],"cleansing,":[10],"feature":[11],"engineering,":[12],"preprocessing,":[13],"and":[14,28,52,58,60,82,97,118,129,145],"prediction.":[15],"These":[16],"are":[18],"composed":[19],"of":[20,43,95],"operators,":[21,45],"which":[22,90],"have":[23],"to":[24,48,154],"be":[25,32],"correctly":[26,33],"connected":[27],"whose":[29],"hyperparameters":[30,47],"must":[31],"configured.":[34],"Unfortunately,":[35],"it":[36],"is":[37,56],"quite":[38],"common":[39],"certain":[41],"combinations":[42],"datasets,":[44],"or":[46],"cause":[49],"failures.":[50],"Diagnosing":[51],"fixing":[53],"those":[54],"failures":[55],"tedious":[57],"error-prone":[59],"can":[61],"seriously":[62],"derail":[63],"a":[64,84,93,100,135],"scientist's":[66],"workflow.":[67],"This":[68],"paper":[69],"describes":[70],"an":[71,76],"approach":[72],"automatically":[74,138],"debugging":[75],"ML":[77],"pipeline,":[78],"explaining":[79],"the":[80,108],"failures,":[81],"producing":[83],"remediation.":[85],"We":[86,124],"implemented":[87],"our":[88,127],"approach,":[89],"builds":[91],"on":[92],"combination":[94],"AutoML":[96,119],"SMT,":[98],"tool":[101,128],"called":[102],"Maro.":[103],"Maro":[104],"works":[105],"seamlessly":[106],"with":[107],"familiar":[109],"science":[111],"ecosystem":[112],"including":[113],"Python,":[114],"Jupyter":[115],"notebooks,":[116],"scikit-learn,":[117],"tools":[120],"such":[121],"as":[122],"Hyperopt.":[123],"empirically":[125],"evaluate":[126],"find":[130],"that":[131],"most":[133],"cases,":[134],"single":[136],"remediation":[137],"fixes":[139],"errors,":[140],"produces":[141],"no":[142],"additional":[143],"faults,":[144],"does":[146],"not":[147],"significantly":[148],"impact":[149],"optimal":[150],"accuracy":[151],"nor":[152],"time":[153],"convergence.":[155]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4229033775","counts_by_year":[{"year":2022,"cited_by_count":3}],"updated_date":"2024-12-12T23:28:39.950902","created_date":"2022-05-08"}