{"id":"https://openalex.org/W4285610227","doi":"https://doi.org/10.1145/3529446.3529457","title":"A quantitative comparison of automated cleaning techniques for web scraped image data of \u2018Smart Cities\u2019","display_name":"A quantitative comparison of automated cleaning techniques for web scraped image data of \u2018Smart Cities\u2019","publication_year":2022,"publication_date":"2022-03-25","ids":{"openalex":"https://openalex.org/W4285610227","doi":"https://doi.org/10.1145/3529446.3529457"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/3529446.3529457","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://pure.uva.nl/ws/files/109698343/3529446.3529457.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5071913897","display_name":"Bob de Witte","orcid":null},"institutions":[{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Bob de Witte","raw_affiliation_strings":["University of Amsterdam, Netherlands"],"affiliations":[{"raw_affiliation_string":"University of Amsterdam, Netherlands","institution_ids":["https://openalex.org/I887064364"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047564208","display_name":"Soufiane Bouarfa","orcid":"https://orcid.org/0000-0003-4005-1796"},"institutions":[],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Soufiane Bouarfa","raw_affiliation_strings":["Accenture Data & AI, Netherlands"],"affiliations":[{"raw_affiliation_string":"Accenture Data & AI, Netherlands","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5045459089","display_name":"Zeno Geradts","orcid":"https://orcid.org/0000-0001-5912-5295"},"institutions":[{"id":"https://openalex.org/I158244335","display_name":"Netherlands Forensic Institute","ror":"https://ror.org/04s2z4291","country_code":"NL","type":"facility","lineage":["https://openalex.org/I158244335"]},{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Zeno Geradts","raw_affiliation_strings":["Netherlands Forensic Institute, Netherlands and University of Amsterdam, Netherlands"],"affiliations":[{"raw_affiliation_string":"Netherlands Forensic Institute, Netherlands and University of Amsterdam, Netherlands","institution_ids":["https://openalex.org/I158244335","https://openalex.org/I887064364"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":60},"biblio":{"volume":"7","issue":null,"first_page":"64","last_page":"71"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T12698","display_name":"3D Modeling in Geospatial Applications","score":0.9697,"subfield":{"id":"https://openalex.org/subfields/2215","display_name":"Building and Construction"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12698","display_name":"3D Modeling in Geospatial Applications","score":0.9697,"subfield":{"id":"https://openalex.org/subfields/2215","display_name":"Building and Construction"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14319","display_name":"Currency Recognition and Detection","score":0.9694,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10481","display_name":"Computer Graphics and Visualization Techniques","score":0.9653,"subfield":{"id":"https://openalex.org/subfields/1704","display_name":"Computer Graphics and Computer-Aided Design"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.60558724},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.32894108}],"mesh":[],"locations_count":2,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/3529446.3529457","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"https://dare.uva.nl/personal/pure/en/publications/a-quantitative-comparison-of-automated-cleaning-techniques-for-web-scraped-image-data-of-smart-cities(90eb419d-5c7b-4a92-8844-cf32626450dc).html","pdf_url":"https://pure.uva.nl/ws/files/109698343/3529446.3529457.pdf","source":{"id":"https://openalex.org/S4306401843","display_name":"Data Archiving and Networked Services (DANS)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1322597698","host_organization_name":"Royal Netherlands Academy of Arts and Sciences","host_organization_lineage":["https://openalex.org/I1322597698"],"host_organization_lineage_names":["Royal Netherlands Academy of Arts and Sciences"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://dare.uva.nl/personal/pure/en/publications/a-quantitative-comparison-of-automated-cleaning-techniques-for-web-scraped-image-data-of-smart-cities(90eb419d-5c7b-4a92-8844-cf32626450dc).html","pdf_url":"https://pure.uva.nl/ws/files/109698343/3529446.3529457.pdf","source":{"id":"https://openalex.org/S4306401843","display_name":"Data Archiving and Networked Services (DANS)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1322597698","host_organization_name":"Royal Netherlands Academy of Arts and Sciences","host_organization_lineage":["https://openalex.org/I1322597698"],"host_organization_lineage_names":["Royal Netherlands Academy of Arts and Sciences"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.4}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":25,"referenced_works":["https://openalex.org/W1576445103","https://openalex.org/W1791560514","https://openalex.org/W2001642682","https://openalex.org/W2047920195","https://openalex.org/W2108598243","https://openalex.org/W2110158442","https://openalex.org/W2112796928","https://openalex.org/W2121927366","https://openalex.org/W2194775991","https://openalex.org/W2213892522","https://openalex.org/W2335728318","https://openalex.org/W2792633707","https://openalex.org/W2949234369","https://openalex.org/W2963037989","https://openalex.org/W2966284335","https://openalex.org/W2986445670","https://openalex.org/W3013682536","https://openalex.org/W3047375952","https://openalex.org/W3083097533","https://openalex.org/W3099364866","https://openalex.org/W3118608800","https://openalex.org/W3161184525","https://openalex.org/W4240153047","https://openalex.org/W4298628981","https://openalex.org/W4320002812"],"related_works":["https://openalex.org/W4402327032","https://openalex.org/W4396701345","https://openalex.org/W4396696052","https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2382290278","https://openalex.org/W2376932109","https://openalex.org/W2358668433","https://openalex.org/W2001405890"],"abstract_inverted_index":{"This":[0],"paper":[1],"implements":[2],"and":[3,56,113,165],"compares":[4],"four":[5,48,95,138],"automated":[6,40,133,146],"image":[7,27,41],"cleaning":[8,23,42,96,131,134],"techniques":[9,49,135,147],"through":[10,70,118],"the":[11,18,34,37,47,94,121,144],"ResNet-34":[12,98],"Convolutional":[13],"Neural":[14],"Network,":[15],"motivated":[16],"by":[17],"need":[19],"to":[20,54,81,92,106,162],"reduce":[21,163],"manual":[22,130,160],"efforts":[24],"of":[25,31,46,110],"large":[26,157],"datasets.":[28,61],"For":[29],"each":[30],"these":[32],"techniques,":[33,97],"relation":[35],"with":[36,68,73,84,101],"literature":[38],"on":[39,120,136,156],"is":[43],"identified.":[44],"Each":[45],"uses":[50],"a":[51,74,85,149],"specific":[52,75,86],"criterion":[53],"identify":[55],"remove":[57],"unwanted":[58],"images":[59,67,72,83,104],"from":[60,65],"The":[62,126],"criteria":[63],"range":[64],"identifying":[66,71,82],"text,":[69],"size":[76],"or":[77,148],"tonal":[78],"distribution,":[79],"up":[80],"training":[87],"loss":[88],"value.":[89],"In":[90],"order":[91],"evaluate":[93],"was":[99],"trained":[100],"web":[102],"scraped":[103],"corresponding":[105],"15":[107],"object":[108],"classes":[109],"'Smart":[111],"Cities',":[112],"accuracy":[114],"results":[115,127],"were":[116],"obtained":[117],"testing":[119],"CalTech":[122],"256":[123],"dataset":[124,167],"subset.":[125],"show":[128],"that":[129,143],"outperforms":[132],"all":[137],"criteria.":[139],"However,":[140],"analysis":[141],"reveals":[142],"individual":[145],"combination":[150],"thereof":[151],"can":[152],"initially":[153],"be":[154],"deployed":[155],"datasets":[158],"before":[159],"verification":[161],"workload":[164],"increase":[166],"stability.":[168]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4285610227","counts_by_year":[],"updated_date":"2024-12-06T04:39:49.466755","created_date":"2022-07-16"}