{"id":"https://openalex.org/W4385018909","doi":"https://doi.org/10.48550/arxiv.2307.10455","title":"A Step Towards Worldwide Biodiversity Assessment: The BIOSCAN-1M Insect Dataset","display_name":"A Step Towards Worldwide Biodiversity Assessment: The BIOSCAN-1M Insect Dataset","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4385018909","doi":"https://doi.org/10.48550/arxiv.2307.10455"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2307.10455","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2307.10455","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5017564607","display_name":"Zahra Gharaee","orcid":"https://orcid.org/0000-0003-0140-0025"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gharaee, Zahra","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111159301","display_name":"ZeMing Gong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gong, ZeMing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020519120","display_name":"Nicholas Pellegrino","orcid":"https://orcid.org/0000-0002-2102-2431"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pellegrino, Nicholas","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015745351","display_name":"Iuliia Zarubiieva","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zarubiieva, Iuliia","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040831022","display_name":"Joakim Bruslund Haurum","orcid":"https://orcid.org/0000-0002-0544-0422"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Haurum, Joakim Bruslund","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043651760","display_name":"Scott Lowe","orcid":"https://orcid.org/0000-0002-5237-3867"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lowe, Scott C.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075855500","display_name":"Jaclyn McKeown","orcid":"https://orcid.org/0009-0005-7193-2643"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"McKeown, Jaclyn T. A.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113158901","display_name":"Chris C.Y. Ho","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ho, Chris C. Y.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018941776","display_name":"Joschka McLeod","orcid":"https://orcid.org/0000-0002-7503-1835"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"McLeod, Joschka","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083579946","display_name":"Yiyun Wei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei, Yi-Yun C","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036523648","display_name":"Jireh Agda","orcid":"https://orcid.org/0009-0004-5235-9610"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Agda, Jireh","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046508393","display_name":"Sujeevan Ratnasingham","orcid":"https://orcid.org/0000-0002-3694-0171"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ratnasingham, Sujeevan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068074654","display_name":"Dirk Steinke","orcid":"https://orcid.org/0000-0002-8992-575X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Steinke, Dirk","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044978994","display_name":"Anne Lynn S. Chang","orcid":"https://orcid.org/0000-0002-2869-2147"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chang, Angel X.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102922725","display_name":"Graham W. Taylor","orcid":"https://orcid.org/0000-0001-5867-3652"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Taylor, Graham W.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5078015739","display_name":"Paul Fieguth","orcid":"https://orcid.org/0000-0001-7260-2260"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fieguth, Paul","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.727403,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":67,"max":78},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10895","display_name":"Species Distribution and Climate Change","score":0.9948,"subfield":{"id":"https://openalex.org/subfields/2302","display_name":"Ecological Modeling"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10895","display_name":"Species Distribution and Climate Change","score":0.9948,"subfield":{"id":"https://openalex.org/subfields/2302","display_name":"Ecological Modeling"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12640","display_name":"Environmental DNA in Biodiversity Studies","score":0.9861,"subfield":{"id":"https://openalex.org/subfields/2303","display_name":"Ecology"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12388","display_name":"Identification and Quantification in Food","score":0.9761,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/barcode","display_name":"Barcode","score":0.85165095},{"id":"https://openalex.org/keywords/taxonomic-rank","display_name":"Taxonomic rank","score":0.48768437},{"id":"https://openalex.org/keywords/contextual-image-classification","display_name":"Contextual image classification","score":0.47313204},{"id":"https://openalex.org/keywords/biological-classification","display_name":"Biological classification","score":0.43345094},{"id":"https://openalex.org/keywords/dna-barcoding","display_name":"DNA Barcoding","score":0.43036523}],"concepts":[{"id":"https://openalex.org/C2776841711","wikidata":"https://www.wikidata.org/wiki/Q856","display_name":"Barcode","level":2,"score":0.85165095},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.72709167},{"id":"https://openalex.org/C130217890","wikidata":"https://www.wikidata.org/wiki/Q47041","display_name":"Biodiversity","level":2,"score":0.71028125},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5473818},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.52918476},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.52409095},{"id":"https://openalex.org/C189592816","wikidata":"https://www.wikidata.org/wiki/Q427626","display_name":"Taxonomic rank","level":3,"score":0.48768437},{"id":"https://openalex.org/C75294576","wikidata":"https://www.wikidata.org/wiki/Q5165192","display_name":"Contextual image classification","level":3,"score":0.47313204},{"id":"https://openalex.org/C48702757","wikidata":"https://www.wikidata.org/wiki/Q8269924","display_name":"Biological classification","level":2,"score":0.43345094},{"id":"https://openalex.org/C141732470","wikidata":"https://www.wikidata.org/wiki/Q1154642","display_name":"DNA barcoding","level":2,"score":0.43036523},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.34562385},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.21534723},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.16950345},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.16393018},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.067365944},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C71640776","wikidata":"https://www.wikidata.org/wiki/Q16521","display_name":"Taxon","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2307.10455","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2307.10455","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2307.10455","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/15","score":0.61,"display_name":"Life on land"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W3080270964","https://openalex.org/W2911724990","https://openalex.org/W2509660947","https://openalex.org/W2402613932","https://openalex.org/W2394007119","https://openalex.org/W2389721777","https://openalex.org/W2360712899","https://openalex.org/W2111456242","https://openalex.org/W2097619744","https://openalex.org/W2032317445"],"abstract_inverted_index":{"In":[0],"an":[1,26,137],"effort":[2],"to":[3,59,84,96,151],"catalog":[4],"insect":[5,15],"biodiversity,":[6],"we":[7],"propose":[8],"a":[9,54,99,110,115,156,178],"new":[10],"large":[11],"dataset":[12,71,166],"of":[13,64,78,82,147,159,177],"hand-labelled":[14],"images,":[16],"the":[17,70,76,85,92,97,130,144,153,165,169,173],"BIOSCAN-Insect":[18],"Dataset.":[19],"Each":[20],"record":[21],"is":[22,104,109],"taxonomically":[23],"classified":[24],"by":[25,91],"expert,":[27],"and":[28,39,167,175],"also":[29,72,142],"has":[30],"associated":[31],"genetic":[32],"information":[33],"including":[34],"raw":[35],"nucleotide":[36],"barcode":[37,41],"sequences":[38],"assigned":[40],"index":[42],"numbers,":[43],"which":[44,79],"are":[45],"genetically-based":[46],"proxies":[47],"for":[48,155],"species":[49],"classification.":[50],"This":[51,162],"paper":[52,163],"presents":[53,73],"curated":[55],"million-image":[56],"dataset,":[57,98],"primarily":[58],"train":[60],"computer-vision":[61],"models":[62],"capable":[63],"providing":[65],"image-based":[66,138],"taxonomic":[67,107,139],"assessment,":[68],"however,":[69],"compelling":[74],"characteristics,":[75],"study":[77],"would":[80],"be":[81],"interest":[83,125],"broader":[86],"machine":[87,131],"learning":[88,132],"community.":[89],"Driven":[90],"biological":[93],"nature":[94],"inherent":[95],"characteristic":[100],"long-tailed":[101],"class-imbalance":[102],"distribution":[103],"exhibited.":[105],"Furthermore,":[106],"labelling":[108],"hierarchical":[111],"classification":[112,118,170],"scheme,":[113],"presenting":[114],"highly":[116],"fine-grained":[117],"problem":[119],"at":[120],"lower":[121],"levels.":[122],"Beyond":[123],"spurring":[124],"in":[126],"biodiversity":[127],"research":[128],"within":[129],"community,":[133],"progress":[134],"on":[135],"creating":[136],"classifier":[140],"will":[141],"further":[143],"ultimate":[145],"goal":[146],"all":[148],"BIOSCAN":[149],"research:":[150],"lay":[152],"foundation":[154],"comprehensive":[157],"survey":[158],"global":[160],"biodiversity.":[161],"introduces":[164],"explores":[168],"task":[171],"through":[172],"implementation":[174],"analysis":[176],"baseline":[179],"classifier.":[180]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4385018909","counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-01-04T15:57:52.452812","created_date":"2023-07-22"}