{"id":"https://openalex.org/W4388367995","doi":"https://doi.org/10.48550/arxiv.2311.01064","title":"Multimodal Foundation Models for Zero-shot Animal Species Recognition in Camera Trap Images","display_name":"Multimodal Foundation Models for Zero-shot Animal Species Recognition in Camera Trap Images","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4388367995","doi":"https://doi.org/10.48550/arxiv.2311.01064"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2311.01064","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2311.01064","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5088599790","display_name":"Zalan Fabian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fabian, Zalan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069470446","display_name":"Zhongqi Miao","orcid":"https://orcid.org/0000-0002-0439-8592"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Miao, Zhongqi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107893340","display_name":"Chunyuan Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Chunyuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044677327","display_name":"Yuanhan Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yuanhan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100406050","display_name":"Ziwei Liu","orcid":"https://orcid.org/0000-0002-4220-5958"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Ziwei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089602430","display_name":"Andr\u00e9s Hern\u00e1ndez","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hern\u00e1ndez, Andr\u00e9s","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053660857","display_name":"Andr\u00e9s Montes\u2010Rojas","orcid":"https://orcid.org/0000-0001-6976-5552"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Montes-Rojas, Andr\u00e9s","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5093197465","display_name":"Rafael Escucha","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Escucha, Rafael","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5093197466","display_name":"Laura C. Siabatto","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Siabatto, Laura","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003256168","display_name":"Andr\u00e9s Link","orcid":"https://orcid.org/0000-0003-3125-249X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Link, Andr\u00e9s","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013512298","display_name":"Pablo Arbel\u00e1ez","orcid":"https://orcid.org/0000-0001-5244-2407"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Arbel\u00e1ez, Pablo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022013110","display_name":"Rahul Dodhia","orcid":"https://orcid.org/0000-0003-3812-3906"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dodhia, Rahul","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5025368433","display_name":"Juan Lavista Ferres","orcid":"https://orcid.org/0000-0002-9654-3178"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ferres, Juan Lavista","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.919075,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":82,"max":85},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9568,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9382,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/camera-trap","display_name":"Camera trap","score":0.7541648},{"id":"https://openalex.org/keywords/trap","display_name":"Trap (plumbing)","score":0.5846714}],"concepts":[{"id":"https://openalex.org/C2779101711","wikidata":"https://www.wikidata.org/wiki/Q1723004","display_name":"Camera trap","level":3,"score":0.7541648},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.72434485},{"id":"https://openalex.org/C121099081","wikidata":"https://www.wikidata.org/wiki/Q665580","display_name":"Trap (plumbing)","level":2,"score":0.5846714},{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.5430403},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.50144076},{"id":"https://openalex.org/C547195049","wikidata":"https://www.wikidata.org/wiki/Q1725664","display_name":"Terminology","level":2,"score":0.48788095},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.46236235},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.40686035},{"id":"https://openalex.org/C29376679","wikidata":"https://www.wikidata.org/wiki/Q241741","display_name":"Wildlife","level":2,"score":0.40615103},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.35682},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.122493684},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.111486554},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2311.01064","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2311.01064","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2311.01064","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"score":0.58,"display_name":"Life on land","id":"https://metadata.un.org/sdg/15"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4394211400","https://openalex.org/W4393982837","https://openalex.org/W4214877189","https://openalex.org/W2980279061","https://openalex.org/W2773965352","https://openalex.org/W2381179799","https://openalex.org/W2366718574","https://openalex.org/W2334685461","https://openalex.org/W2199813689","https://openalex.org/W2074502265"],"abstract_inverted_index":{"Due":[0],"to":[1,39,97,109,117,126,137,154],"deteriorating":[2],"environmental":[3],"conditions":[4],"and":[5,25,147],"increasing":[6],"human":[7,71],"activity,":[8],"conservation":[9],"efforts":[10],"directed":[11],"towards":[12],"wildlife":[13,27,65],"is":[14],"crucial.":[15],"Motion-activated":[16],"camera":[17,103,167],"traps":[18],"constitute":[19],"an":[20,118],"efficient":[21],"tool":[22],"for":[23,142],"tracking":[24,66],"monitoring":[26],"populations":[28],"across":[29],"the":[30,52,114,128,160,172],"globe.":[31],"Supervised":[32],"learning":[33],"techniques":[34,46,136],"have":[35],"been":[36],"successfully":[37],"deployed":[38],"analyze":[40],"such":[41,45],"imagery,":[42],"however":[43],"training":[44],"requires":[47],"annotations":[48],"from":[49],"experts.":[50,110],"Reducing":[51],"reliance":[53],"on":[54,164],"costly":[55],"labelled":[56],"data":[57],"therefore":[58],"has":[59],"immense":[60],"potential":[61],"in":[62,124,130,171],"developing":[63],"large-scale":[64],"solutions":[67],"with":[68],"markedly":[69],"less":[70],"labor.":[72],"In":[73,90],"this":[74],"work":[75],"we":[76,92,112],"propose":[77,148],"WildMatch,":[78],"a":[79,131,149,165],"novel":[80,150],"zero-shot":[81,132],"species":[82,129],"classification":[83],"framework":[84],"that":[85],"leverages":[86],"multimodal":[87],"foundation":[88],"models.":[89],"particular,":[91],"instruction":[93,139],"tune":[94],"vision-language":[95],"models":[96],"generate":[98],"detailed":[99,143],"visual":[100],"descriptions":[101,123],"of":[102,122,162,176],"trap":[104,168],"images":[105],"using":[106],"similar":[107],"terminology":[108],"Then,":[111],"match":[113],"generated":[115],"caption":[116,156],"external":[119],"knowledge":[120,151],"base":[121],"order":[125],"determine":[127],"manner.":[133],"We":[134,158],"investigate":[135],"build":[138],"tuning":[140],"datasets":[141],"animal":[144],"description":[145],"generation":[146],"augmentation":[152],"technique":[153],"enhance":[155],"quality.":[157],"demonstrate":[159],"performance":[161],"WildMatch":[163],"new":[166],"dataset":[169],"collected":[170],"Magdalena":[173],"Medio":[174],"region":[175],"Colombia.":[177]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4388367995","counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1}],"updated_date":"2025-04-22T20:55:12.003060","created_date":"2023-11-05"}