{"id":"https://openalex.org/W4311997190","doi":"https://doi.org/10.48550/arxiv.2212.08653","title":"Attentive Mask CLIP","display_name":"Attentive Mask CLIP","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4311997190","doi":"https://doi.org/10.48550/arxiv.2212.08653"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2212.08653","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":null,"is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"journal-article","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2212.08653","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5104479074","display_name":"Yifan Yang","orcid":"https://orcid.org/0009-0002-6868-1220"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Yifan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109188830","display_name":"Weiquan Huang","orcid":"https://orcid.org/0009-0001-4976-711X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Weiquan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009248953","display_name":"Yixuan Wei","orcid":"https://orcid.org/0000-0003-1775-7301"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei, Yixuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080495327","display_name":"Houwen Peng","orcid":"https://orcid.org/0000-0001-8544-8952"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Peng, Houwen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011424573","display_name":"Xinyang Jiang","orcid":"https://orcid.org/0000-0002-4991-0596"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Xinyang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070156365","display_name":"Huiqiang Jiang","orcid":"https://orcid.org/0000-0002-1327-4882"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Huiqiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090973869","display_name":"Fangyun Wei","orcid":"https://orcid.org/0000-0001-8784-4916"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei, Fangyun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043176070","display_name":"Yin Wang","orcid":"https://orcid.org/0000-0002-8137-7496"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091049278","display_name":"Hu Han","orcid":"https://orcid.org/0000-0001-6010-1792"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Han","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032760128","display_name":"Lili Qiu","orcid":"https://orcid.org/0000-0002-1590-9749"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qiu, Lili","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5003549337","display_name":"Yuqing Yang","orcid":"https://orcid.org/0000-0002-2062-3278"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Yuqing","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":59},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11775","display_name":"COVID-19 diagnosis using AI","score":0.9938,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T11775","display_name":"COVID-19 diagnosis using AI","score":0.9938,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10862","display_name":"AI in cancer detection","score":0.9791,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.78919196},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.709531},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.53661907},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.52262264},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.4585528},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3717416},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2212.08653","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":null,"is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2212.08653","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2212.08653","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":null,"is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4388335561","https://openalex.org/W4385572700","https://openalex.org/W4385009901","https://openalex.org/W4307309205","https://openalex.org/W4288261899","https://openalex.org/W2997152889","https://openalex.org/W2970530566","https://openalex.org/W2967478618","https://openalex.org/W2042327336","https://openalex.org/W2033914206"],"abstract_inverted_index":{"Image":[0],"token":[1,71,123],"removal":[2,72,124],"is":[3,174],"an":[4,56,69,95],"efficient":[5,18,133,230],"augmentation":[6,19,137],"strategy":[7,20],"for":[8,74,125],"reducing":[9],"the":[10,27,45,86,99,103,110,118,140,154,222,239],"cost":[11],"of":[12,29,39,102,121,232,246],"computing":[13],"image":[14,40],"features.":[15],"However,":[16],"this":[17,65],"has":[21],"been":[22],"found":[23],"to":[24,85,134,139,158],"adversely":[25],"affect":[26],"accuracy":[28,195,208],"CLIP-based":[30],"training.":[31,62,127],"We":[32],"hypothesize":[33],"that":[34,109,162],"removing":[35],"a":[36,50,81],"large":[37],"portion":[38],"tokens":[41,79],"may":[42],"improperly":[43],"discard":[44],"semantic":[46,83],"content":[47],"associated":[48],"with":[49,80],"given":[51],"text":[52,87],"description,":[53],"thus":[54],"constituting":[55],"incorrect":[57],"pairing":[58],"target":[59],"in":[60,94],"CLIP":[61,75,126,155,160,241],"To":[63],"address":[64],"issue,":[66],"we":[67],"propose":[68],"attentive":[70,112],"approach":[73,114,129,191,234],"training,":[76],"which":[77,214],"retains":[78],"high":[82],"correlation":[84,90],"description.":[88],"The":[89,128],"scores":[91],"are":[92,215],"computed":[93],"online":[96],"fashion":[97],"using":[98,185],"EMA":[100],"version":[101,231],"visual":[104],"encoder.":[105],"Our":[106],"experiments":[107],"show":[108],"proposed":[111],"masking":[113],"performs":[115],"better":[116],"than":[117,221,238],"previous":[119],"method":[120,173],"random":[122],"also":[130,180],"makes":[131],"it":[132],"apply":[135],"multiple":[136],"views":[138,152],"image,":[141],"as":[142,144,168,200,202],"well":[143,201],"introducing":[145],"instance":[146],"contrastive":[147],"learning":[148],"tasks":[149],"between":[150],"these":[151,252],"into":[153],"framework.":[156],"Compared":[157],"other":[159],"improvements":[161],"combine":[163],"different":[164],"pre-training":[165],"targets":[166],"such":[167],"SLIP":[169,223],"and":[170,187,204,211,218,249],"MaskCLIP,":[171],"our":[172,190,233],"not":[175],"only":[176],"more":[177,182],"effective,":[178],"but":[179],"much":[181],"efficient.":[183],"Specifically,":[184],"ViT-B":[186],"YFCC-15M":[188],"dataset,":[189],"achieves":[192,243],"$43.9\\%$":[193],"top-1":[194],"on":[196,209,251],"ImageNet-1K":[197],"zero-shot":[198],"classification,":[199],"$62.7/42.1$":[203],"$38.0/23.2$":[205],"I2T/T2I":[206],"retrieval":[207],"Flickr30K":[210],"MS":[212],"COCO,":[213],"$+1.1\\%$,":[216],"$+5.5/+0.9$,":[217],"$+4.4/+1.3$":[219],"higher":[220],"method,":[224],"while":[225],"being":[226],"$2.30\\times$":[227],"faster.":[228],"An":[229],"running":[235],"$1.16\\times$":[236],"faster":[237],"plain":[240],"model":[242],"significant":[244],"gains":[245],"$+5.3\\%$,":[247],"$+11.3/+8.0$,":[248],"$+9.5/+4.9$":[250],"benchmarks.":[253]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4311997190","counts_by_year":[],"updated_date":"2025-03-06T04:11:06.388076","created_date":"2023-01-03"}