{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T05:53:20Z","timestamp":1740117200051,"version":"3.37.3"},"reference-count":74,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2023,12,1]],"date-time":"2023-12-01T00:00:00Z","timestamp":1701388800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2023,12,1]],"date-time":"2023-12-01T00:00:00Z","timestamp":1701388800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2023,12,1]],"date-time":"2023-12-01T00:00:00Z","timestamp":1701388800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2023,12,1]],"date-time":"2023-12-01T00:00:00Z","timestamp":1701388800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2023,12,1]],"date-time":"2023-12-01T00:00:00Z","timestamp":1701388800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,12,1]],"date-time":"2023-12-01T00:00:00Z","timestamp":1701388800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100021202","name":"Alliance de recherche num\u00e9rique du Canada","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100021202","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100000038","name":"Natural Sciences and Engineering Research Council of Canada","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100000038","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004489","name":"Mitacs","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004489","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Image and Vision Computing"],"published-print":{"date-parts":[[2023,12]]},"DOI":"10.1016\/j.imavis.2023.104838","type":"journal-article","created":{"date-parts":[[2023,10,13]],"date-time":"2023-10-13T01:47:26Z","timestamp":1697161646000},"page":"104838","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":6,"special_numbering":"C","title":["DiPS: Discriminative pseudo-label sampling with self-supervised transformers for weakly supervised object localization"],"prefix":"10.1016","volume":"140","author":[{"given":"Shakeeb","family":"Murtaza","sequence":"first","affiliation":[]},{"given":"Soufiane","family":"Belharbi","sequence":"additional","affiliation":[]},{"given":"Marco","family":"Pedersoli","sequence":"additional","affiliation":[]},{"given":"Aydin","family":"Sarraf","sequence":"additional","affiliation":[]},{"given":"Eric","family":"Granger","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.imavis.2023.104838_bb0005","series-title":"ICCV","article-title":"Emerging properties in self-supervised vision transformers","author":"Caron","year":"2021"},{"key":"10.1016\/j.imavis.2023.104838_bb0010","series-title":"ICCV","first-page":"2886","article-title":"TS-CAM: Token semantic coupled attention map for weakly supervised object localization","author":"Gao","year":"2021"},{"key":"10.1016\/j.imavis.2023.104838_bb0015","series-title":"CVPR","first-page":"2921","article-title":"Learning deep features for discriminative localization","author":"Zhou","year":"2016"},{"key":"10.1016\/j.imavis.2023.104838_bb0020","series-title":"CVPR","article-title":"Is object localization for free? weakly-supervised learning with convolutional neural networks","author":"Oquab","year":"2015"},{"key":"10.1016\/j.imavis.2023.104838_bb0025","series-title":"CVPR","article-title":"Deep residual learning for image recognition","author":"He","year":"2016"},{"key":"10.1016\/j.imavis.2023.104838_bb0030","series-title":"CVPR","article-title":"Evaluating weakly supervised object localization methods right","author":"Choe","year":"2020"},{"key":"10.1016\/j.imavis.2023.104838_bb0035","series-title":"CVPR","article-title":"You only look once: Unified, real-time object detection","author":"Redmon","year":"2016"},{"issue":"4","key":"10.1016\/j.imavis.2023.104838_bb0040","doi-asserted-by":"crossref","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","article-title":"Deeplab: Semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected crfs","volume":"40","author":"Chen","year":"2018","journal-title":"PAMI"},{"key":"10.1016\/j.imavis.2023.104838_bb0045","article-title":"CoLo-CAM: Class activation mapping for object colocalization in weakly-labeled unconstrained videos","author":"Belharbi","year":"2023","journal-title":"CoRR"},{"key":"10.1016\/j.imavis.2023.104838_bb0050","series-title":"WACV","article-title":"F-CAM: Full resolution class activation maps via guided parametric upscaling","author":"Belharbi","year":"2022"},{"key":"10.1016\/j.imavis.2023.104838_bb0055","series-title":"CVPR","article-title":"Attention-based dropout layer for weakly supervised object localization","author":"Choe","year":"2019"},{"key":"10.1016\/j.imavis.2023.104838_bb0060","series-title":"CVPR","article-title":"Ficklenet: Weakly and semi-supervised semantic image segmentation using stochastic inference","author":"Lee","year":"2019"},{"key":"10.1016\/j.imavis.2023.104838_bb0065","series-title":"ECCV","article-title":"Pairwise similarity knowledge transfer for weakly supervised object localization","author":"Rahimi","year":"2020"},{"key":"10.1016\/j.imavis.2023.104838_bb0070","series-title":"ICCV","article-title":"Hide-and-seek: Forcing a network to be meticulous for weakly-supervised object and action localization","author":"Singh","year":"2017"},{"key":"10.1016\/j.imavis.2023.104838_bb0075","series-title":"CVPR","article-title":"Shallow feature matters for weakly supervised object localization","author":"Wei","year":"2021"},{"key":"10.1016\/j.imavis.2023.104838_bb0080","series-title":"CVPR","article-title":"Object region mining with adversarial erasing: A simple classification to semantic segmentation approach","author":"Wei","year":"2017"},{"key":"10.1016\/j.imavis.2023.104838_bb0085","series-title":"ICCV","article-title":"DANET: Divergent activation for weakly supervised object localization","author":"Xue","year":"2019"},{"key":"10.1016\/j.imavis.2023.104838_bb0090","series-title":"WACV","article-title":"Combinational class activation maps for weakly supervised object localization","author":"Yang","year":"2020"},{"key":"10.1016\/j.imavis.2023.104838_bb0095","series-title":"ICCV","article-title":"Cutmix: Regularization strategy to train strong classifiers with localizable features","author":"Yun","year":"2019"},{"key":"10.1016\/j.imavis.2023.104838_bb0100","series-title":"CVPR","article-title":"Rethinking the route towards weakly supervised object localization","author":"Zhang","year":"2020"},{"key":"10.1016\/j.imavis.2023.104838_bb0105","series-title":"ECCV","article-title":"Self-produced guidance for weakly-supervised object localization","author":"Zhang","year":"2018"},{"key":"10.1016\/j.imavis.2023.104838_bb0110","doi-asserted-by":"crossref","first-page":"96","DOI":"10.59275\/j.melba.2023-5g54","article-title":"Deep weakly-supervised learning methods for classification and localization in histology images: a survey","volume":"2","author":"Rony","year":"2023","journal-title":"Machine Learning for Biomedical Imaging"},{"key":"10.1016\/j.imavis.2023.104838_bb0115","series-title":"CVPR","article-title":"Rethinking the route towards weakly supervised object localization","author":"Zhang","year":"2020"},{"key":"10.1016\/j.imavis.2023.104838_bb0120","series-title":"CVPR","article-title":"Shallow feature matters for weakly supervised object localization","author":"Wei","year":"2021"},{"key":"10.1016\/j.imavis.2023.104838_bb0125","series-title":"MIDL","article-title":"Negative evidence matters in interpretable histology image classification","author":"Belharbi","year":"2022"},{"key":"10.1016\/j.imavis.2023.104838_bb0130","series-title":"WACV","article-title":"TCAM: Temporal class activation maps for object localization in weakly-labeled unconstrained videos","author":"Belharbi","year":"2023"},{"issue":"1","key":"10.1016\/j.imavis.2023.104838_bb0135","doi-asserted-by":"crossref","first-page":"62","DOI":"10.1109\/TSMC.1979.4310076","article-title":"A threshold selection method from gray-level histograms","volume":"9","author":"Otsu","year":"1979","journal-title":"IEEE Transactions on Systems, Man, and Cybernetics"},{"key":"10.1016\/j.imavis.2023.104838_bb0140","series-title":"ECCV","article-title":"Weakly supervised object localization via transformer with implicit spatial calibration","author":"Bai","year":"2022"},{"key":"10.1016\/j.imavis.2023.104838_bb0145","series-title":"AAAI","article-title":"LCTR: On awakening the local continuity of transformer for weakly supervised object localization","author":"Chen","year":"2022"},{"key":"10.1016\/j.imavis.2023.104838_bb0150","series-title":"CVPRw","article-title":"ViTOL: Vision transformer for weakly supervised object localization","author":"Gupta","year":"2022"},{"key":"10.1016\/j.imavis.2023.104838_bb0155","series-title":"BMVC","article-title":"Re-attention transformer for weakly supervised object localization","author":"Su","year":"2022"},{"key":"10.1016\/j.imavis.2023.104838_bb0160","series-title":"WACV Workshops","article-title":"Discriminative sampling of proposals in self-supervised transformers for weakly supervised object localization","author":"Murtaza","year":"2023"},{"key":"10.1016\/j.imavis.2023.104838_bb0165","series-title":"ECCV","article-title":"On regularized losses for weakly-supervised cnn segmentation","author":"Tang","year":"2018"},{"key":"10.1016\/j.imavis.2023.104838_bb0170","series-title":"WACV","article-title":"Grad-CAM++: Generalized gradient-based visual explanations for deep convolutional networks","author":"Chattopadhay","year":"2018"},{"key":"10.1016\/j.imavis.2023.104838_bb0175","series-title":"BMVC","article-title":"Axiom-based grad-cam: Towards accurate visualization and explanation of CNNS","author":"Fu","year":"2020"},{"key":"10.1016\/j.imavis.2023.104838_bb0180","series-title":"WACV","article-title":"Ablation-cam: Visual explanations for deep convolutional network via gradient-free localization","author":"Ramaswamy","year":"2020"},{"key":"10.1016\/j.imavis.2023.104838_bb0185","series-title":"ICCV","first-page":"618","article-title":"Grad-CAM: Visual explanations from deep networks via gradient-based localization","author":"Selvaraju","year":"2017"},{"key":"10.1016\/j.imavis.2023.104838_bb0190","series-title":"CVPR","first-page":"1325","article-title":"Adversarial complementary learning for weakly supervised object localization","author":"Zhang","year":"2018"},{"key":"10.1016\/j.imavis.2023.104838_bb0195","article-title":"IS-CAM: Integrated score-cam for axiomatic-based explanations","author":"Naidu","year":"2020","journal-title":"CoRR"},{"key":"10.1016\/j.imavis.2023.104838_bb0200","article-title":"SS-CAM: Smoothed score-cam for sharper visual feature localization","author":"Wang","year":"2020","journal-title":"CoRR"},{"key":"10.1016\/j.imavis.2023.104838_bb0205","series-title":"CVPR Workshops","article-title":"Score-CAM: Score-weighted visual explanations for convolutional neural networks","author":"Wang","year":"2020"},{"key":"10.1016\/j.imavis.2023.104838_bb0210","series-title":"CVPR","article-title":"Revisiting dilated convolution: A simple approach for weakly-and semi-supervised semantic segmentation","author":"Wei","year":"2018"},{"key":"10.1016\/j.imavis.2023.104838_bb0215","series-title":"CVPR","article-title":"Background activation suppression for weakly supervised object localization","author":"Wu","year":"2022"},{"year":"2023","series-title":"Background-aware Classification Activation Map for Weakly Supervised Object Localization","author":"Zhu","key":"10.1016\/j.imavis.2023.104838_bb0220"},{"key":"10.1016\/j.imavis.2023.104838_bb0225","series-title":"ICCV","article-title":"Foreground activation maps for weakly supervised object localization","author":"Meng","year":"2021"},{"key":"10.1016\/j.imavis.2023.104838_bb0230","series-title":"ICCV","article-title":"Online refinement of low-level feature based activation map for weakly supervised object localization","author":"Xie","year":"2021"},{"key":"10.1016\/j.imavis.2023.104838_bb0235","series-title":"ECCV","first-page":"271","article-title":"Inter-image communication for weakly supervised localization","author":"Zhang","year":"2020"},{"key":"10.1016\/j.imavis.2023.104838_bb0240","series-title":"CVPR","first-page":"11642","article-title":"Unveiling the potential of structure preserving for weakly supervised object localization","author":"Pan","year":"2021"},{"key":"10.1016\/j.imavis.2023.104838_bb0245","series-title":"CVPR","article-title":"Network dissection: Quantifying interpretability of deep visual representations","author":"Bau","year":"2017"},{"key":"10.1016\/j.imavis.2023.104838_bb0250","series-title":"ECCV","article-title":"Visualizing and understanding convolutional networks","author":"Zeiler","year":"2014"},{"key":"10.1016\/j.imavis.2023.104838_bb0255","series-title":"CVPR","article-title":"Weakly-supervised semantic segmentation by iteratively mining common object features","author":"Wang","year":"2018"},{"key":"10.1016\/j.imavis.2023.104838_bb0260","series-title":"CVPR","article-title":"Self-supervised equivariant attention mechanism for weakly supervised semantic segmentation","author":"Wang","year":"2020"},{"key":"10.1016\/j.imavis.2023.104838_bb0265","series-title":"ACCV","article-title":"In-sample contrastive learning and consistent attention for weakly supervised object localization","author":"Ki","year":"2020"},{"key":"10.1016\/j.imavis.2023.104838_bb0270","article-title":"CaFT: Clustering and filter on tokens of transformer for weakly supervised object localization","author":"Li","year":"2022","journal-title":"CoRR"},{"key":"10.1016\/j.imavis.2023.104838_bb0275","doi-asserted-by":"crossref","first-page":"7130","DOI":"10.1109\/TIP.2022.3220055","article-title":"Adversarial transformers for weakly supervised object localization","volume":"31","author":"Meng","year":"2022","journal-title":"IEEE Transactions on Image Processing"},{"key":"10.1016\/j.imavis.2023.104838_bb0280","series-title":"ACL","article-title":"Quantifying attention flow in transformers","author":"Abnar","year":"2020"},{"key":"10.1016\/j.imavis.2023.104838_bb0285","series-title":"ICLR","article-title":"An image is worth 16x16 words: Transformers for image recognition at scale","author":"Dosovitskiy","year":"2021"},{"key":"10.1016\/j.imavis.2023.104838_bb0290","series-title":"ICCV","article-title":"Understanding deep networks via extremal perturbations and smooth masks","author":"Fong","year":"2019"},{"key":"10.1016\/j.imavis.2023.104838_bb0295","series-title":"ECCV","article-title":"Seed, expand and constrain: Three principles for weakly supervised image segmentation","author":"Kolesnikov","year":"2016"},{"issue":"3","key":"10.1016\/j.imavis.2023.104838_bb0300","doi-asserted-by":"crossref","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","article-title":"ImageNet large scale visual recognition challenge","volume":"115","author":"Russakovsky","year":"2015","journal-title":"IJCV"},{"year":"2011","series-title":"Efficient Inference in Fully Connected CRFS with Gaussian Edge Potentials","author":"Krahenbuhl","key":"10.1016\/j.imavis.2023.104838_bb0305"},{"key":"10.1016\/j.imavis.2023.104838_bb0310","series-title":"CVPR","first-page":"11700","article-title":"Large-scale interactive object segmentation with human annotators","author":"Benenson","year":"2019"},{"year":"2010","series-title":"Caltech-UCSD Birds 200. Technical Report CNS-TR-2010-001","author":"Welinder","key":"10.1016\/j.imavis.2023.104838_bb0315"},{"key":"10.1016\/j.imavis.2023.104838_bb0320","series-title":"ECCV","first-page":"618","article-title":"Rethinking class activation mapping for weakly supervised object localization","author":"Bae","year":"2020"},{"key":"10.1016\/j.imavis.2023.104838_bb0325","series-title":"ICCV","first-page":"3427","article-title":"Normalization matters in weakly supervised object localization","author":"Kim","year":"2021"},{"key":"10.1016\/j.imavis.2023.104838_bb0330","doi-asserted-by":"crossref","first-page":"1774","DOI":"10.1109\/TIP.2022.3145238","article-title":"Diverse complementary part mining for weakly supervised object localization","volume":"31","author":"Meng","year":"2022","journal-title":"IEEE Transactions on Image Processing"},{"key":"10.1016\/j.imavis.2023.104838_bb0335","series-title":"CVPR","article-title":"C2AM: Contrastive learning of class-agnostic activation map for weakly supervised object localization and semantic segmentation","author":"Xie","year":"2022"},{"key":"10.1016\/j.imavis.2023.104838_bb0340","series-title":"CVPR","article-title":"Bridging the gap between classification and localization forweakly supervised object localization","author":"Kim","year":"2022"},{"key":"10.1016\/j.imavis.2023.104838_bb0345","series-title":"ECCV","article-title":"Bagging regional classification activation maps for weakly supervised object localization","author":"Zhu","year":"2022"},{"key":"10.1016\/j.imavis.2023.104838_bb0350","series-title":"CVPR","article-title":"Cream: Weakly supervised object localization via class re-activation mapping","author":"Xu","year":"2022"},{"key":"10.1016\/j.imavis.2023.104838_bb0355","series-title":"ICLR","article-title":"Efficient self-supervised vision transformers for representation learning","author":"Li","year":"2022"},{"key":"10.1016\/j.imavis.2023.104838_bb0360","series-title":"ICCV","article-title":"An empirical study of training self-supervised vision transformers","author":"Chen","year":"2021"},{"key":"10.1016\/j.imavis.2023.104838_bb0365","series-title":"CVPR","article-title":"SimMIM: A simple framework for masked image modeling","author":"Xie","year":"2022"},{"key":"10.1016\/j.imavis.2023.104838_bb0370","series-title":"CVPR","article-title":"CLIP is also an efficient segmenter: A text-driven approach for weakly supervised semantic segmentation","author":"Lin","year":"2023"}],"container-title":["Image and Vision Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0262885623002123?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0262885623002123?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2023,12,5]],"date-time":"2023-12-05T21:20:37Z","timestamp":1701811237000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0262885623002123"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12]]},"references-count":74,"alternative-id":["S0262885623002123"],"URL":"https:\/\/doi.org\/10.1016\/j.imavis.2023.104838","relation":{},"ISSN":["0262-8856"],"issn-type":[{"type":"print","value":"0262-8856"}],"subject":[],"published":{"date-parts":[[2023,12]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"DiPS: Discriminative pseudo-label sampling with self-supervised transformers for weakly supervised object localization","name":"articletitle","label":"Article Title"},{"value":"Image and Vision Computing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.imavis.2023.104838","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2023 Elsevier B.V. All rights reserved.","name":"copyright","label":"Copyright"}],"article-number":"104838"}}