{"id":"https://openalex.org/W4400374481","doi":"https://doi.org/10.48550/arxiv.2407.02389","title":"SafaRi:Adaptive Sequence Transformer for Weakly Supervised Referring\n Expression Segmentation","display_name":"SafaRi:Adaptive Sequence Transformer for Weakly Supervised Referring\n Expression Segmentation","publication_year":2024,"publication_date":"2024-07-02","ids":{"openalex":"https://openalex.org/W4400374481","doi":"https://doi.org/10.48550/arxiv.2407.02389"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.02389","pdf_url":"http://arxiv.org/pdf/2407.02389","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2407.02389","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101977185","display_name":"Sayan Nag","orcid":"https://orcid.org/0000-0001-5652-125X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nag, Sayan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083954626","display_name":"Koustava Goswami","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Goswami, Koustava","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5044410531","display_name":"Srikrishna Karanam","orcid":"https://orcid.org/0000-0002-7627-7765"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Karanam, Srikrishna","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":84},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9042,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9042,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.47840437}],"concepts":[{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.6558317},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.60872465},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4795608},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.47840437},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4435298},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.37162977},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.195506},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.19399202},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.1444115},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.09349328},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.02389","pdf_url":"http://arxiv.org/pdf/2407.02389","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.02389","pdf_url":"http://arxiv.org/pdf/2407.02389","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4395685956","https://openalex.org/W4389858081","https://openalex.org/W4385583601","https://openalex.org/W4379231730","https://openalex.org/W4298131179","https://openalex.org/W2501551404","https://openalex.org/W2113201962","https://openalex.org/W2112284452","https://openalex.org/W2042327336","https://openalex.org/W2033914206"],"abstract_inverted_index":{"Referring":[0],"Expression":[1],"Segmentation":[2],"(RES)":[3],"aims":[4],"to":[5,18,38,165],"provide":[6],"a":[7,48,73,132,140,196],"segmentation":[8],"mask":[9,29,77],"of":[10,62,75,94,109,127],"the":[11,20,43,60,67,110,114,172],"target":[12,111],"object":[13,112],"in":[14,82,96,113,195,204],"an":[15],"image":[16],"referred":[17],"by":[19,171,187],"text":[21],"(i.e.,":[22],"referring":[23],"expression).":[24],"Existing":[25],"methods":[26],"require":[27],"large-scale":[28],"annotations.":[30],"Moreover,":[31],"such":[32,97],"approaches":[33],"do":[34],"not":[35],"generalize":[36],"well":[37],"unseen/zero-shot":[39,205],"scenarios.":[40],"To":[41,59,90],"address":[42],"aforementioned":[44],"issues,":[45],"we":[46,116,130],"propose":[47,117],"weakly-supervised":[49],"bootstrapping":[50],"architecture":[51],"for":[52,88],"RES":[53],"with":[54,120,151],"several":[55],"new":[56],"algorithmic":[57],"innovations.":[58],"best":[61],"our":[63,155],"knowledge,":[64],"ours":[65],"is":[66],"first":[68],"approach":[69],"that":[70,150],"considers":[71],"only":[72],"fraction":[74],"both":[76],"and":[78,85,104,160,167,180,191,199],"box":[79],"annotations":[80],"(shown":[81],"Figure":[83],"1":[84],"Table":[86],"1)":[87],"training.":[89],"enable":[91],"principled":[92],"training":[93],"models":[95],"low-annotation":[98],"settings,":[99],"improve":[100],"image-text":[101],"region-level":[102],"alignment,":[103],"further":[105],"enhance":[106],"spatial":[107],"localization":[108],"image,":[115],"Cross-modal":[118],"Fusion":[119],"Attention":[121],"Consistency":[122],"module.":[123],"For":[124],"automatic":[125],"pseudo-labeling":[126],"unlabeled":[128],"samples,":[129],"introduce":[131],"novel":[133],"Mask":[134],"Validity":[135],"Filtering":[136],"routine":[137],"based":[138],"on":[139,178],"spatially":[141],"aware":[142],"zero-shot":[143],"proposal":[144],"scoring":[145],"approach.":[146],"Extensive":[147],"experiments":[148],"show":[149],"just":[152],"30%":[153],"annotations,":[154],"model":[156],"SafaRi":[157,183],"achieves":[158],"59.31":[159],"48.26":[161],"mIoUs":[162,169],"as":[163],"compared":[164],"58.93":[166],"48.19":[168],"obtained":[170],"fully-supervised":[173,197],"SOTA":[174],"method":[175],"SeqTR":[176,186],"respectively":[177],"RefCOCO+@testA":[179],"RefCOCO+testB":[181],"datasets.":[182],"also":[184],"outperforms":[185],"11.7%":[188],"(on":[189,193],"RefCOCO+testA)":[190],"19.6%":[192],"RefCOCO+testB)":[194],"setting":[198],"demonstrates":[200],"strong":[201],"generalization":[202],"capabilities":[203],"tasks.":[206]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4400374481","counts_by_year":[],"updated_date":"2024-12-10T14:02:46.918767","created_date":"2024-07-06"}