{"id":"https://openalex.org/W4284681239","doi":"https://doi.org/10.48550/arxiv.2207.02088","title":"SiamMask: A Framework for Fast Online Object Tracking and Segmentation","display_name":"SiamMask: A Framework for Fast Online Object Tracking and Segmentation","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4284681239","doi":"https://doi.org/10.48550/arxiv.2207.02088"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2207.02088","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"journal-article","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2207.02088","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114549594","display_name":"Weiming Hu","orcid":"https://orcid.org/0000-0001-9237-8825"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Weiming","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100367009","display_name":"Qiang Wang","orcid":"https://orcid.org/0000-0002-7078-7545"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Qiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100425669","display_name":"Li Zhang","orcid":"https://orcid.org/0000-0002-9321-3421"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Li","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009876111","display_name":"Luca Bertinetto","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bertinetto, Luca","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5042899882","display_name":"Philip H. S. Torr","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Torr, Philip H. S.","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":60},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9993,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11707","display_name":"Gaze Tracking and Assistive Technology","score":0.9928,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/initialization","display_name":"Initialization","score":0.70849013},{"id":"https://openalex.org/keywords/minimum-bounding-box","display_name":"Minimum bounding box","score":0.58752185},{"id":"https://openalex.org/keywords/frame-rate","display_name":"Frame rate","score":0.52368534},{"id":"https://openalex.org/keywords/segmentation-based-object-categorization","display_name":"Segmentation-based object categorization","score":0.45954674},{"id":"https://openalex.org/keywords/tracking","display_name":"Tracking (education)","score":0.45131612},{"id":"https://openalex.org/keywords/active-appearance-model","display_name":"Active appearance model","score":0.41637272}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8124734},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.7857607},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7802638},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.73931706},{"id":"https://openalex.org/C114466953","wikidata":"https://www.wikidata.org/wiki/Q6034165","display_name":"Initialization","level":2,"score":0.70849013},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.70403934},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.6314558},{"id":"https://openalex.org/C147037132","wikidata":"https://www.wikidata.org/wiki/Q6865426","display_name":"Minimum bounding box","level":3,"score":0.58752185},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.54560655},{"id":"https://openalex.org/C3261483","wikidata":"https://www.wikidata.org/wiki/Q119565","display_name":"Frame rate","level":2,"score":0.52368534},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.48604137},{"id":"https://openalex.org/C65885262","wikidata":"https://www.wikidata.org/wiki/Q7429708","display_name":"Scale-space segmentation","level":4,"score":0.4710184},{"id":"https://openalex.org/C25694479","wikidata":"https://www.wikidata.org/wiki/Q7446278","display_name":"Segmentation-based object categorization","level":5,"score":0.45954674},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.45131612},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.45024207},{"id":"https://openalex.org/C83248878","wikidata":"https://www.wikidata.org/wiki/Q344000","display_name":"Active appearance model","level":3,"score":0.41637272},{"id":"https://openalex.org/C56461940","wikidata":"https://www.wikidata.org/wiki/Q970687","display_name":"Eye tracking","level":2,"score":0.41381255},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.24047887},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.06193638},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C19417346","wikidata":"https://www.wikidata.org/wiki/Q7922","display_name":"Pedagogy","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2207.02088","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2207.02088","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2207.02088","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W3144569342","https://openalex.org/W2945274617","https://openalex.org/W2387793296","https://openalex.org/W2386644571","https://openalex.org/W2372421320","https://openalex.org/W2185902295","https://openalex.org/W2057775483","https://openalex.org/W2041871225","https://openalex.org/W2038525722","https://openalex.org/W1558398159"],"abstract_inverted_index":{"In":[0],"this":[1],"paper":[2],"we":[3,74],"introduce":[4],"SiamMask,":[5],"a":[6,41,54,98,135],"framework":[7,83],"to":[8,80,84],"perform":[9],"both":[10],"visual":[11,65],"object":[12,16,66,87,140],"tracking":[13,67,88,124],"and":[14,60,68,89],"video":[15,139],"segmentation,":[17],"in":[18,97],"real-time,":[19],"with":[20,40],"the":[21,27,46,82,94,128],"same":[22,129],"simple":[23],"method.":[24],"We":[25],"improve":[26],"offline":[28,47],"training":[29,48],"procedure":[30],"of":[31],"popular":[32],"fully-convolutional":[33],"Siamese":[34],"approaches":[35],"by":[36,91],"augmenting":[37],"their":[38],"losses":[39],"binary":[42],"segmentation":[43,69,90,141],"task.":[44],"Once":[45],"is":[49,78],"completed,":[50],"SiamMask":[51],"only":[52],"requires":[53],"single":[55],"bounding":[56],"box":[57],"for":[58,138],"initialization":[59],"can":[61],"simultaneously":[62],"carry":[63],"out":[64],"at":[70,111,127,134],"high":[71,108,136],"frame-rates.":[72],"Moreover,":[73],"show":[75,103],"that":[76,104],"it":[77],"possible":[79],"extend":[81],"handle":[85],"multiple":[86],"simply":[92],"re-using":[93],"multi-task":[95],"model":[96],"cascaded":[99],"fashion.":[100],"Experimental":[101],"results":[102,121],"our":[105],"approach":[106],"has":[107],"processing":[109],"efficiency,":[110],"around":[112],"55":[113],"frames":[114],"per":[115],"second.":[116],"It":[117],"yields":[118],"real-time":[119],"state-of-the-art":[120],"on":[122],"visual-object":[123],"benchmarks,":[125],"while":[126],"time":[130],"demonstrating":[131],"competitive":[132],"performance":[133],"speed":[137],"benchmarks.":[142]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4284681239","counts_by_year":[],"updated_date":"2025-01-20T07:45:33.484002","created_date":"2022-07-08"}