{"id":"https://openalex.org/W4393213856","doi":"https://doi.org/10.48550/arxiv.2403.16997","title":"Composed Video Retrieval via Enriched Context and Discriminative\n Embeddings","display_name":"Composed Video Retrieval via Enriched Context and Discriminative\n Embeddings","publication_year":2024,"publication_date":"2024-03-25","ids":{"openalex":"https://openalex.org/W4393213856","doi":"https://doi.org/10.48550/arxiv.2403.16997"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.16997","pdf_url":"http://arxiv.org/pdf/2403.16997","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2403.16997","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5074650489","display_name":"Omkar Thawakar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Thawakar, Omkar","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083105774","display_name":"Muzammal Naseer","orcid":"https://orcid.org/0000-0001-7663-7161"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Naseer, Muzammal","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022929869","display_name":"Rao Muhammad Anwer","orcid":"https://orcid.org/0000-0002-9041-2214"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Anwer, Rao Muhammad","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101483978","display_name":"Salman Khan","orcid":"https://orcid.org/0000-0001-8732-3395"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Khan, Salman","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042087981","display_name":"Michael Felsberg","orcid":"https://orcid.org/0000-0002-6096-3648"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Felsberg, Michael","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080823547","display_name":"Mubarak Shah","orcid":"https://orcid.org/0000-0001-6172-5572"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shah, Mubarak","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100760570","display_name":"Fahad Shahbaz Khan","orcid":"https://orcid.org/0000-0002-4263-3143"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Khan, Fahad Shahbaz","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":78},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9959,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9959,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9954,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9932,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.8122765}],"concepts":[{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.8122765},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6359986},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5628189},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.453805},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3404355},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.12106261},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.044472724}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.16997","pdf_url":"http://arxiv.org/pdf/2403.16997","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.16997","pdf_url":"http://arxiv.org/pdf/2403.16997","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4389116644","https://openalex.org/W4205463238","https://openalex.org/W3103844505","https://openalex.org/W2981954115","https://openalex.org/W2748952813","https://openalex.org/W259157601","https://openalex.org/W2521627374","https://openalex.org/W2153315159","https://openalex.org/W2110523656","https://openalex.org/W1482209366"],"abstract_inverted_index":{"Composed":[0],"video":[1,26,67,118],"retrieval":[2,123],"(CoVR)":[3,119],"is":[4],"a":[5,48,73],"challenging":[6],"problem":[7],"in":[8,28,58,150],"computer":[9],"vision":[10,93],"which":[11],"has":[12],"recently":[13],"highlighted":[14],"the":[15,54,65],"integration":[16],"of":[17,92,152],"modification":[18,40],"text":[19,41,95],"with":[20,39],"visual":[21,36,69],"queries":[22,37],"for":[23,99,115,136,161],"more":[24],"sophisticated":[25],"search":[27],"large":[29],"databases.":[30],"Existing":[31],"works":[32],"predominantly":[33],"rely":[34],"on":[35,126],"combined":[38],"to":[42,51,82,102],"distinguish":[43],"relevant":[44],"videos.":[45,107],"However,":[46],"such":[47],"strategy":[49],"struggles":[50],"fully":[52],"preserve":[53],"rich":[55],"query-specific":[56,85],"context":[57],"retrieved":[59],"target":[60,66,106],"videos":[61],"and":[62,88,97,120,139],"only":[63,96],"represents":[64],"using":[68],"embedding.":[70],"We":[71],"introduce":[72],"novel":[74],"CoVR":[75],"framework":[76,110],"that":[77,130],"leverages":[78],"detailed":[79,158],"language":[80,159],"descriptions":[81,160],"explicitly":[83],"encode":[84],"contextual":[86],"information":[87],"learns":[89],"discriminative":[90],"embeddings":[91],"only,":[94],"vision-text":[98],"better":[100],"alignment":[101],"accurately":[103],"retrieve":[104],"matched":[105],"Our":[108,155],"proposed":[109],"can":[111],"be":[112],"flexibly":[113],"employed":[114],"both":[116,137],"composed":[117],"image":[121],"(CoIR)":[122],"tasks.":[124],"Experiments":[125],"three":[127],"datasets":[128],"show":[129],"our":[131],"approach":[132],"obtains":[133],"state-of-the-art":[134],"performance":[135],"CovR":[138],"zero-shot":[140],"CoIR":[141],"tasks,":[142],"achieving":[143],"gains":[144],"as":[145,147],"high":[146],"around":[148],"7%":[149],"terms":[151],"recall@K=1":[153],"score.":[154],"code,":[156],"models,":[157],"WebViD-CoVR":[162],"dataset":[163],"are":[164],"available":[165],"at":[166],"\\url{https://github.com/OmkarThawakar/composed-video-retrieval}":[167]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4393213856","counts_by_year":[],"updated_date":"2025-04-11T15:33:24.824507","created_date":"2024-03-27"}