{"id":"https://openalex.org/W2963545982","doi":"https://doi.org/10.1145/3331184.3340264","title":"Automatic Understanding of the Visual World","display_name":"Automatic Understanding of the Visual World","publication_year":2019,"publication_date":"2019-07-18","ids":{"openalex":"https://openalex.org/W2963545982","doi":"https://doi.org/10.1145/3331184.3340264","mag":"2963545982"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/3331184.3340264","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109890544","display_name":"Cordelia Schmid","orcid":null},"institutions":[{"id":"https://openalex.org/I4210101348","display_name":"Centre Inria de l'Universit\u00e9 Grenoble Alpes","ror":"https://ror.org/00n8d6z93","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1326498283","https://openalex.org/I4210101348"]},{"id":"https://openalex.org/I1326498283","display_name":"Institut national de recherche en informatique et en automatique","ror":"https://ror.org/02kvxyf05","country_code":"FR","type":"government","lineage":["https://openalex.org/I1326498283"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Cordelia Schmid","raw_affiliation_strings":["INRIA, Montbonnot, France"],"affiliations":[{"raw_affiliation_string":"INRIA, Montbonnot, France","institution_ids":["https://openalex.org/I4210101348","https://openalex.org/I1326498283"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5109890544"],"corresponding_institution_ids":["https://openalex.org/I4210101348","https://openalex.org/I1326498283"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":62},"biblio":{"volume":null,"issue":null,"first_page":"3","last_page":"4"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Action Recognition and Pose Estimation","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Action Recognition and Pose Estimation","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Visual Question Answering in Images and Videos","score":0.9856,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Stereo Vision and Depth Estimation","score":0.9848,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/action-recognition","display_name":"Action Recognition","score":0.595291},{"id":"https://openalex.org/keywords/visual-question-answering","display_name":"Visual Question Answering","score":0.574641},{"id":"https://openalex.org/keywords/visual-servoing","display_name":"Visual Servoing","score":0.531584},{"id":"https://openalex.org/keywords/attention-mechanism","display_name":"Attention Mechanism","score":0.527933},{"id":"https://openalex.org/keywords/image-captioning","display_name":"Image Captioning","score":0.527655}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7755928},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6915065},{"id":"https://openalex.org/C31170391","wikidata":"https://www.wikidata.org/wiki/Q188619","display_name":"Hierarchy","level":2,"score":0.5719116},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.52098113},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.48078912},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.47294402},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.44880277},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.42561433},{"id":"https://openalex.org/C126388530","wikidata":"https://www.wikidata.org/wiki/Q1131737","display_name":"Imitation","level":2,"score":0.42256352},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.38023922},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C34447519","wikidata":"https://www.wikidata.org/wiki/Q179522","display_name":"Market economy","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/3331184.3340264","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality education","id":"https://metadata.un.org/sdg/4","score":0.81}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4387497383","https://openalex.org/W3183948672","https://openalex.org/W3173606202","https://openalex.org/W3110381201","https://openalex.org/W2948807893","https://openalex.org/W2935909890","https://openalex.org/W2778153218","https://openalex.org/W2758277628","https://openalex.org/W2665305151","https://openalex.org/W1531601525"],"abstract_inverted_index":{"One":[0],"of":[1,5,35,111,117,127,153],"the":[2,12,16],"central":[3],"problems":[4],"artificial":[6],"intelligence":[7],"is":[8],"machine":[9],"perception,":[10],"i.e.,":[11],"ability":[13],"to":[14,49,78,107,113,122,124],"understand":[15],"visual":[17],"world":[18],"based":[19,89,145],"on":[20,47,98,139,146],"input":[21],"from":[22,86,101],"sensors":[23],"such":[24],"as":[25],"cameras.":[26],"In":[27],"this":[28,39,109],"talk,":[29],"I":[30,41,63,134],"will":[31,42,64,135],"present":[32,136],"recent":[33,137],"progress":[34],"my":[36],"team":[37],"in":[38,71],"direction.":[40],"start":[43],"with":[44,129,141,150],"presenting":[45],"results":[46,67],"how":[48,106],"generate":[50],"additional":[51],"training":[52],"data":[53,128],"using":[54],"weak":[55],"annotations,":[56],"motion":[57],"information":[58,100],"and":[59,91,94,155],"synthetic":[60],"data.":[61],"Next,":[62],"discuss":[65],"our":[66],"for":[68],"action":[69],"recognition":[70],"videos,":[72],"where":[73],"human":[74],"tubelets":[75],"have":[76],"shown":[77],"be":[79],"successful.":[80],"Our":[81],"tubelet":[82],"approach":[83],"moves":[84],"away":[85],"state-of-the-art":[87],"frame":[88],"approaches":[90],"improves":[92],"classification":[93],"localization":[95],"by":[96],"relying":[97],"joint":[99],"several":[102],"frames.":[103],"We":[104],"show":[105],"extend":[108],"type":[110],"method":[112],"weakly":[114],"supervised":[115],"learning":[116,147],"actions,":[118],"which":[119],"allows":[120],"us":[121],"scale":[123],"large":[125],"amounts":[126],"sparse":[130],"manual":[131],"annotation.":[132],"Finally,":[133],"work":[138],"grasping":[140],"a":[142,151],"robot":[143],"arm":[144],"long-horizon":[148],"manipulations":[149],"hierarchy":[152],"RL":[154],"imitation-based":[156],"skills.":[157]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2963545982","counts_by_year":[],"updated_date":"2024-10-17T17:09:52.085143","created_date":"2019-07-30"}