{"id":"https://openalex.org/W3175995235","doi":"https://doi.org/10.15607/rss.2021.xvii.012","title":"Learning Generalizable Robotic Reward Functions from \u201cIn-The-Wild\u201d Human Videos","display_name":"Learning Generalizable Robotic Reward Functions from \u201cIn-The-Wild\u201d Human Videos","publication_year":2021,"publication_date":"2021-06-27","ids":{"openalex":"https://openalex.org/W3175995235","doi":"https://doi.org/10.15607/rss.2021.xvii.012","mag":"3175995235"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.15607/rss.2021.xvii.012","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://doi.org/10.15607/rss.2021.xvii.012","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5074961334","display_name":"Annie Chen","orcid":"https://orcid.org/0000-0003-3070-8336"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"funder","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Annie Chen","raw_affiliation_strings":["Stanford University"],"affiliations":[{"raw_affiliation_string":"Stanford University","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101603882","display_name":"Suraj Nair","orcid":"https://orcid.org/0000-0002-3999-2436"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"funder","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Suraj Nair","raw_affiliation_strings":["Stanford University"],"affiliations":[{"raw_affiliation_string":"Stanford University","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5005431772","display_name":"Chelsea Finn","orcid":"https://orcid.org/0000-0001-6298-0874"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"funder","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chelsea Finn","raw_affiliation_strings":["Stanford University"],"affiliations":[{"raw_affiliation_string":"Stanford University","institution_ids":["https://openalex.org/I97018004"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.852,"has_fulltext":false,"cited_by_count":32,"citation_normalized_percentile":{"value":0.836516,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9924,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9924,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9902,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9736,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/human\u2013robot-interaction","display_name":"Human\u2013robot interaction","score":0.4109876}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.67402154},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5310631},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.42135093},{"id":"https://openalex.org/C145460709","wikidata":"https://www.wikidata.org/wiki/Q859951","display_name":"Human\u2013robot interaction","level":3,"score":0.4109876},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.40102744},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.39238936}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.15607/rss.2021.xvii.012","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2103.16817","pdf_url":"https://arxiv.org/pdf/2103.16817","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.15607/rss.2021.xvii.012","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10","score":0.71}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":55,"referenced_works":["https://openalex.org/W1613163714","https://openalex.org/W1927052826","https://openalex.org/W1929981607","https://openalex.org/W1995820507","https://openalex.org/W2009618608","https://openalex.org/W2098774185","https://openalex.org/W2108598243","https://openalex.org/W2112913186","https://openalex.org/W2158782408","https://openalex.org/W2169498096","https://openalex.org/W2201912979","https://openalex.org/W2290104316","https://openalex.org/W2528489519","https://openalex.org/W2597261100","https://openalex.org/W2625366777","https://openalex.org/W2769112066","https://openalex.org/W2787053496","https://openalex.org/W2806130867","https://openalex.org/W2823112946","https://openalex.org/W2896457183","https://openalex.org/W2902125520","https://openalex.org/W2938421504","https://openalex.org/W2952267586","https://openalex.org/W2962793652","https://openalex.org/W2962936820","https://openalex.org/W2963410356","https://openalex.org/W2963435596","https://openalex.org/W2963802910","https://openalex.org/W2963860638","https://openalex.org/W2968227116","https://openalex.org/W2981344907","https://openalex.org/W2990747716","https://openalex.org/W3004396697","https://openalex.org/W3009295642","https://openalex.org/W3020712699","https://openalex.org/W3028676366","https://openalex.org/W3036498537","https://openalex.org/W3038245394","https://openalex.org/W3038298277","https://openalex.org/W3040490156","https://openalex.org/W3048833305","https://openalex.org/W3091086138","https://openalex.org/W3102502242","https://openalex.org/W3108330043","https://openalex.org/W3150177537","https://openalex.org/W4287599290","https://openalex.org/W4288020136","https://openalex.org/W4288333732","https://openalex.org/W4288350579","https://openalex.org/W4289294484","https://openalex.org/W4292779060","https://openalex.org/W4299591591","https://openalex.org/W4306882122","https://openalex.org/W4323640751","https://openalex.org/W834081922"],"related_works":["https://openalex.org/W4312091514","https://openalex.org/W4288102937","https://openalex.org/W4285102375","https://openalex.org/W4283782265","https://openalex.org/W4229726131","https://openalex.org/W3101585944","https://openalex.org/W3097632046","https://openalex.org/W3042490037","https://openalex.org/W2323122434","https://openalex.org/W1497899746"],"abstract_inverted_index":{"We":[0],"are":[1,162],"motivated":[2],"by":[3,153,170,190],"the":[4,23,54,164],"goal":[5],"of":[6,15,30,107,119,126,135,172,178,185],"generalist":[7],"robots":[8],"that":[9,148,189],"can":[10,168,199,215],"complete":[11],"a":[12,49,92,132,141,155,175,182,229,238],"wide":[13],"range":[14,134],"tasks":[16,130,227],"across":[17,67,131],"many":[18],"environments.Critical":[19],"to":[20,26,45,64,157,203,210,223],"this":[21,57,137,195],"is":[22,36],"robot's":[24],"ability":[25],"acquire":[27],"some":[28],"metric":[29],"task":[31],"success":[32],"or":[33,42],"reward,":[34],"which":[35],"necessary":[37],"for":[38,47],"reinforcement":[39],"learning,":[40],"planning,":[41],"knowing":[43],"when":[44],"ask":[46],"help.For":[48],"general-purpose":[50],"robot":[51,179,232],"operating":[52],"in":[53,97,233],"real":[55,230],"world,":[56],"reward":[58,151,196],"function":[59,197],"must":[60],"also":[61],"be":[62,216],"able":[63],"generalize":[65,169,200,207],"broadly":[66],"environments,":[68,205],"tasks,":[69,212],"and":[70,85,100,167,213],"objects,":[71],"while":[72],"depending":[73],"only":[74],"on":[75,83,228],"on-board":[76],"sensor":[77],"observations":[78],"(e.g.RGB":[79],"images).While":[80],"deep":[81],"learning":[82,173],"large":[84],"diverse":[86,133,192],"datasets":[87,106],"has":[88],"shown":[89],"promise":[90],"as":[91],"path":[93],"towards":[94],"such":[95],"generalization":[96],"computer":[98],"vision":[99],"natural":[101],"language,":[102],"collecting":[103],"high":[104],"quality":[105],"robotic":[108,225],"interaction":[109],"at":[110],"scale":[111],"remains":[112],"an":[113,123,234],"open":[114],"challenge.In":[115],"contrast,":[116],"\"in-the-wild\"":[117],"videos":[118,161],"humans":[120],"(e.g.YouTube)":[121],"contain":[122],"extensive":[124],"collection":[125],"people":[127],"doing":[128],"interesting":[129],"settings.In":[136],"work,":[138],"we":[139],"propose":[140],"simple":[142],"approach,":[143],"Domain-agnostic":[144],"Video":[145],"Discriminator":[146],"(DVD),":[147],"learns":[149],"multitask":[150],"functions":[152],"training":[154],"discriminator":[156],"classify":[158],"whether":[159],"two":[160],"performing":[163],"same":[165],"task,":[166],"virtue":[171],"from":[174,237],"small":[176],"amount":[177],"data":[180],"with":[181,218],"broad":[183],"dataset":[184],"human":[186,193,240],"videos.We":[187],"find":[188],"leveraging":[191],"datasets,":[194],"(a)":[198],"zero":[201,208],"shot":[202,209],"unseen":[204,211,235],"(b)":[206],"(c)":[214],"combined":[217],"visual":[219],"model":[220],"predictive":[221],"control":[222],"solve":[224],"manipulation":[226],"WidowX200":[231],"environment":[236],"single":[239],"demo.":[241]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W3175995235","counts_by_year":[{"year":2024,"cited_by_count":13},{"year":2023,"cited_by_count":14},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":1}],"updated_date":"2025-04-26T12:35:15.510939","created_date":"2021-07-05"}