{"id":"https://openalex.org/W4281488000","doi":"https://doi.org/10.48550/arxiv.2205.10816","title":"Chain of Thought Imitation with Procedure Cloning","display_name":"Chain of Thought Imitation with Procedure Cloning","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4281488000","doi":"https://doi.org/10.48550/arxiv.2205.10816"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2205.10816","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2205.10816","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100546518","display_name":"Mengjiao Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Mengjiao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010575626","display_name":"Dale Schuurmans","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Schuurmans, Dale","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049349154","display_name":"Pieter Abbeel","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Abbeel, Pieter","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5057773393","display_name":"Ofir Nachum","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nachum, Ofir","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.824796,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":75,"max":78},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9947,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9947,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10906","display_name":"AI-based Problem Solving and Planning","score":0.9837,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.982,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.7250129},{"id":"https://openalex.org/keywords/cloning","display_name":"Cloning (programming)","score":0.46717247}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.73542607},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.7250129},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6395971},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5580789},{"id":"https://openalex.org/C58328972","wikidata":"https://www.wikidata.org/wiki/Q184609","display_name":"Expert system","level":2,"score":0.48632535},{"id":"https://openalex.org/C121050878","wikidata":"https://www.wikidata.org/wiki/Q5135020","display_name":"Cloning (programming)","level":2,"score":0.46717247},{"id":"https://openalex.org/C126388530","wikidata":"https://www.wikidata.org/wiki/Q1131737","display_name":"Imitation","level":2,"score":0.4619456},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.45180902},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.44815785},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.33144468},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2205.10816","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2205.10816","pdf_url":"http://arxiv.org/pdf/2205.10816","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2205.10816","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2205.10816","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4387497383","https://openalex.org/W4379251913","https://openalex.org/W3183948672","https://openalex.org/W3173606202","https://openalex.org/W3110381201","https://openalex.org/W2948807893","https://openalex.org/W2935909890","https://openalex.org/W2778153218","https://openalex.org/W2758277628","https://openalex.org/W1531601525"],"abstract_inverted_index":{"Imitation":[0],"learning":[1,19,23,51,56],"aims":[2],"to":[3,16,32,43,119,128,141,156,163,182,195,210,220,251,257],"extract":[4],"high-performance":[5],"policies":[6,253],"from":[7],"logged":[8,39],"demonstrations":[9,40,82,104],"of":[10,49,65,74,161,199,244],"expert":[11,81,88,103,168,178,200],"behavior.":[12,89],"It":[13],"is":[14,68,271],"common":[15],"frame":[17],"imitation":[18,50],"as":[20,52,94,153],"a":[21,29,53,62,154],"supervised":[22,54,192],"problem":[24,57,76],"in":[25,61,77],"which":[26,190,265],"one":[27],"fits":[28],"function":[30],"approximator":[31],"the":[33,38,47,75,80,116,124,142,174,177,184,197,213,224,241,267],"input-output":[34,55],"mapping":[35,160],"exhibited":[36],"by":[37],"(input":[41],"observations":[42],"output":[44,117,214],"actions).":[45],"While":[46,132],"framing":[48],"allows":[58],"for":[59,126,264],"applicability":[60],"wide":[63],"variety":[64],"settings,":[66],"it":[67,222],"also":[69,123],"an":[70,158,245],"overly":[71],"simplistic":[72],"view":[73],"situations":[78],"where":[79],"provide":[83],"much":[84],"richer":[85],"insight":[86],"into":[87],"For":[90],"example,":[91],"applications":[92],"such":[93],"path":[95],"navigation,":[96,230],"robot":[97],"manipulation,":[98,233],"and":[99,218,234],"strategy":[100],"games":[101],"acquire":[102],"via":[105],"planning,":[106],"search,":[107],"or":[108],"some":[109],"other":[110],"multi-step":[111],"algorithm,":[112],"revealing":[113],"not":[114,139,207],"just":[115],"action":[118],"be":[120],"imitated":[121],"but":[122,216],"procedure":[125,169,188,204,249,269],"how":[127,217],"determine":[129],"this":[130],"action.":[131],"these":[133],"intermediate":[134,242],"computations":[135,243],"may":[136,179],"use":[137],"tools":[138,176],"available":[140],"agent":[143],"during":[144],"inference":[145],"(e.g.,":[146],"environment":[147,259],"simulators),":[148],"they":[149],"are":[150],"nevertheless":[151],"informative":[152],"way":[155],"explain":[157],"expert's":[159,246,268],"state":[162],"actions.":[164],"To":[165],"properly":[166],"leverage":[167],"information":[170],"without":[171],"relying":[172],"on":[173,229],"privileged":[175],"have":[180],"used":[181],"perform":[183],"procedure,":[185],"we":[186,237],"propose":[187],"cloning,":[189],"applies":[191],"sequence":[193],"prediction":[194],"imitate":[196],"series":[198],"computations.":[201],"This":[202],"way,":[203],"cloning":[205,250],"learns":[206],"only":[208],"what":[209],"do":[211,221],"(i.e.,":[212,223],"action),":[215],"why":[219],"procedure).":[225],"Through":[226],"empirical":[227],"analysis":[228],"simulated":[231],"robotic":[232],"game-playing":[235],"environments,":[236],"show":[238],"that":[239],"imitating":[240],"behavior":[247],"enables":[248],"learn":[252],"exhibiting":[254],"significant":[255],"generalization":[256],"unseen":[258],"configurations,":[260],"including":[261],"those":[262],"configurations":[263],"running":[266],"directly":[270],"infeasible.":[272]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4281488000","counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1}],"updated_date":"2025-04-23T20:26:11.710645","created_date":"2022-05-26"}