{"id":"https://openalex.org/W4388328933","doi":"https://doi.org/10.48550/arxiv.2311.00926","title":"M2T2: Multi-Task Masked Transformer for Object-centric Pick and Place","display_name":"M2T2: Multi-Task Masked Transformer for Object-centric Pick and Place","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4388328933","doi":"https://doi.org/10.48550/arxiv.2311.00926"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2311.00926","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2311.00926","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058911745","display_name":"Wentao Yuan","orcid":"https://orcid.org/0000-0002-3836-8877"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuan, Wentao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114179160","display_name":"Adithyavairavan Murali","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Murali, Adithyavairavan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021728625","display_name":"Arsalan Mousavian","orcid":"https://orcid.org/0000-0001-9356-9455"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mousavian, Arsalan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5108257764","display_name":"Dieter Fox","orcid":"https://orcid.org/0009-0009-4694-9127"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fox, Dieter","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":69},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robotic Grasping and Learning from Demonstration","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robotic Grasping and Learning from Demonstration","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Deep Learning in Computer Vision and Image Recognition","score":0.9958,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Visual Question Answering in Images and Videos","score":0.9944,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/3d-object-recognition","display_name":"3D Object Recognition","score":0.525476},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object Detection","score":0.52519},{"id":"https://openalex.org/keywords/object-pose-estimation","display_name":"Object Pose Estimation","score":0.519556},{"id":"https://openalex.org/keywords/robot-learning","display_name":"Robot Learning","score":0.512618},{"id":"https://openalex.org/keywords/human-robot-collaboration","display_name":"Human-Robot Collaboration","score":0.510036},{"id":"https://openalex.org/keywords/bridge","display_name":"Bridge (graph theory)","score":0.49387437}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8242783},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.6590354},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6503233},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6185269},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.55955166},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5378899},{"id":"https://openalex.org/C131979681","wikidata":"https://www.wikidata.org/wiki/Q1899648","display_name":"Point cloud","level":2,"score":0.5009661},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.49387437},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.43946967},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.32517874},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08345616},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C126322002","wikidata":"https://www.wikidata.org/wiki/Q11180","display_name":"Internal medicine","level":1,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2311.00926","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2311.00926","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2311.00926","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"display_name":"Peace, justice, and strong institutions","score":0.83,"id":"https://metadata.un.org/sdg/16"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W657108774","https://openalex.org/W4389574804","https://openalex.org/W3016928466","https://openalex.org/W2936725271","https://openalex.org/W2615136228","https://openalex.org/W2480620430","https://openalex.org/W2390192952","https://openalex.org/W2373296418","https://openalex.org/W1515761309","https://openalex.org/W1501776718"],"abstract_inverted_index":{"With":[0],"the":[1,45,123,140,144,163],"advent":[2],"of":[3,47,60,70,86,122,180,187],"large":[4],"language":[5,32,181],"models":[6,24,55,150],"and":[7,108,157,197],"large-scale":[8,128],"robotic":[9],"datasets,":[10],"there":[11],"has":[12],"been":[13],"tremendous":[14],"progress":[15],"in":[16,57,95,154,159,184,193],"high-level":[17],"decision-making":[18],"for":[19,66,113,169],"object":[20,164],"manipulation.":[21],"These":[22],"generic":[23],"are":[25,199],"able":[26],"to":[27,40,44,166],"interpret":[28],"complex":[29],"tasks":[30,183],"using":[31],"commands,":[33],"but":[34,63],"they":[35],"often":[36],"have":[37],"difficulties":[38],"generalizing":[39],"out-of-distribution":[41],"objects":[42,94,192],"due":[43],"inability":[46],"low-level":[48,58,87],"action":[49,115],"primitives.":[50],"In":[51],"contrast,":[52],"existing":[53],"task-specific":[54,149],"excel":[56],"manipulation":[59],"unknown":[61],"objects,":[62],"only":[64],"work":[65,90],"a":[67,79,100,118,127,178],"single":[68,80],"type":[69],"action.":[71],"To":[72],"bridge":[73],"this":[74],"gap,":[75],"we":[76],"present":[77],"M2T2,":[78],"model":[81,102],"that":[82,89],"supplies":[83],"different":[84,114],"types":[85],"actions":[88],"robustly":[91],"on":[92,126,139,177,190,201],"arbitrary":[93],"cluttered":[96],"scenes.":[97],"M2T2":[98,134,172],"is":[99],"transformer":[101],"which":[103],"reasons":[104],"about":[105,152],"contact":[106],"points":[107],"predicts":[109],"valid":[110],"gripper":[111],"poses":[112],"modes":[116],"given":[117],"raw":[119],"point":[120],"cloud":[121],"scene.":[124],"Trained":[125],"synthetic":[129],"dataset":[130],"with":[131,147],"128K":[132],"scenes,":[133],"achieves":[135,174],"zero-shot":[136],"sim2real":[137],"transfer":[138],"real":[141,195],"robot,":[142],"outperforming":[143],"baseline":[145],"system":[146],"state-of-the-art":[148,175],"by":[151],"19%":[153],"overall":[155],"performance":[156],"37.5%":[158],"challenging":[160],"scenes":[161],"where":[162],"needs":[165],"be":[167],"re-oriented":[168],"collision-free":[170],"placement.":[171],"also":[173],"results":[176],"subset":[179],"conditioned":[182],"RLBench.":[185],"Videos":[186],"robot":[188],"experiments":[189],"unseen":[191],"both":[194],"world":[196],"simulation":[198],"available":[200],"our":[202],"project":[203],"website":[204],"https://m2-t2.github.io.":[205]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4388328933","counts_by_year":[],"updated_date":"2024-11-15T17:47:45.495281","created_date":"2023-11-04"}