{"id":"https://openalex.org/W4376167126","doi":"https://doi.org/10.48550/arxiv.2305.05948","title":"Multi-Path Transformer is Better: A Case Study on Neural Machine Translation","display_name":"Multi-Path Transformer is Better: A Case Study on Neural Machine Translation","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4376167126","doi":"https://doi.org/10.48550/arxiv.2305.05948"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2305.05948","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2305.05948","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101230949","display_name":"Ye Lin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin, Ye","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076295893","display_name":"Shuhan Zhou","orcid":"https://orcid.org/0000-0002-7131-5144"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Shuhan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101595086","display_name":"Yanyang Li","orcid":"https://orcid.org/0000-0002-6534-0618"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Yanyang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044361097","display_name":"Anxiang Ma","orcid":"https://orcid.org/0009-0007-4290-8171"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Anxiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100600701","display_name":"Tong Xiao","orcid":"https://orcid.org/0000-0002-5842-6501"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiao, Tong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100370155","display_name":"Jingbo Zhu","orcid":"https://orcid.org/0000-0002-6537-7007"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Jingbo","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":68},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9629,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9629,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9358,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.7086473},{"id":"https://openalex.org/keywords/neural-machine-translation","display_name":"Neural Machine Translation","score":0.540695},{"id":"https://openalex.org/keywords/multilingual-neural-machine-translation","display_name":"Multilingual Neural Machine Translation","score":0.51623},{"id":"https://openalex.org/keywords/meta-learning","display_name":"Meta-Learning","score":0.50538},{"id":"https://openalex.org/keywords/fuse","display_name":"Fuse (electrical)","score":0.43801576}],"concepts":[{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.74894124},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.71239007},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.7086473},{"id":"https://openalex.org/C2777735758","wikidata":"https://www.wikidata.org/wiki/Q817765","display_name":"Path (computing)","level":2,"score":0.50554967},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46974212},{"id":"https://openalex.org/C141353440","wikidata":"https://www.wikidata.org/wiki/Q182221","display_name":"Fuse (electrical)","level":2,"score":0.43801576},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.43677056},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.33394897},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.24035123},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.14537391},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.09594223},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2305.05948","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2305.05948","pdf_url":"http://arxiv.org/pdf/2305.05948","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2305.05948","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2305.05948","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4289548192","https://openalex.org/W3176018525","https://openalex.org/W3066373881","https://openalex.org/W3026554633","https://openalex.org/W2952599318","https://openalex.org/W2949454572","https://openalex.org/W2903810591","https://openalex.org/W2903533908","https://openalex.org/W2890256614","https://openalex.org/W2888520903"],"abstract_inverted_index":{"For":[0,16],"years":[1],"the":[2,13,17,45,72,105,110,122,134,144],"model":[3,14,27,42,47,113,145],"performance":[4,120],"in":[5],"machine":[6,99],"learning":[7],"obeyed":[8],"a":[9,49,69,77,85,141,151],"power-law":[10],"relationship":[11],"with":[12,104],"size.":[15],"consideration":[18],"of":[19,74,108],"parameter":[20],"efficiency,":[21],"recent":[22],"studies":[23],"focus":[24],"on":[25,96],"increasing":[26],"depth":[28,146],"rather":[29],"than":[30,121],"width":[31,43,148],"to":[32,66,80,89,133,149],"achieve":[33,115],"better":[34,54,119,152],"performance.":[35],"In":[36],"this":[37],"paper,":[38],"we":[39,61,128],"study":[40],"how":[41],"affects":[44],"Transformer":[46],"through":[48],"parameter-efficient":[50],"multi-path":[51,112,135],"structure.":[52],"To":[53],"fuse":[55,90],"features":[56,92],"extracted":[57],"from":[58],"different":[59],"paths,":[60],"add":[62],"three":[63],"additional":[64],"operations":[65],"each":[67,75],"sublayer:":[68],"normalization":[70],"at":[71],"end":[73],"path,":[76],"cheap":[78],"operation":[79],"produce":[81],"more":[82,131],"features,":[83],"and":[84,137,147],"learnable":[86],"weighted":[87],"mechanism":[88],"all":[91],"flexibly.":[93],"Extensive":[94],"experiments":[95],"12":[97],"WMT":[98],"translation":[100],"tasks":[101],"show":[102],"that,":[103],"same":[106],"number":[107],"parameters,":[109],"shallower":[111],"can":[114],"similar":[116],"or":[117],"even":[118],"deeper":[123],"model.":[124],"It":[125],"reveals":[126],"that":[127],"should":[129,139],"pay":[130],"attention":[132],"structure,":[136],"there":[138],"be":[140],"balance":[142],"between":[143],"train":[150],"large-scale":[153],"Transformer.":[154]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4376167126","counts_by_year":[],"updated_date":"2024-12-05T07:58:57.632592","created_date":"2023-05-12"}