{"id":"https://openalex.org/W4399400985","doi":"https://doi.org/10.48550/arxiv.2406.01255","title":"On the Nonlinearity of Layer Normalization","display_name":"On the Nonlinearity of Layer Normalization","publication_year":2024,"publication_date":"2024-06-03","ids":{"openalex":"https://openalex.org/W4399400985","doi":"https://doi.org/10.48550/arxiv.2406.01255"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2406.01255","pdf_url":"https://arxiv.org/pdf/2406.01255","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2406.01255","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101233662","display_name":"Yunhao Ni","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ni, Yunhao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108901356","display_name":"Yuxin Guo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Yuxin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113242149","display_name":"Junlong Jia","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jia, Junlong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5101670805","display_name":"Lei Huang","orcid":"https://orcid.org/0000-0002-3131-6417"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Lei","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":77},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11236","display_name":"Control Systems and Identification","score":0.7807,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11236","display_name":"Control Systems and Identification","score":0.7807,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization","score":0.8096994}],"concepts":[{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.8096994},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.55947435},{"id":"https://openalex.org/C2779227376","wikidata":"https://www.wikidata.org/wiki/Q6505497","display_name":"Layer (electronics)","level":2,"score":0.45711493},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.38054985},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.33323747},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.23147273},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.20257425},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.12369624},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.109273106},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2406.01255","pdf_url":"https://arxiv.org/pdf/2406.01255","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2406.01255","pdf_url":"https://arxiv.org/pdf/2406.01255","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W3002753104","https://openalex.org/W2748952813","https://openalex.org/W2591697403","https://openalex.org/W2142036596","https://openalex.org/W2077600819","https://openalex.org/W2072657027","https://openalex.org/W2061531152","https://openalex.org/W2007980826","https://openalex.org/W1979597421"],"abstract_inverted_index":{"Layer":[0],"normalization":[1],"(LN)":[2],"is":[3,106,140],"a":[4,21,40],"ubiquitous":[5],"technique":[6],"in":[7,71],"deep":[8],"learning":[9],"but":[10],"our":[11,117,121,143],"theoretical":[12,23],"understanding":[13],"to":[14,28,51,125],"it":[15],"remains":[16],"elusive.":[17],"This":[18],"paper":[19],"investigates":[20],"new":[22],"direction":[24],"for":[25],"LN,":[26,136],"regarding":[27],"its":[29],"nonlinearity":[30,96,134],"and":[31,47,74,113,131,137],"representation":[32,37],"capacity.":[33],"We":[34,54,82],"investigate":[35],"the":[36,85,89,133,138],"capacity":[38],"of":[39,45,88,92,97,135],"network":[41],"with":[42,61,67,110],"layerwise":[43],"composition":[44],"linear":[46],"LN":[48,76,98],"transformations,":[49],"referred":[50],"as":[52],"LN-Net.":[53,94],"theoretically":[55,108],"show":[56,84],"that,":[57],"given":[58],"$m$":[59],"samples":[60],"any":[62],"label":[63],"assignment,":[64],"an":[65,93],"LN-Net":[66],"only":[68],"3":[69],"neurons":[70],"each":[72],"layer":[73],"$O(m)$":[75],"layers":[77],"can":[78,99],"correctly":[79],"classify":[80],"them.":[81],"further":[83],"lower":[86],"bound":[87],"VC":[90],"dimension":[91],"The":[95],"be":[100],"amplified":[101],"by":[102,116,129,142],"group":[103],"partition,":[104],"which":[105],"also":[107],"demonstrated":[109],"mild":[111],"assumption":[112],"empirically":[114],"supported":[115,141],"experiments.":[118,144],"Based":[119],"on":[120],"analyses,":[122],"we":[123],"consider":[124],"design":[126],"neural":[127],"architecture":[128],"exploiting":[130],"amplifying":[132],"effectiveness":[139]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4399400985","counts_by_year":[],"updated_date":"2025-04-23T18:14:26.045236","created_date":"2024-06-07"}