{"id":"https://openalex.org/W4320340995","doi":"https://doi.org/10.48550/arxiv.2206.15144","title":"Neural Networks can Learn Representations with Gradient Descent","display_name":"Neural Networks can Learn Representations with Gradient Descent","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4320340995","doi":"https://doi.org/10.48550/arxiv.2206.15144"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2206.15144","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2206.15144","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5051442769","display_name":"Alex Damian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Damian, Alex","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059740024","display_name":"Jason D. Lee","orcid":"https://orcid.org/0000-0003-0064-7800"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, Jason D.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5046962187","display_name":"Mahdi Soltanolkotabi","orcid":"https://orcid.org/0000-0003-2101-6418"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Soltanolkotabi, Mahdi","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.824796,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":78,"max":81},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9908,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9908,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12676","display_name":"Machine Learning and ELM","score":0.9736,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.957,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/star","display_name":"Star (game theory)","score":0.72963625},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.6356793},{"id":"https://openalex.org/keywords/representation","display_name":"Representation","score":0.5915615},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.55884385}],"concepts":[{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.7585174},{"id":"https://openalex.org/C2780897414","wikidata":"https://www.wikidata.org/wiki/Q7600592","display_name":"Star (game theory)","level":2,"score":0.72963625},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.6356793},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5915615},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.57513577},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.55884385},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5454481},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46784624},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.46627027},{"id":"https://openalex.org/C90119067","wikidata":"https://www.wikidata.org/wiki/Q43260","display_name":"Polynomial","level":2,"score":0.46453196},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.4555896},{"id":"https://openalex.org/C122280245","wikidata":"https://www.wikidata.org/wiki/Q620622","display_name":"Kernel method","level":3,"score":0.43246055},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.32510465},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.21712592},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.09249154},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2206.15144","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2206.15144","pdf_url":"http://arxiv.org/pdf/2206.15144","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2206.15144","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2206.15144","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4386075310","https://openalex.org/W3123056048","https://openalex.org/W2363184354","https://openalex.org/W2169565408","https://openalex.org/W2150638158","https://openalex.org/W2137862631","https://openalex.org/W2127229869","https://openalex.org/W2121506664","https://openalex.org/W2089892314","https://openalex.org/W1603091392"],"abstract_inverted_index":{"Significant":[0],"theoretical":[1],"work":[2],"has":[3,230],"established":[4],"that":[5,25,42,79,89,146,164,223],"in":[6,20,100,156,184,196,205],"specific":[7],"regimes,":[8],"neural":[9,26,70],"networks":[10,27],"trained":[11],"by":[12,40,55,76],"gradient":[13,64,165],"descent":[14,65,166],"behave":[15],"like":[16],"kernel":[17,56,74,102,158],"methods.":[18],"However,":[19],"practice,":[21],"it":[22,143],"is":[23,44,98,141,144,163],"known":[24,145],"strongly":[28],"outperform":[29],"their":[30],"associated":[31],"kernels.":[32],"In":[33],"this":[34,38],"work,":[35],"we":[36,105,221],"explain":[37],"gap":[39],"demonstrating":[41],"there":[43],"a":[45,67,116,168,197,224,231],"large":[46],"class":[47],"of":[48,109,121,139,170,189,236],"functions":[49],"which":[50,97,112,173],"cannot":[51],"be":[52,60],"efficiently":[53],"learned":[54,62],"methods":[57],"but":[58,216],"can":[59],"easily":[61],"with":[63,132],"on":[66,114,176],"two":[68],"layer":[69],"network":[71],"outside":[72],"the":[73,83,101,107,122,137,157,171,177,202,206,212],"regime":[75],"learning":[77,110,199,229],"representations":[78,91],"are":[80,151],"relevant":[81,118,179],"to":[82,153,180],"target":[84,209,232],"task.":[85],"We":[86],"also":[87],"demonstrate":[88],"these":[90],"allow":[92],"for":[93,227],"efficient":[94],"transfer":[95,198,228],"learning,":[96],"impossible":[99],"regime.":[103,159],"Specifically,":[104],"consider":[106],"problem":[108],"polynomials":[111],"depend":[113],"only":[115,175],"few":[117],"directions,":[119],"i.e.":[120],"form":[123],"$f^\\star(x)":[124],"=":[125],"g(Ux)$":[126],"where":[127,201],"$U:":[128],"\\R^d":[129],"\\to":[130],"\\R^r$":[131],"$d":[133],"\\gg":[134],"r$.":[135],"When":[136],"degree":[138],"$f^\\star$":[140,155],"$p$,":[142],"$n":[147],"\\asymp":[148],"d^p$":[149],"samples":[150],"necessary":[152],"learn":[154],"Our":[160],"primary":[161],"result":[162],"learns":[167],"representation":[169,214],"data":[172,203],"depends":[174],"directions":[178],"$f^\\star$.":[181],"This":[182],"results":[183],"an":[185],"improved":[186],"sample":[187,233],"complexity":[188,234],"$n\\asymp":[190],"d^2":[191],"r":[192],"+":[193],"dr^p$.":[194],"Furthermore,":[195],"setup":[200],"distributions":[204],"source":[207],"and":[208],"domain":[210],"share":[211],"same":[213],"$U$":[215],"have":[217],"different":[218],"polynomial":[219],"heads":[220],"show":[222],"popular":[225],"heuristic":[226],"independent":[235],"$d$.":[237]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4320340995","counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":3}],"updated_date":"2025-04-25T11:56:52.567143","created_date":"2023-02-13"}