{"id":"https://openalex.org/W4387390130","doi":"https://doi.org/10.48550/arxiv.2310.02751","title":"SHOT: Suppressing the Hessian along the Optimization Trajectory for Gradient-Based Meta-Learning","display_name":"SHOT: Suppressing the Hessian along the Optimization Trajectory for Gradient-Based Meta-Learning","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4387390130","doi":"https://doi.org/10.48550/arxiv.2310.02751"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2310.02751","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2310.02751","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114088588","display_name":"JunHoo Lee","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, JunHoo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103134818","display_name":"Jayeon Yoo","orcid":"https://orcid.org/0000-0002-8461-2260"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yoo, Jayeon","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5084897975","display_name":"Nojun Kwak","orcid":"https://orcid.org/0000-0002-1792-0327"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kwak, Nojun","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.787004,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":68,"max":79},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9987,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9987,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9879,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9717,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hessian-matrix","display_name":"Hessian matrix","score":0.9374016},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.6016687},{"id":"https://openalex.org/keywords/few-shot-learning","display_name":"Few-Shot Learning","score":0.558524},{"id":"https://openalex.org/keywords/robust-learning","display_name":"Robust Learning","score":0.543283},{"id":"https://openalex.org/keywords/meta-learning","display_name":"Meta-Learning","score":0.537132},{"id":"https://openalex.org/keywords/hyperparameter-optimization","display_name":"Hyperparameter Optimization","score":0.526033},{"id":"https://openalex.org/keywords/semi-supervised-learning","display_name":"Semi-Supervised Learning","score":0.514212},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.47476056},{"id":"https://openalex.org/keywords/optimization-algorithm","display_name":"Optimization algorithm","score":0.45460033}],"concepts":[{"id":"https://openalex.org/C203616005","wikidata":"https://www.wikidata.org/wiki/Q620495","display_name":"Hessian matrix","level":2,"score":0.9374016},{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.73656774},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.72711074},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.655174},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.6016687},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.47476056},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47333437},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.45707613},{"id":"https://openalex.org/C2987595161","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Optimization algorithm","level":2,"score":0.45460033},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.43326056},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3184011},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.25712296},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.17382374},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.07116768},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C1276947","wikidata":"https://www.wikidata.org/wiki/Q333","display_name":"Astronomy","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2310.02751","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2310.02751","pdf_url":"http://arxiv.org/pdf/2310.02751","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2310.02751","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2310.02751","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4283017538","https://openalex.org/W3212179065","https://openalex.org/W3087997050","https://openalex.org/W2611031068","https://openalex.org/W2348923905","https://openalex.org/W2184070258","https://openalex.org/W2126585480","https://openalex.org/W2073248645","https://openalex.org/W1996936972","https://openalex.org/W1704347466"],"abstract_inverted_index":{"In":[0],"this":[1,23],"paper,":[2],"we":[3,25,104],"hypothesize":[4],"that":[5,38,125],"gradient-based":[6],"meta-learning":[7],"(GBML)":[8],"implicitly":[9],"suppresses":[10],"the":[11,14,18,32,35,40,43,46,53,56,68,72,81,100,128],"Hessian":[12,33,54],"along":[13,34],"optimization":[15],"trajectory":[16],"in":[17,55,86],"inner":[19,57],"loop.":[20,58],"Based":[21],"on":[22,108],"hypothesis,":[24],"introduce":[26],"an":[27],"algorithm":[28,82],"called":[29],"SHOT":[30,64,126],"(Suppressing":[31],"Optimization":[36],"Trajectory)":[37],"minimizes":[39],"distance":[41],"between":[42],"parameters":[44],"of":[45,71,102],"target":[47],"and":[48,83,92,113,123],"reference":[49],"models":[50],"to":[51,79,94],"suppress":[52],"Despite":[59],"dealing":[60],"with":[61],"high-order":[62],"terms,":[63],"does":[65],"not":[66],"increase":[67],"computational":[69],"complexity":[70],"baseline":[73],"model":[74],"much.":[75],"It":[76],"is":[77,132],"agnostic":[78],"both":[80],"architecture":[84],"used":[85],"GBML,":[87],"making":[88],"it":[89],"highly":[90],"versatile":[91],"applicable":[93],"any":[95],"GBML":[96],"baseline.":[97,130],"To":[98],"validate":[99],"effectiveness":[101],"SHOT,":[103],"conduct":[105],"empirical":[106],"tests":[107],"standard":[109],"few-shot":[110],"learning":[111],"tasks":[112],"qualitatively":[114],"analyze":[115],"its":[116],"dynamics.":[117],"We":[118],"confirm":[119],"our":[120],"hypothesis":[121],"empirically":[122],"demonstrate":[124],"outperforms":[127],"corresponding":[129],"Code":[131],"available":[133],"at:":[134],"https://github.com/JunHoo-Lee/SHOT":[135]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4387390130","counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2024-12-05T04:21:59.765588","created_date":"2023-10-06"}