{"id":"https://openalex.org/W4387561266","doi":"https://doi.org/10.48550/arxiv.2310.06588","title":"FTFT: efficient and robust Fine-Tuning by transFerring Training dynamics","display_name":"FTFT: efficient and robust Fine-Tuning by transFerring Training dynamics","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4387561266","doi":"https://doi.org/10.48550/arxiv.2310.06588"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2310.06588","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2310.06588","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5043954721","display_name":"Yupei Du","orcid":"https://orcid.org/0009-0008-6751-6112"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Du, Yupei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027041248","display_name":"Albert Gatt","orcid":"https://orcid.org/0000-0001-6388-8244"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gatt, Albert","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5004209770","display_name":"Dong Nguyen","orcid":"https://orcid.org/0000-0002-6062-3117"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nguyen, Dong","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":{"min":0,"max":67},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10784","display_name":"Muscle activation and electromyography studies","score":0.6753,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10784","display_name":"Muscle activation and electromyography studies","score":0.6753,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/dynamics","display_name":"Dynamics","score":0.7163544}],"concepts":[{"id":"https://openalex.org/C145912823","wikidata":"https://www.wikidata.org/wiki/Q113558","display_name":"Dynamics (music)","level":2,"score":0.7163544},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.7030798},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5210068},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.137647},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.044699043},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2310.06588","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2310.06588","pdf_url":"http://arxiv.org/pdf/2310.06588","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2310.06588","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2310.06588","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality education","score":0.63}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2810751659","https://openalex.org/W2748952813","https://openalex.org/W258997015","https://openalex.org/W2390279801","https://openalex.org/W2376932109","https://openalex.org/W2358668433","https://openalex.org/W230091440","https://openalex.org/W2233261550","https://openalex.org/W2001405890"],"abstract_inverted_index":{"Despite":[0],"the":[1,27,38,54,73,160],"massive":[2],"success":[3],"of":[4,29,48],"fine-tuning":[5,34,58,72,105,130],"Pre-trained":[6],"Language":[7],"Models":[8],"(PLMs),":[9],"they":[10],"remain":[11],"susceptible":[12],"to":[13,165],"out-of-distribution":[14],"input.":[15],"Dataset":[16],"cartography":[17],"is":[18,78],"a":[19,35,46,128],"simple":[20],"yet":[21],"effective":[22],"dual-model":[23],"approach":[24,70],"that":[25,89,103],"improves":[26],"robustness":[28,154],"fine-tuned":[30],"PLMs.":[31,83],"It":[32],"involves":[33],"model":[36,75,97],"on":[37,53,61,123],"original":[39],"training":[40,50,55,91,111,115,161],"set":[41],"(i.e.":[42,65],"reference":[43,146],"model),":[44],"selecting":[45],"subset":[47],"important":[49],"instances":[51,112],"based":[52],"dynamics,":[56],"and":[57,99,102,148],"again":[59],"only":[60],"these":[62,109,124],"selected":[63,110],"examples":[64],"main":[66,106],"model).":[67],"However,":[68],"this":[69,85],"requires":[71],"same":[74],"twice,":[76],"which":[77],"computationally":[79],"expensive":[80],"for":[81],"large":[82],"In":[84],"paper,":[86],"we":[87,126],"show":[88],"(1)":[90],"dynamics":[92,136],"are":[93],"highly":[94],"transferable":[95],"across":[96],"sizes":[98],"pre-training":[100],"methods,":[101],"(2)":[104],"models":[107,147],"using":[108],"achieves":[113,153],"higher":[114],"efficiency":[116],"than":[117],"empirical":[118],"risk":[119],"minimization":[120],"(ERM).":[121],"Building":[122],"observations,":[125],"propose":[127],"novel":[129],"approach:":[131],"Fine-Tuning":[132],"by":[133,163],"transFerring":[134],"Training":[135],"(FTFT).":[137],"Compared":[138],"with":[139],"dataset":[140],"cartography,":[141],"FTFT":[142,152],"uses":[143],"more":[144],"efficient":[145],"aggressive":[149],"early":[150],"stopping.":[151],"improvements":[155],"over":[156],"ERM":[157],"while":[158],"lowering":[159],"cost":[162],"up":[164],"$\\sim":[166],"50\\%$.":[167]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4387561266","counts_by_year":[],"updated_date":"2025-01-06T06:30:48.415273","created_date":"2023-10-12"}