{"id":"https://openalex.org/W4403853665","doi":"https://doi.org/10.48550/arxiv.2409.18511","title":"Do We Need Domain-Specific Embedding Models? An Empirical Investigation","display_name":"Do We Need Domain-Specific Embedding Models? An Empirical Investigation","publication_year":2024,"publication_date":"2024-09-27","ids":{"openalex":"https://openalex.org/W4403853665","doi":"https://doi.org/10.48550/arxiv.2409.18511"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2409.18511","pdf_url":"http://arxiv.org/pdf/2409.18511","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2409.18511","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5085140523","display_name":"Yuan Yan Tang","orcid":"https://orcid.org/0000-0002-6887-130X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tang, Yixuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5101416986","display_name":"Yixin Yang","orcid":"https://orcid.org/0000-0001-7997-3849"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Yi","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":78},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.2471,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.2471,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.64447683},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.57302076},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.48954996},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.36156705},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.24992213},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2154252},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.05202663}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2409.18511","pdf_url":"http://arxiv.org/pdf/2409.18511","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2409.18511","pdf_url":"http://arxiv.org/pdf/2409.18511","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4396701345","https://openalex.org/W4391913857","https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2376932109","https://openalex.org/W2358668433","https://openalex.org/W2081900870","https://openalex.org/W2001405890"],"abstract_inverted_index":{"Embedding":[0,53,109],"models":[1,43,72,76,131,184,202,232],"play":[2],"a":[3,62,112,136],"crucial":[4],"role":[5],"in":[6,17,173,233],"representing":[7],"and":[8,134,168,190],"retrieving":[9],"information":[10],"across":[11],"various":[12],"NLP":[13],"applications.":[14],"Recent":[15],"advancements":[16],"Large":[18],"Language":[19],"Models":[20],"(LLMs)":[21],"have":[22],"further":[23],"enhanced":[24],"the":[25,67,97,105,125,149,197,214,234],"performance":[26,126,138,143,198,210],"of":[27,36,69,118,127,199],"embedding":[28,71,130,183,201,218,222,231],"models,":[29],"which":[30],"are":[31,44,77],"trained":[32,78],"on":[33,47,79,132,144,203,211,228],"massive":[34],"amounts":[35],"text":[37,121],"covering":[38],"almost":[39],"every":[40],"domain.":[41],"These":[42],"often":[45],"benchmarked":[46],"general-purpose":[48,75,200],"datasets":[49],"like":[50],"Massive":[51,107],"Text":[52,108],"Benchmark":[54,110],"(MTEB),":[55],"where":[56],"they":[57],"demonstrate":[58],"superior":[59],"performance.":[60],"However,":[61],"critical":[63],"question":[64],"arises:":[65],"Is":[66],"development":[68],"domain-specific":[70,120,188,217,221,230],"necessary":[73],"when":[74],"vast":[80],"corpora":[81],"that":[82,116,151,181,196],"already":[83],"include":[84],"specialized":[85],"domain":[86,99],"texts?":[87],"In":[88],"this":[89,94,152,171],"paper,":[90],"we":[91,160,194],"empirically":[92],"investigate":[93],"question,":[95],"choosing":[96],"finance":[98],"as":[100],"an":[101],"example.":[102],"We":[103,123],"introduce":[104],"Finance":[106],"(FinMTEB),":[111],"counterpart":[113],"to":[114,141,164,186],"MTEB":[115,204],"consists":[117],"financial":[119],"datasets.":[122],"evaluate":[124],"seven":[128],"state-of-the-art":[129,182],"FinMTEB":[133,237],"observe":[135],"significant":[137],"drop":[139,153],"compared":[140],"their":[142,209],"MTEB.":[145],"To":[146],"account":[147],"for":[148,170,216,220],"possibility":[150],"is":[154,205],"driven":[155],"by":[156],"FinMTEB's":[157],"higher":[158],"complexity,":[159],"propose":[161],"four":[162],"measures":[163],"quantify":[165],"dataset":[166],"complexity":[167],"control":[169],"factor":[172],"our":[174],"analysis.":[175],"Our":[176],"analysis":[177],"provides":[178],"compelling":[179],"evidence":[180],"struggle":[185],"capture":[187],"linguistic":[189],"semantic":[191],"patterns.":[192],"Moreover,":[193],"find":[195],"not":[206],"correlated":[207],"with":[208,239],"FinMTEB,":[212],"indicating":[213],"need":[215],"benchmarks":[219],"models.":[223],"This":[224],"study":[225],"sheds":[226],"light":[227],"developing":[229],"LLM":[235],"era.":[236],"comes":[238],"open-source":[240],"code":[241],"at":[242],"https://github.com/yixuantt/FinMTEB":[243]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4403853665","counts_by_year":[],"updated_date":"2025-04-10T11:46:56.668861","created_date":"2024-10-29"}