{"id":"https://openalex.org/W4402856999","doi":"https://doi.org/10.48550/arxiv.2407.15188","title":"Overview of Speaker Modeling and Its Applications: From the Lens of Deep\n Speaker Representation Learning","display_name":"Overview of Speaker Modeling and Its Applications: From the Lens of Deep\n Speaker Representation Learning","publication_year":2024,"publication_date":"2024-07-21","ids":{"openalex":"https://openalex.org/W4402856999","doi":"https://doi.org/10.48550/arxiv.2407.15188"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.15188","pdf_url":"http://arxiv.org/pdf/2407.15188","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2407.15188","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100328312","display_name":"Shuai Wang","orcid":"https://orcid.org/0000-0002-7897-2024"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Shuai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101416769","display_name":"Zhengyang Chen","orcid":"https://orcid.org/0000-0003-1293-8146"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Zhengyang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004287909","display_name":"Kong Aik Lee","orcid":"https://orcid.org/0000-0001-9133-3000"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, Kong Aik","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100341993","display_name":"Yanmin Qian","orcid":"https://orcid.org/0000-0002-0314-3790"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qian, Yanmin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5032690182","display_name":"Haizhou Li","orcid":"https://orcid.org/0000-0001-9158-9401"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Haizhou","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":77},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9716,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9716,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/representation","display_name":"Representation","score":0.66071326}],"concepts":[{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.66071326},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.5841243},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5456995},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.41393423},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.40154707},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.33637792},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.32048228},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.08786529},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.058683097},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.15188","pdf_url":"http://arxiv.org/pdf/2407.15188","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.15188","pdf_url":"http://arxiv.org/pdf/2407.15188","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4313854686","https://openalex.org/W4297807400","https://openalex.org/W321304764","https://openalex.org/W3162054169","https://openalex.org/W289407349","https://openalex.org/W2368768466","https://openalex.org/W2249138175","https://openalex.org/W2029134149","https://openalex.org/W1813780412","https://openalex.org/W1491159402"],"abstract_inverted_index":{"Speaker":[0],"individuality":[1],"information":[2],"is":[3,73],"among":[4],"the":[5,51,64,83],"most":[6],"critical":[7],"elements":[8],"within":[9,63],"speech":[10,26,34],"signals.":[11],"By":[12],"thoroughly":[13],"and":[14,36,56],"accurately":[15],"modeling":[16,61,85,97],"this":[17,41],"information,":[18],"it":[19],"can":[20],"be":[21],"utilized":[22],"in":[23,82],"various":[24],"intelligent":[25],"applications,":[27],"such":[28],"as":[29,87,89],"speaker":[30,32,38,60,84,96],"recognition,":[31],"diarization,":[33],"synthesis,":[35],"target":[37],"extraction.":[39],"In":[40],"article,":[42],"we":[43],"aim":[44],"to":[45,75,94,99],"present,":[46],"from":[47],"a":[48,77],"unique":[49],"perspective,":[50],"developmental":[52],"history,":[53],"paradigm":[54],"shifts,":[55],"application":[57],"domains":[58],"of":[59,66],"technologies":[62],"context":[65],"deep":[67],"representation":[68],"learning":[69],"framework.":[70],"This":[71],"review":[72],"designed":[74],"provide":[76],"clear":[78],"reference":[79],"for":[80,90],"researchers":[81],"field,":[86],"well":[88],"those":[91],"who":[92],"wish":[93],"apply":[95],"techniques":[98],"specific":[100],"downstream":[101],"tasks.":[102]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4402856999","counts_by_year":[],"updated_date":"2025-04-23T22:10:13.582690","created_date":"2024-09-26"}