{"id":"https://openalex.org/W4378465203","doi":"https://doi.org/10.48550/arxiv.2305.14982","title":"Benchmarking Arabic AI with Large Language Models","display_name":"Benchmarking Arabic AI with Large Language Models","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4378465203","doi":"https://doi.org/10.48550/arxiv.2305.14982"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2305.14982","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2305.14982","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047182137","display_name":"Ahmed Abdelal\u00ed","orcid":"https://orcid.org/0000-0002-4160-8181"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Abdelali, Ahmed","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102924560","display_name":"Hamdy Mubarak","orcid":"https://orcid.org/0000-0002-9051-6240"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mubarak, Hamdy","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022279488","display_name":"Shammur Absar Chowdhury","orcid":"https://orcid.org/0000-0002-1331-2543"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chowdhury, Shammur Absar","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035521035","display_name":"Maram Hasanain","orcid":"https://orcid.org/0000-0002-7466-178X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hasanain, Maram","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072925652","display_name":"Basel Mousi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mousi, Basel","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052314657","display_name":"Sabri Boughorbel","orcid":"https://orcid.org/0000-0003-2734-3356"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Boughorbel, Sabri","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072129532","display_name":"Yassine El Kheir","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kheir, Yassine El","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013350545","display_name":"Daniel Izham","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Izham, Daniel","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002245931","display_name":"Fahim Dalvi","orcid":"https://orcid.org/0000-0003-1183-7837"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dalvi, Fahim","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021151663","display_name":"Majd Hawasly","orcid":"https://orcid.org/0000-0003-1823-5580"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hawasly, Majd","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112924044","display_name":"Nizi Nazar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nazar, Nizi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092029617","display_name":"Yousseif Elshahawy","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Elshahawy, Yousseif","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104175595","display_name":"Ahmed Ali","orcid":"https://orcid.org/0009-0004-8477-4376"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ali, Ahmed","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032689067","display_name":"Nadir Durrani","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Durrani, Nadir","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113653594","display_name":"Nata\u0161a Mili\u0107-Frayling","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Milic-Frayling, Natasa","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5053436179","display_name":"Firoj Alam","orcid":"https://orcid.org/0000-0001-7172-1997"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alam, Firoj","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.99995,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9978,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9978,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9977,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.9669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.74411213},{"id":"https://openalex.org/keywords/spoken-language","display_name":"Spoken Language","score":0.43508518}],"concepts":[{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.74411213},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.69276965},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6811092},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5641367},{"id":"https://openalex.org/C96455323","wikidata":"https://www.wikidata.org/wiki/Q13955","display_name":"Arabic","level":2,"score":0.51993716},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.49045584},{"id":"https://openalex.org/C2776230583","wikidata":"https://www.wikidata.org/wiki/Q1322198","display_name":"Spoken language","level":2,"score":0.43508518},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3252505},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.26031327},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C162853370","wikidata":"https://www.wikidata.org/wiki/Q39809","display_name":"Marketing","level":1,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2305.14982","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2305.14982","pdf_url":"http://arxiv.org/pdf/2305.14982","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2305.14982","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2305.14982","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.53,"display_name":"Quality education"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W435179959","https://openalex.org/W4238897586","https://openalex.org/W2619091065","https://openalex.org/W2291782699","https://openalex.org/W2284465472","https://openalex.org/W2112883198","https://openalex.org/W2059640416","https://openalex.org/W2000169967","https://openalex.org/W1993948687","https://openalex.org/W1490753184"],"abstract_inverted_index":{"Recent":[0],"advancements":[1],"in":[2,108,137],"Large":[3],"Language":[4,42],"Models":[5],"(LLMs)":[6],"have":[7],"significantly":[8],"influenced":[9],"the":[10,118,164],"landscape":[11],"of":[12,97,111,166],"language":[13],"and":[14,33,45,52,68,72,99,124,171],"speech":[15,172],"research.":[16],"Despite":[17],"this":[18,37],"progress,":[19],"these":[20,155],"models":[21,28,60,123,133,147],"lack":[22],"specific":[23],"benchmarking":[24],"against":[25],"state-of-the-art":[26],"(SOTA)":[27],"tailored":[29],"to":[30,76,153],"particular":[31],"languages":[32],"tasks.":[34,174],"LAraBench":[35],"addresses":[36],"gap":[38,120],"for":[39,102,168],"Arabic":[40,169],"Natural":[41],"Processing":[43,47],"(NLP)":[44],"Speech":[46],"tasks,":[48],"including":[49],"sequence":[50],"tagging":[51],"content":[53],"classification":[54],"across":[55,81],"different":[56],"domains.":[57],"We":[58],"utilized":[59],"such":[61],"as":[62],"GPT-3.5-turbo,":[63],"GPT-4,":[64],"BLOOMZ,":[65],"Jais-13b-chat,":[66],"Whisper,":[67],"USM,":[69],"employing":[70],"zero":[71],"few-shot":[73,149],"learning":[74,150],"techniques":[75,151],"tackle":[77],"33":[78],"distinct":[79],"tasks":[80],"61":[82],"publicly":[83],"available":[84],"datasets.":[85],"This":[86,105],"involved":[87],"98":[88],"experimental":[89],"setups,":[90],"encompassing":[91],"~296K":[92],"data":[93],"points,":[94],"~46":[95],"hours":[96],"speech,":[98],"30":[100],"sentences":[101],"Text-to-Speech":[103],"(TTS).":[104],"effort":[106],"resulted":[107],"330+":[109],"sets":[110],"experiments.":[112],"Our":[113,158],"analysis":[114],"focused":[115],"on":[116],"measuring":[117],"performance":[119,156],"between":[121],"SOTA":[122,132],"LLMs.":[125],"The":[126],"overarching":[127],"trend":[128],"observed":[129],"was":[130],"that":[131],"generally":[134],"outperformed":[135],"LLMs":[136,167],"zero-shot":[138],"learning,":[139],"with":[140,148],"a":[141],"few":[142],"exceptions.":[143],"Notably,":[144],"larger":[145],"computational":[146],"managed":[152],"reduce":[154],"gaps.":[157],"findings":[159],"provide":[160],"valuable":[161],"insights":[162],"into":[163],"applicability":[165],"NLP":[170],"processing":[173]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4378465203","counts_by_year":[{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":7}],"updated_date":"2025-01-04T16:38:24.486597","created_date":"2023-05-27"}