{"id":"https://openalex.org/W4297537644","doi":"https://doi.org/10.1016/j.iswa.2022.200129","title":"CORPURES: Benchmark corpus for urdu extractive summaries and experiments using supervised learning","display_name":"CORPURES: Benchmark corpus for urdu extractive summaries and experiments using supervised learning","publication_year":2022,"publication_date":"2022-09-28","ids":{"openalex":"https://openalex.org/W4297537644","doi":"https://doi.org/10.1016/j.iswa.2022.200129"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.iswa.2022.200129","pdf_url":null,"source":{"id":"https://openalex.org/S4210234522","display_name":"Intelligent Systems with Applications","issn_l":"2667-3053","issn":["2667-3053"],"is_oa":true,"is_in_doaj":true,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"journal-article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1016/j.iswa.2022.200129","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5075898844","display_name":"Muhammad Humayoun","orcid":null},"institutions":[{"id":"https://openalex.org/I119744171","display_name":"Higher Colleges of Technology","ror":"https://ror.org/00qmy9z88","country_code":"AE","type":"education","lineage":["https://openalex.org/I119744171"]}],"countries":["AE"],"is_corresponding":true,"raw_author_name":"Muhammad Humayoun","raw_affiliation_strings":["Computer Information Science Division, Higher Colleges of Technology, Abu Dhabi, United Arab Emirates"],"affiliations":[{"raw_affiliation_string":"Computer Information Science Division, Higher Colleges of Technology, Abu Dhabi, United Arab Emirates","institution_ids":["https://openalex.org/I119744171"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5046108444","display_name":"Naheed Akhtar","orcid":"https://orcid.org/0000-0003-0282-2342"},"institutions":[{"id":"https://openalex.org/I3130814776","display_name":"University of Okara","ror":"https://ror.org/02fmg6q11","country_code":"PK","type":"education","lineage":["https://openalex.org/I3130814776"]},{"id":"https://openalex.org/I5100685","display_name":"University of Education","ror":"https://ror.org/052z7nw84","country_code":"PK","type":"education","lineage":["https://openalex.org/I5100685"]}],"countries":["PK"],"is_corresponding":false,"raw_author_name":"Naheed Akhtar","raw_affiliation_strings":["Department of Computer Science, University of Education, Lahore, Pakistan"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Education, Lahore, Pakistan","institution_ids":["https://openalex.org/I3130814776","https://openalex.org/I5100685"]}]}],"institution_assertions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5075898844"],"corresponding_institution_ids":["https://openalex.org/I119744171"],"apc_list":{"value":1500,"currency":"USD","value_usd":1500},"apc_paid":{"value":1500,"currency":"USD","value_usd":1500},"fwci":0.289,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.36015,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":69,"max":75},"biblio":{"volume":"16","issue":null,"first_page":"200129","last_page":"200129"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9939,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.59890866},{"id":"https://openalex.org/keywords/urdu","display_name":"Urdu","score":0.45721978},{"id":"https://openalex.org/keywords/supervised-learning","display_name":"Supervised Learning","score":0.42958313},{"id":"https://openalex.org/keywords/lexical-analysis","display_name":"Lexical analysis","score":0.4217588},{"id":"https://openalex.org/keywords/text-segmentation","display_name":"Text segmentation","score":0.41871712},{"id":"https://openalex.org/keywords/lemmatisation","display_name":"Lemmatisation","score":0.41234088}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8347307},{"id":"https://openalex.org/C170858558","wikidata":"https://www.wikidata.org/wiki/Q1394144","display_name":"Automatic summarization","level":2,"score":0.811761},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7478002},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.7365638},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.59890866},{"id":"https://openalex.org/C2777350258","wikidata":"https://www.wikidata.org/wiki/Q1617","display_name":"Urdu","level":2,"score":0.45721978},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.42958313},{"id":"https://openalex.org/C176982825","wikidata":"https://www.wikidata.org/wiki/Q835922","display_name":"Lexical analysis","level":2,"score":0.4217588},{"id":"https://openalex.org/C98501671","wikidata":"https://www.wikidata.org/wiki/Q1948408","display_name":"Text segmentation","level":3,"score":0.41871712},{"id":"https://openalex.org/C161831844","wikidata":"https://www.wikidata.org/wiki/Q2554325","display_name":"Lemmatisation","level":2,"score":0.41234088},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.39256057},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.35609454},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.12767035},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.iswa.2022.200129","pdf_url":null,"source":{"id":"https://openalex.org/S4210234522","display_name":"Intelligent Systems with Applications","issn_l":"2667-3053","issn":["2667-3053"],"is_oa":true,"is_in_doaj":true,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":false,"landing_page_url":"https://doaj.org/article/680d3f5e20d3403593efd31098f82dca","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.iswa.2022.200129","pdf_url":null,"source":{"id":"https://openalex.org/S4210234522","display_name":"Intelligent Systems with Applications","issn_l":"2667-3053","issn":["2667-3053"],"is_oa":true,"is_in_doaj":true,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"score":0.63,"display_name":"Quality education","id":"https://metadata.un.org/sdg/4"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":73,"referenced_works":["https://openalex.org/W1482921657","https://openalex.org/W1496516045","https://openalex.org/W1502957213","https://openalex.org/W1526974435","https://openalex.org/W1579835159","https://openalex.org/W1604938182","https://openalex.org/W1632114991","https://openalex.org/W1647671624","https://openalex.org/W1789328935","https://openalex.org/W1847618513","https://openalex.org/W1872699312","https://openalex.org/W1972028996","https://openalex.org/W1974339500","https://openalex.org/W2008495066","https://openalex.org/W2017337590","https://openalex.org/W2043093827","https://openalex.org/W2048207804","https://openalex.org/W2054440265","https://openalex.org/W2054460891","https://openalex.org/W2081580037","https://openalex.org/W2092246763","https://openalex.org/W2096524196","https://openalex.org/W2106393550","https://openalex.org/W2110693578","https://openalex.org/W2121167884","https://openalex.org/W2125055259","https://openalex.org/W2125816832","https://openalex.org/W2128493522","https://openalex.org/W2133990480","https://openalex.org/W2143205289","https://openalex.org/W2148143831","https://openalex.org/W2149593800","https://openalex.org/W2158847908","https://openalex.org/W2162037449","https://openalex.org/W2166347079","https://openalex.org/W2169147927","https://openalex.org/W2188383613","https://openalex.org/W2427789500","https://openalex.org/W2489487449","https://openalex.org/W2497955259","https://openalex.org/W2508023093","https://openalex.org/W2531563875","https://openalex.org/W2531638282","https://openalex.org/W2562609730","https://openalex.org/W2604272474","https://openalex.org/W2607054006","https://openalex.org/W2626207225","https://openalex.org/W2751936342","https://openalex.org/W2754472824","https://openalex.org/W2771976988","https://openalex.org/W2893531323","https://openalex.org/W2901223329","https://openalex.org/W2911964244","https://openalex.org/W2912150279","https://openalex.org/W2924215940","https://openalex.org/W2932164076","https://openalex.org/W2936459239","https://openalex.org/W2944770383","https://openalex.org/W2978806970","https://openalex.org/W3087824800","https://openalex.org/W3103913776","https://openalex.org/W3104887532","https://openalex.org/W4205918858","https://openalex.org/W4237272885","https://openalex.org/W4238042051","https://openalex.org/W4238634189","https://openalex.org/W4241469797","https://openalex.org/W4242448381","https://openalex.org/W4250467157","https://openalex.org/W4251521697","https://openalex.org/W4285718091","https://openalex.org/W4298840395","https://openalex.org/W4313316117"],"related_works":["https://openalex.org/W4399684005","https://openalex.org/W4399576126","https://openalex.org/W4387484878","https://openalex.org/W4382120354","https://openalex.org/W4308083916","https://openalex.org/W3089211180","https://openalex.org/W2973955309","https://openalex.org/W2969595679","https://openalex.org/W2574640638","https://openalex.org/W2202496758"],"abstract_inverted_index":{"Text":[0],"summarization":[1,18,40,123,150],"is":[2,42,191,215],"the":[3,7,13,43,80,99,108,131,138,141,177,196,233,242,253],"process":[4],"of":[5,45,79,140,180,187],"shortening":[6],"text":[8,17,39,122,149],"so":[9],"that":[10,137,185,217],"it":[11,113,214],"conveys":[12],"key":[14],"points.":[15],"Several":[16],"methods":[19,41],"and":[20,34,120,228,236],"benchmark":[21,47,75],"corpora":[22],"are":[23,144,155,203],"available":[24],"for":[25,50,77,130,147,201,211,239],"languages":[26],"like":[27],"English.":[28],"A":[29],"significant":[30],"hurdle":[31],"in":[32],"developing":[33,73],"evaluating":[35],"existing":[36,64,234],"or":[37],"new":[38],"unavailability":[44],"standardized":[46],"corpora,":[48],"especially":[49],"South":[51],"Asian":[52],"languages.":[53],"Among":[54],"other":[55,169],"things,":[56],"a":[57,74,126,158,192,218],"reference":[58],"corpus":[59,76,89,109],"enables":[60],"researchers":[61],"to":[62,110,117,252],"compare":[63,121],"state-of-the-art":[65],"methods.":[66],"Our":[67,134],"study":[68],"addresses":[69],"this":[70],"gap":[71],"by":[72],"one":[78],"widely":[81],"spoken":[82],"yet":[83],"under-resourced":[84],"language":[85],"Urdu.":[86,240],"The":[87],"reported":[88],"contains":[90],"161":[91],"documents":[92],"with":[93,225],"manually":[94],"written":[95],"extractive":[96],"summaries":[97],"from":[98],"newswire":[100],"domain.":[101],"We":[102],"also":[103],"perform":[104],"several":[105],"experiments":[106],"on":[107,189],"show":[111,136],"how":[112],"can":[114],"be":[115],"used":[116],"develop,":[118],"evaluate,":[119],"systems":[124],"using":[125],"supervised":[127,152,212],"learning":[128,153],"approach":[129,194],"Urdu":[132,148,181,202],"language.":[133],"results":[135,250],"state":[139],"art":[142],"classifiers":[143],"good":[145],"candidates":[146],"when":[151],"techniques":[154],"employed.":[156],"Also,":[157],"radical":[159,219],"word":[160,198,207],"segmentation":[161,166,199],"technique":[162],"such":[163,221],"as":[164,222],"fixed-length":[165],"outperforms":[167],"all":[168],"settings":[170],"(Senetnce":[171],"Match":[172],"F1=57%,":[173],"ROUGE-2":[174],"F1=64.4%).":[175],"On":[176,206],"basic":[178],"preprocessing":[179],"texts,":[182],"we":[183],"observe":[184],"tokenization":[186],"words":[188],"space":[190],"reliable":[193],"until":[195],"proper":[197],"tools":[200,238],"mature":[204],"enough.":[205],"similarity":[208],"features":[209],"needed":[210],"learning,":[213],"observed":[216],"stemming":[220,224,235],"Ultra":[223],"length":[226],"(1":[227],"2)":[229],"works":[230],"better":[231],"than":[232],"lemmatization":[237],"Finally,":[241],"artificially":[243],"generated":[244],"datasets":[245],"do":[246],"not":[247],"significantly":[248],"improve":[249],"compared":[251],"original":[254],"data.":[255]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4297537644","counts_by_year":[{"year":2024,"cited_by_count":2}],"updated_date":"2025-03-22T12:47:56.815651","created_date":"2022-09-29"}