{"id":"https://openalex.org/W3101645173","doi":"https://doi.org/10.1145/3383583.3398529","title":"Classification and Clustering of arXiv Documents, Sections, and Abstracts, Comparing Encodings of Natural and Mathematical Language","display_name":"Classification and Clustering of arXiv Documents, Sections, and Abstracts, Comparing Encodings of Natural and Mathematical Language","publication_year":2020,"publication_date":"2020-08-01","ids":{"openalex":"https://openalex.org/W3101645173","doi":"https://doi.org/10.1145/3383583.3398529","mag":"3101645173"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/3383583.3398529","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"proceedings-article","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2005.11021","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5069041521","display_name":"Philipp Scharpf","orcid":"https://orcid.org/0000-0002-4212-0508"},"institutions":[{"id":"https://openalex.org/I189712700","display_name":"University of Konstanz","ror":"https://ror.org/0546hnb39","country_code":"DE","type":"funder","lineage":["https://openalex.org/I189712700"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Philipp Scharpf","raw_affiliation_strings":["University of Konstanz, Konstanz, Germany"],"affiliations":[{"raw_affiliation_string":"University of Konstanz, Konstanz, Germany","institution_ids":["https://openalex.org/I189712700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038664667","display_name":"Moritz Schubotz","orcid":"https://orcid.org/0000-0001-7141-4997"},"institutions":[{"id":"https://openalex.org/I167360494","display_name":"University of Wuppertal","ror":"https://ror.org/00613ak93","country_code":"DE","type":"funder","lineage":["https://openalex.org/I167360494"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Moritz Schubotz","raw_affiliation_strings":["University of Wuppertal & FIZ Karlsruhe, Karlsruhe, Germany"],"affiliations":[{"raw_affiliation_string":"University of Wuppertal & FIZ Karlsruhe, Karlsruhe, Germany","institution_ids":["https://openalex.org/I167360494"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111635384","display_name":"Abdou Youssef","orcid":null},"institutions":[{"id":"https://openalex.org/I193531525","display_name":"George Washington University","ror":"https://ror.org/00y4zzh67","country_code":"US","type":"funder","lineage":["https://openalex.org/I193531525"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Abdou Youssef","raw_affiliation_strings":["George Washington University, Washington, WA, USA"],"affiliations":[{"raw_affiliation_string":"George Washington University, Washington, WA, USA","institution_ids":["https://openalex.org/I193531525"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077372505","display_name":"Felix Hamborg","orcid":"https://orcid.org/0000-0003-2444-8056"},"institutions":[{"id":"https://openalex.org/I189712700","display_name":"University of Konstanz","ror":"https://ror.org/0546hnb39","country_code":"DE","type":"funder","lineage":["https://openalex.org/I189712700"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Felix Hamborg","raw_affiliation_strings":["University of Konstanz, Konstanz, Germany"],"affiliations":[{"raw_affiliation_string":"University of Konstanz, Konstanz, Germany","institution_ids":["https://openalex.org/I189712700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060549879","display_name":"Norman Meuschke","orcid":"https://orcid.org/0000-0003-4648-8198"},"institutions":[{"id":"https://openalex.org/I167360494","display_name":"University of Wuppertal","ror":"https://ror.org/00613ak93","country_code":"DE","type":"funder","lineage":["https://openalex.org/I167360494"]},{"id":"https://openalex.org/I189712700","display_name":"University of Konstanz","ror":"https://ror.org/0546hnb39","country_code":"DE","type":"funder","lineage":["https://openalex.org/I189712700"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Norman Meuschke","raw_affiliation_strings":["University of Wuppertal & University of Konstanz, Wuppertal, Germany"],"affiliations":[{"raw_affiliation_string":"University of Wuppertal & University of Konstanz, Wuppertal, Germany","institution_ids":["https://openalex.org/I167360494","https://openalex.org/I189712700"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058837356","display_name":"B\u00e9la Gipp","orcid":"https://orcid.org/0000-0001-6522-3019"},"institutions":[{"id":"https://openalex.org/I167360494","display_name":"University of Wuppertal","ror":"https://ror.org/00613ak93","country_code":"DE","type":"funder","lineage":["https://openalex.org/I167360494"]},{"id":"https://openalex.org/I189712700","display_name":"University of Konstanz","ror":"https://ror.org/0546hnb39","country_code":"DE","type":"funder","lineage":["https://openalex.org/I189712700"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Bela Gipp","raw_affiliation_strings":["University of Wuppertal & University of Konstanz, Wuppertal, Germany"],"affiliations":[{"raw_affiliation_string":"University of Wuppertal & University of Konstanz, Wuppertal, Germany","institution_ids":["https://openalex.org/I167360494","https://openalex.org/I189712700"]}]}],"institution_assertions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":21,"citation_normalized_percentile":{"value":0.999799,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":92,"max":93},"biblio":{"volume":null,"issue":null,"first_page":"137","last_page":"146"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T13523","display_name":"Mathematics, Computing, and Information Processing","score":0.9983,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13523","display_name":"Mathematics, Computing, and Information Processing","score":0.9983,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.997,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9927,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.77944714},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.66645736},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.62527585},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.58195776},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.51927495},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.40612042},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.3628292}],"mesh":[],"locations_count":4,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/3383583.3398529","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2005.11021","pdf_url":"http://arxiv.org/pdf/2005.11021","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2005.11021","pdf_url":"https://arxiv.org/pdf/2005.11021","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2005.11021","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2005.11021","pdf_url":"http://arxiv.org/pdf/2005.11021","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"display_name":"Quality education","id":"https://metadata.un.org/sdg/4","score":0.85}],"grants":[],"datasets":[],"versions":["https://openalex.org/W3028148433","https://openalex.org/W3101645173"],"referenced_works_count":48,"referenced_works":["https://openalex.org/W1493584520","https://openalex.org/W1523300416","https://openalex.org/W1538791262","https://openalex.org/W1614298861","https://openalex.org/W1840435438","https://openalex.org/W1888893865","https://openalex.org/W2001771035","https://openalex.org/W2009957709","https://openalex.org/W2031976076","https://openalex.org/W2080133951","https://openalex.org/W2101234009","https://openalex.org/W2108612889","https://openalex.org/W2118020653","https://openalex.org/W2123442489","https://openalex.org/W2131744502","https://openalex.org/W2144578941","https://openalex.org/W2153579005","https://openalex.org/W2161466446","https://openalex.org/W2164926850","https://openalex.org/W2169818249","https://openalex.org/W2182868890","https://openalex.org/W2316340232","https://openalex.org/W2337021505","https://openalex.org/W2396820472","https://openalex.org/W2397622830","https://openalex.org/W2401312310","https://openalex.org/W2407793991","https://openalex.org/W2594866016","https://openalex.org/W2724771389","https://openalex.org/W2802787326","https://openalex.org/W2883611677","https://openalex.org/W2883885170","https://openalex.org/W2884001105","https://openalex.org/W2887888371","https://openalex.org/W2896457183","https://openalex.org/W2907090821","https://openalex.org/W2950577311","https://openalex.org/W2952087486","https://openalex.org/W2962739339","https://openalex.org/W2963133108","https://openalex.org/W2963341956","https://openalex.org/W2964262055","https://openalex.org/W2965221296","https://openalex.org/W2972393600","https://openalex.org/W2997591727","https://openalex.org/W4230872509","https://openalex.org/W4236521339","https://openalex.org/W4294170691"],"related_works":["https://openalex.org/W4310225030","https://openalex.org/W4308854837","https://openalex.org/W4298130764","https://openalex.org/W4226226396","https://openalex.org/W3153750606","https://openalex.org/W2804364458","https://openalex.org/W2393816671","https://openalex.org/W2158836806","https://openalex.org/W2132641928","https://openalex.org/W2090259340"],"abstract_inverted_index":{"In":[0],"this":[1,26],"paper,":[2],"we":[3,143,156],"show":[4,144],"how":[5],"selecting":[6],"and":[7,12,17,33,58,60,64,69,80,93,108,117,119,131,140,158,162],"combining":[8],"encodings":[9,55,73],"of":[10,19,30,56,66,87,91,97,115,126],"natural":[11],"mathematical":[13,22],"language":[14],"affect":[15],"classification":[16,68,75,130,161],"clustering":[18,70,132],"documents":[20],"with":[21],"content.":[23],"We":[24,100],"demonstrate":[25],"by":[27,43],"using":[28],"sets":[29],"documents,":[31],"sections,":[32],"abstracts":[34],"from":[35],"the":[36,62,113,146],"arXiv":[37],"preprint":[38],"server":[39],"that":[40,145],"are":[41],"labeled":[42],"their":[44],"subject":[45],"class":[46],"(mathematics,":[47],"computer":[48,147],"science,":[49],"physics,":[50],"etc.)":[51],"to":[52,78,84],"compare":[53],"different":[54],"text":[57,107,116],"formulae":[59,118],"evaluate":[61,157],"performance":[63],"runtimes":[65],"selected":[67],"algorithms.":[71],"Our":[72],"achieve":[74],"accuracies":[76],"up":[77,83],"82.8%":[79],"cluster":[81],"purities":[82],"69.4%":[85],"(number":[86],"clusters":[88],"equals":[89],"number":[90,96],"classes),":[92],"99.9%":[94],"(unspecified":[95],"clusters)":[98],"respectively.":[99],"observe":[101],"a":[102,127,149],"relatively":[103],"low":[104],"correlation":[105],"between":[106],"math":[109],"similarity,":[110],"which":[111],"indicates":[112],"independence":[114],"motivates":[120],"treating":[121],"them":[122],"as":[123],"separate":[124],"features":[125],"document.":[128],"The":[129],"can":[133],"be":[134],"employed,":[135],"e.g.,":[136],"for":[137],"document":[138],"search":[139],"recommendation.":[141],"Furthermore,":[142],"outperforms":[148],"human":[150],"expert":[151],"when":[152],"classifying":[153],"documents.":[154],"Finally,":[155],"discuss":[159],"multi-label":[160],"formula":[163],"semantification.":[164]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W3101645173","counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":6},{"year":2021,"cited_by_count":6},{"year":2020,"cited_by_count":4}],"updated_date":"2025-03-15T17:48:43.006937","created_date":"2020-11-23"}