{"id":"https://openalex.org/W3011076887","doi":"https://doi.org/10.46298/jdmdh.5077","title":"Extracting Keywords from Open-Ended Business Survey Questions","display_name":"Extracting Keywords from Open-Ended Business Survey Questions","publication_year":2020,"publication_date":"2020-03-17","ids":{"openalex":"https://openalex.org/W3011076887","doi":"https://doi.org/10.46298/jdmdh.5077","mag":"3011076887"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.46298/jdmdh.5077","pdf_url":"https://jdmdh.episciences.org/5398/pdf","source":{"id":"https://openalex.org/S2736708624","display_name":"Journal of Data Mining & Digital Humanities","issn_l":"2416-5999","issn":["2416-5999"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311340","host_organization_name":"Nicolas Turenne","host_organization_lineage":["https://openalex.org/P4310311340"],"host_organization_lineage_names":["Nicolas Turenne"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"journal-article","indexed_in":["arxiv","crossref","datacite","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://jdmdh.episciences.org/5398/pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5062737501","display_name":"Barbara McGillivray","orcid":"https://orcid.org/0000-0003-3426-8200"},"institutions":[{"id":"https://openalex.org/I4210128584","display_name":"The Alan Turing Institute","ror":"https://ror.org/035dkdb55","country_code":"GB","type":"facility","lineage":["https://openalex.org/I4210128584"]},{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Barbara McGillivray","raw_affiliation_strings":["The Alan Turing Institute","University of Cambridge"],"affiliations":[{"raw_affiliation_string":"The Alan Turing Institute","institution_ids":["https://openalex.org/I4210128584"]},{"raw_affiliation_string":"University of Cambridge","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009409061","display_name":"Gard B. Jenset","orcid":"https://orcid.org/0000-0001-7423-3112"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gard Jenset","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5058480899","display_name":"Dominik Heil","orcid":null},"institutions":[{"id":"https://openalex.org/I192619145","display_name":"University of the Witwatersrand","ror":"https://ror.org/03rp50x72","country_code":"ZA","type":"education","lineage":["https://openalex.org/I192619145"]}],"countries":["ZA"],"is_corresponding":false,"raw_author_name":"Dominik Heil","raw_affiliation_strings":["University of the Witwatersrand, Johannesburg"],"affiliations":[{"raw_affiliation_string":"University of the Witwatersrand, Johannesburg","institution_ids":["https://openalex.org/I192619145"]}]}],"institution_assertions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5062737501"],"corresponding_institution_ids":["https://openalex.org/I4210128584","https://openalex.org/I241749"],"apc_list":{"value":0,"currency":"USD","value_usd":0,"provenance":"doaj"},"apc_paid":null,"fwci":0.238,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":3,"citation_normalized_percentile":{"value":0.744604,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":73,"max":77},"biblio":{"volume":"2020","issue":"Project","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9523,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10799","display_name":"Data Visualization and Analytics","score":0.9467,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bespoke","display_name":"Bespoke","score":0.79097116},{"id":"https://openalex.org/keywords/scope","display_name":"Scope (computer science)","score":0.538814}],"concepts":[{"id":"https://openalex.org/C44210515","wikidata":"https://www.wikidata.org/wiki/Q16968978","display_name":"Bespoke","level":2,"score":0.79097116},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.728111},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.66690135},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.64231855},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5813959},{"id":"https://openalex.org/C2778012447","wikidata":"https://www.wikidata.org/wiki/Q1034415","display_name":"Scope (computer science)","level":2,"score":0.538814},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.32553166},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.11399683},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.46298/jdmdh.5077","pdf_url":"https://jdmdh.episciences.org/5398/pdf","source":{"id":"https://openalex.org/S2736708624","display_name":"Journal of Data Mining & Digital Humanities","issn_l":"2416-5999","issn":["2416-5999"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311340","host_organization_name":"Nicolas Turenne","host_organization_lineage":["https://openalex.org/P4310311340"],"host_organization_lineage_names":["Nicolas Turenne"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/1808.10685","pdf_url":"https://arxiv.org/pdf/1808.10685","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://doaj.org/article/1df70a1dc7004cfb8cb23d53503b6e6e","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://doaj.org/article/cac83b4be0ff46e689e40023ece36f77","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.1808.10685","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.46298/jdmdh.5077","pdf_url":"https://jdmdh.episciences.org/5398/pdf","source":{"id":"https://openalex.org/S2736708624","display_name":"Journal of Data Mining & Digital Humanities","issn_l":"2416-5999","issn":["2416-5999"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311340","host_organization_name":"Nicolas Turenne","host_organization_lineage":["https://openalex.org/P4310311340"],"host_organization_lineage_names":["Nicolas Turenne"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"display_name":"Quality education","id":"https://metadata.un.org/sdg/4","score":0.66}],"grants":[],"datasets":[],"versions":["https://openalex.org/W3011076887"],"referenced_works_count":25,"referenced_works":["https://openalex.org/W1521626219","https://openalex.org/W1574901103","https://openalex.org/W1762161369","https://openalex.org/W1852422273","https://openalex.org/W1972978393","https://openalex.org/W2029729099","https://openalex.org/W2098297502","https://openalex.org/W2103479483","https://openalex.org/W2115054880","https://openalex.org/W2133286915","https://openalex.org/W2144211451","https://openalex.org/W2146769536","https://openalex.org/W2159666810","https://openalex.org/W2167329753","https://openalex.org/W2180877453","https://openalex.org/W2317322038","https://openalex.org/W2340455726","https://openalex.org/W2518294942","https://openalex.org/W2592634758","https://openalex.org/W2769230674","https://openalex.org/W2896328475","https://openalex.org/W4205532272","https://openalex.org/W4251395411","https://openalex.org/W4390723842","https://openalex.org/W658480537"],"related_works":["https://openalex.org/W4389712339","https://openalex.org/W4244794230","https://openalex.org/W3202725889","https://openalex.org/W3093969907","https://openalex.org/W2806540758","https://openalex.org/W2494533090","https://openalex.org/W2479427402","https://openalex.org/W2232750048","https://openalex.org/W2212726445","https://openalex.org/W2181465263"],"abstract_inverted_index":{"Open-ended":[0],"survey":[1,21,31],"data":[2,22,32,41,102],"constitute":[3],"an":[4],"important":[5],"basis":[6],"in":[7,97,163,202,214],"research":[8],"as":[9,11],"well":[10],"for":[12,43,127,143,154,208],"making":[13],"business":[14,131],"decisions.":[15],"Collecting":[16],"and":[17,29,52,82,90,116,140,165,189,198,210,228],"manually":[18],"analysing":[19,30,155],"free-text":[20,40],"is":[23,119,226],"generally":[24],"more":[25],"costly":[26],"than":[27],"collecting":[28],"consisting":[33],"of":[34,58,76,114,137,146,158,185,220],"answers":[35],"to":[36,46,160,178,195,230],"multiple-choice":[37],"questions.":[38],"Yet":[39],"allow":[42],"new":[44,59],"content":[45],"be":[47,94],"expressed":[48],"beyond":[49],"predefined":[50],"categories":[51],"are":[53,80,107],"a":[54,151,203],"very":[55],"valuable":[56],"source":[57],"insights":[60],"into":[61],"people's":[62,171],"opinions.":[63,172],"At":[64],"the":[65,74,77,110,135,176,183,193,215,218],"same":[66],"time,":[67],"surveys":[68,164],"always":[69],"make":[70],"ontological":[71],"assumptions":[72],"about":[73],"nature":[75,113],"entities":[78,118],"that":[79,168,206],"researched,":[81],"this":[83,129,200],"has":[84],"vital":[85],"ethical":[86,130],"consequences.":[87],"Human":[88],"interpretations":[89],"opinions":[91],"can":[92],"only":[93],"properly":[95],"ascertained":[96],"their":[98],"richness":[99],"using":[100],"textual":[101],"sources;":[103],"if":[104],"these":[105],"sources":[106],"analyzed":[108],"appropriately,":[109],"essential":[111],"linguistic":[112],"humans":[115],"social":[117],"safeguarded.":[120],"Natural":[121],"Language":[122],"Processing":[123],"(NLP)":[124],"offers":[125],"possibilities":[126],"meeting":[128],"challenge":[132],"by":[133],"automating":[134],"analysis":[136],"natural":[138],"language":[139],"thus":[141],"allowing":[142],"insightful":[144],"investigations":[145],"human":[147],"judgements.":[148],"We":[149],"present":[150],"computational":[152],"pipeline":[153,174],"large":[156],"amounts":[157],"responses":[159],"open-ended":[161],"questions":[162],"extract":[166],"keywords":[167],"appropriately":[169],"represent":[170],"This":[173],"addresses":[175],"need":[177],"perform":[179],"such":[180],"tasks":[181],"outside":[182],"scope":[184],"both":[186],"commercial":[187],"software":[188],"bespoke":[190],"analysis,":[191],"exceeds":[192],"performance":[194],"state-of-the-art":[196],"systems,":[197],"performs":[199],"task":[201],"transparent":[204],"way":[205],"allows":[207],"scrutinising":[209],"exposing":[211],"potential":[212],"biases":[213],"analysis.":[216],"Following":[217],"principle":[219],"Open":[221],"Data":[222],"Science,":[223],"our":[224],"code":[225],"open-source":[227],"generalizable":[229],"other":[231],"datasets.":[232]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W3011076887","counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2020,"cited_by_count":1}],"updated_date":"2024-12-29T01:23:18.083106","created_date":"2020-03-23"}