{"id":"https://openalex.org/W2026961318","doi":"https://doi.org/10.1109/icws.2014.27","title":"A Web Service for Scholarly Big Data Information Extraction","display_name":"A Web Service for Scholarly Big Data Information Extraction","publication_year":2014,"publication_date":"2014-06-01","ids":{"openalex":"https://openalex.org/W2026961318","doi":"https://doi.org/10.1109/icws.2014.27","mag":"2026961318"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/icws.2014.27","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5108439761","display_name":"Kyle Williams","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kyle Williams","raw_affiliation_strings":["Information Sciences and Technology"],"affiliations":[{"raw_affiliation_string":"Information Sciences and Technology","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067508120","display_name":"Lichi Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lichi Li","raw_affiliation_strings":["Information Sciences and Technology"],"affiliations":[{"raw_affiliation_string":"Information Sciences and Technology","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054253075","display_name":"Madian Khabsa","orcid":null},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Madian Khabsa","raw_affiliation_strings":["Computer Science and Engineering, The Pennsylvania State University, University Park, PA, USA"],"affiliations":[{"raw_affiliation_string":"Computer Science and Engineering, The Pennsylvania State University, University Park, PA, USA","institution_ids":["https://openalex.org/I130769515"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075242841","display_name":"Jian Wu","orcid":"https://orcid.org/0000-0003-0173-4463"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jian Wu","raw_affiliation_strings":["Information Sciences and Technology"],"affiliations":[{"raw_affiliation_string":"Information Sciences and Technology","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015350727","display_name":"Patrick C. Shih","orcid":"https://orcid.org/0000-0003-2460-0468"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Patrick C. Shih","raw_affiliation_strings":["Information Sciences and Technology"],"affiliations":[{"raw_affiliation_string":"Information Sciences and Technology","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001294898","display_name":"C. Lee Giles","orcid":"https://orcid.org/0000-0002-1931-585X"},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"C. Lee Giles","raw_affiliation_strings":["Computer Science and Engineering, The Pennsylvania State University, University Park, PA, USA","Information Sciences and Technology"],"affiliations":[{"raw_affiliation_string":"Computer Science and Engineering, The Pennsylvania State University, University Park, PA, USA","institution_ids":["https://openalex.org/I130769515"]},{"raw_affiliation_string":"Information Sciences and Technology","institution_ids":[]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.07,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":21,"citation_normalized_percentile":{"value":0.790558,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":92},"biblio":{"volume":null,"issue":null,"first_page":"105","last_page":"112"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9969,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9969,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9964,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9961,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/web-data-extraction","display_name":"Web Data Extraction","score":0.6312},{"id":"https://openalex.org/keywords/data-deduplication","display_name":"Data deduplication","score":0.5806103},{"id":"https://openalex.org/keywords/scientific-workflows","display_name":"Scientific Workflows","score":0.575582},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information Retrieval","score":0.552463},{"id":"https://openalex.org/keywords/web-crawling","display_name":"Web Crawling","score":0.528117},{"id":"https://openalex.org/keywords/data-cleaning","display_name":"Data Cleaning","score":0.526569}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8602862},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.75911856},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.65626854},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5996211},{"id":"https://openalex.org/C32587265","wikidata":"https://www.wikidata.org/wiki/Q1182260","display_name":"Data deduplication","level":2,"score":0.5806103},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.5474074},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4810722},{"id":"https://openalex.org/C153048206","wikidata":"https://www.wikidata.org/wiki/Q3454922","display_name":"Metadata repository","level":3,"score":0.47037974},{"id":"https://openalex.org/C2780378061","wikidata":"https://www.wikidata.org/wiki/Q25351891","display_name":"Service (business)","level":2,"score":0.466055},{"id":"https://openalex.org/C513874922","wikidata":"https://www.wikidata.org/wiki/Q212805","display_name":"Digital library","level":3,"score":0.4398209},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.41988325},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.41928938},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C136264566","wikidata":"https://www.wikidata.org/wiki/Q159810","display_name":"Economy","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C164913051","wikidata":"https://www.wikidata.org/wiki/Q482","display_name":"Poetry","level":2,"score":0.0},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/icws.2014.27","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":19,"referenced_works":["https://openalex.org/W1559499673","https://openalex.org/W1585912603","https://openalex.org/W1603719052","https://openalex.org/W1967652622","https://openalex.org/W2012833704","https://openalex.org/W2042641525","https://openalex.org/W2057954853","https://openalex.org/W2079999708","https://openalex.org/W2091344457","https://openalex.org/W2098162425","https://openalex.org/W2129647599","https://openalex.org/W2132083030","https://openalex.org/W2140479099","https://openalex.org/W2145349611","https://openalex.org/W2151355086","https://openalex.org/W2153637372","https://openalex.org/W2906926620","https://openalex.org/W4230940751","https://openalex.org/W4253723135"],"related_works":["https://openalex.org/W4385414095","https://openalex.org/W4299935056","https://openalex.org/W3023161639","https://openalex.org/W2782431616","https://openalex.org/W2394393789","https://openalex.org/W2374379029","https://openalex.org/W2183628870","https://openalex.org/W2008531296","https://openalex.org/W1552553528","https://openalex.org/W1503116306"],"abstract_inverted_index":{"The":[0,112],"automatic":[1],"extraction":[2,46,55,76,90],"of":[3,32,52,106,139,152],"metadata":[4,117,121],"and":[5,21,30,34,72,103,118,137,141,161],"other":[6],"information":[7,89,163],"from":[8,122,164],"scholarly":[9,88,100],"documents":[10,33,168],"is":[11,97],"a":[12,49,84,107,123,150,171],"common":[13],"task":[14],"in":[15,99,149,154],"academic":[16],"digital":[17],"libraries,":[18],"search":[19,36],"engines,":[20],"document":[22,62,124,146],"management":[23,29],"systems":[24],"to":[25,37,70,158,170],"allow":[26],"for":[27,35,54,67,87],"the":[28,65,93,134,144,155],"categorization":[31],"take":[38],"place.":[39],"A":[40],"Web-accessible":[41],"API":[42,86],"can":[43,57],"simplify":[44],"this":[45,79],"by":[47],"providing":[48],"single":[50],"point":[51],"operation":[53],"that":[56,91,95],"be":[58],"incorporated":[59],"into":[60],"multiple":[61],"workflows":[63],"without":[64],"need":[66],"each":[68],"workflow":[69],"implement":[71],"support":[73],"its":[74],"own":[75],"functionality.":[77],"In":[78],"paper,":[80],"we":[81],"describe":[82,133],"CiteSeerExtractor,":[83],"RESTful":[85],"exploits":[92],"fact":[94],"there":[96],"duplication":[98],"big":[101],"data":[102],"makes":[104],"use":[105],"near":[108],"duplicate":[109,145],"matching":[110,147],"backend.":[111],"backend":[113],"stores":[114],"previously":[115],"extracted":[116,130],"avoids":[119],"extracting":[120],"if":[125],"it":[126],"has":[127],"already":[128],"been":[129],"before.":[131],"We":[132],"design,":[135],"implementation,":[136],"functionality":[138],"CiteSeerExtractor":[140],"show":[142],"how":[143],"results":[148],"difference":[151],"8.46%":[153],"time":[156],"required":[157],"extract":[159],"header":[160],"citation":[162],"approximately":[165],"3.5":[166],"million":[167],"compared":[169],"baseline.":[172]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2026961318","counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":6},{"year":2017,"cited_by_count":4},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":3},{"year":2014,"cited_by_count":2}],"updated_date":"2024-12-05T06:55:05.075492","created_date":"2016-06-24"}