{"id":"https://openalex.org/W1980531291","doi":"https://doi.org/10.1016/j.procs.2014.05.363","title":"Analysis and Detection of Bogus Behavior in Web Crawler Measurement","display_name":"Analysis and Detection of Bogus Behavior in Web Crawler Measurement","publication_year":2014,"publication_date":"2014-01-01","ids":{"openalex":"https://openalex.org/W1980531291","doi":"https://doi.org/10.1016/j.procs.2014.05.363","mag":"1980531291"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.procs.2014.05.363","pdf_url":null,"source":{"id":"https://openalex.org/S120348307","display_name":"Procedia Computer Science","issn_l":"1877-0509","issn":["1877-0509"],"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1016/j.procs.2014.05.363","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112210047","display_name":"Quan Bai","orcid":null},"institutions":[{"id":"https://openalex.org/I4210156404","display_name":"Institute of Information Engineering","ror":"https://ror.org/04r53se39","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210156404"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Quan Bai","raw_affiliation_strings":["Institute of Information Engineering, Chinese Academy of Science, 100093, China"],"affiliations":[{"raw_affiliation_string":"Institute of Information Engineering, Chinese Academy of Science, 100093, China","institution_ids":["https://openalex.org/I4210156404"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115601598","display_name":"Gang Xiong","orcid":"https://orcid.org/0000-0002-3190-6521"},"institutions":[{"id":"https://openalex.org/I4210156404","display_name":"Institute of Information Engineering","ror":"https://ror.org/04r53se39","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210156404"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Gang Xiong","raw_affiliation_strings":["Institute of Information Engineering, Chinese Academy of Science, 100093, China"],"affiliations":[{"raw_affiliation_string":"Institute of Information Engineering, Chinese Academy of Science, 100093, China","institution_ids":["https://openalex.org/I4210156404"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101470829","display_name":"Yong Zhao","orcid":"https://orcid.org/0000-0002-2841-047X"},"institutions":[{"id":"https://openalex.org/I4210156404","display_name":"Institute of Information Engineering","ror":"https://ror.org/04r53se39","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210156404"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yong Zhao","raw_affiliation_strings":["Institute of Information Engineering, Chinese Academy of Science, 100093, China"],"affiliations":[{"raw_affiliation_string":"Institute of Information Engineering, Chinese Academy of Science, 100093, China","institution_ids":["https://openalex.org/I4210156404"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5083944036","display_name":"Longtao He","orcid":"https://orcid.org/0000-0001-7072-7457"},"institutions":[{"id":"https://openalex.org/I4210156404","display_name":"Institute of Information Engineering","ror":"https://ror.org/04r53se39","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210156404"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Longtao He","raw_affiliation_strings":["Institute of Information Engineering, Chinese Academy of Science, 100093, China"],"affiliations":[{"raw_affiliation_string":"Institute of Information Engineering, Chinese Academy of Science, 100093, China","institution_ids":["https://openalex.org/I4210156404"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.578,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":19,"citation_normalized_percentile":{"value":0.962636,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":91},"biblio":{"volume":"31","issue":null,"first_page":"1084","last_page":"1091"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10064","display_name":"Complex Network Analysis Techniques","score":0.986,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9756,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/web-crawler","display_name":"Web crawler","score":0.96161854},{"id":"https://openalex.org/keywords/focused-crawler","display_name":"Focused crawler","score":0.72353405}],"concepts":[{"id":"https://openalex.org/C13743948","wikidata":"https://www.wikidata.org/wiki/Q45842","display_name":"Web crawler","level":2,"score":0.96161854},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8448194},{"id":"https://openalex.org/C73340581","wikidata":"https://www.wikidata.org/wiki/Q5463958","display_name":"Focused crawler","level":5,"score":0.72353405},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.6362048},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.550794},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.46416786},{"id":"https://openalex.org/C61096286","wikidata":"https://www.wikidata.org/wiki/Q7978592","display_name":"Web navigation","level":3,"score":0.30963194},{"id":"https://openalex.org/C173576120","wikidata":"https://www.wikidata.org/wiki/Q2641220","display_name":"Static web page","level":4,"score":0.22018126}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.procs.2014.05.363","pdf_url":null,"source":{"id":"https://openalex.org/S120348307","display_name":"Procedia Computer Science","issn_l":"1877-0509","issn":["1877-0509"],"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.procs.2014.05.363","pdf_url":null,"source":{"id":"https://openalex.org/S120348307","display_name":"Procedia Computer Science","issn_l":"1877-0509","issn":["1877-0509"],"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.41,"display_name":"Industry, innovation and infrastructure"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":18,"referenced_works":["https://openalex.org/W110443600","https://openalex.org/W1552211698","https://openalex.org/W1583640058","https://openalex.org/W1965434526","https://openalex.org/W1965507734","https://openalex.org/W1970614016","https://openalex.org/W2006113505","https://openalex.org/W2007089944","https://openalex.org/W2066636486","https://openalex.org/W2084585486","https://openalex.org/W2088345472","https://openalex.org/W2118015080","https://openalex.org/W2138393708","https://openalex.org/W2360209776","https://openalex.org/W2369627348","https://openalex.org/W29649622","https://openalex.org/W4238227199","https://openalex.org/W4240717613"],"related_works":["https://openalex.org/W757864652","https://openalex.org/W4312370889","https://openalex.org/W4205141839","https://openalex.org/W3216588747","https://openalex.org/W3202833648","https://openalex.org/W3164053708","https://openalex.org/W2994326481","https://openalex.org/W2941499861","https://openalex.org/W2112685907","https://openalex.org/W2021263615"],"abstract_inverted_index":{"With":[0],"the":[1,4,48,54,63,69,76,81,94,100,110],"development":[2],"of":[3,23,53,57,71,78,80,89,102,128],"Internet,":[5],"search":[6,41],"engine":[7],"technology":[8],"is":[9,28],"becoming":[10],"more":[11,13],"and":[12,37,61,74,109,121],"popular.":[14],"Web":[15],"Crawlers":[16],"have":[17],"taken":[18],"up":[19],"a":[20,72,116],"great":[21],"deal":[22],"Internet":[24,27,49],"bandwidth.":[25],"The":[26],"filled":[29],"with":[30,125],"\"bogus\"":[31,64,111,122],"web":[32,59,65,90,107,112,123],"crawlers":[33,46,66],"besides":[34],"Google,":[35],"Baidu":[36],"some":[38],"other":[39],"famous":[40],"engines.":[42],"Coded":[43],"roughly,":[44],"these":[45],"hazard":[47],"seriously.":[50],"Correct":[51],"analysis":[52],"traffic":[55,92],"characteristics":[56,104],"Google":[58,106],"crawler":[60,91,108],"shielding":[62],"can":[67],"improve":[68],"performance":[70],"site":[73],"enhance":[75],"quality":[77],"service":[79],"network.":[82],"In":[83],"this":[84],"paper,":[85],"we":[86],"measured":[87],"massive":[88],"in":[93],"real":[95,120],"high":[96],"speed":[97],"network,":[98],"compared":[99],"differences":[101],"statistical":[103],"between":[105],"crawlers.":[113],"We":[114],"proposed":[115],"model":[117],"to":[118],"detect":[119],"crawlers,":[124],"accuracy":[126],"rate":[127],"about":[129],"95%.":[130]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W1980531291","counts_by_year":[{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":5},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":2}],"updated_date":"2025-04-23T12:41:23.336736","created_date":"2016-06-24"}