{"id":"https://openalex.org/W2949213912","doi":"https://doi.org/10.1145/3331184.3331241","title":"Optimal Freshness Crawl Under Politeness Constraints","display_name":"Optimal Freshness Crawl Under Politeness Constraints","publication_year":2019,"publication_date":"2019-07-18","ids":{"openalex":"https://openalex.org/W2949213912","doi":"https://doi.org/10.1145/3331184.3331241","mag":"2949213912"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/3331184.3331241","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101862316","display_name":"Andrey Kolobov","orcid":"https://orcid.org/0000-0003-4966-7466"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"funder","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Andrey Kolobov","raw_affiliation_strings":["Microsoft Research, Redmond, WA, USA"],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Redmond, WA, USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020836845","display_name":"Yuval Peres","orcid":"https://orcid.org/0000-0001-5456-6323"},"institutions":[],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yuval Peres","raw_affiliation_strings":["No Affiliation, WA, USA"],"affiliations":[{"raw_affiliation_string":"No Affiliation, WA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047720470","display_name":"Eyal Lubetzky","orcid":"https://orcid.org/0000-0002-2281-3542"},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"funder","lineage":["https://openalex.org/I57206974"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Eyal Lubetzky","raw_affiliation_strings":["New York University, New York, NY, USA"],"affiliations":[{"raw_affiliation_string":"New York University, New York, NY, USA","institution_ids":["https://openalex.org/I57206974"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5043228682","display_name":"Eric Horvitz","orcid":"https://orcid.org/0000-0002-8823-0614"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"funder","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Eric Horvitz","raw_affiliation_strings":["Microsoft Research, Redmond, WA, USA"],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Redmond, WA, USA","institution_ids":["https://openalex.org/I1290206253"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":4.409,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":27,"citation_normalized_percentile":{"value":0.925658,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":93,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"495","last_page":"504"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12288","display_name":"Optimization and Search Problems","score":0.9983,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9789,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/web-crawler","display_name":"Web crawler","score":0.8975462},{"id":"https://openalex.org/keywords/focused-crawler","display_name":"Focused crawler","score":0.8117677},{"id":"https://openalex.org/keywords/crawling","display_name":"Crawling","score":0.5485122}],"concepts":[{"id":"https://openalex.org/C13743948","wikidata":"https://www.wikidata.org/wiki/Q45842","display_name":"Web crawler","level":2,"score":0.8975462},{"id":"https://openalex.org/C73340581","wikidata":"https://www.wikidata.org/wiki/Q5463958","display_name":"Focused crawler","level":5,"score":0.8117677},{"id":"https://openalex.org/C61123122","wikidata":"https://www.wikidata.org/wiki/Q281287","display_name":"Politeness","level":2,"score":0.7659201},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7206609},{"id":"https://openalex.org/C97854310","wikidata":"https://www.wikidata.org/wiki/Q19541","display_name":"Search engine","level":2,"score":0.56232107},{"id":"https://openalex.org/C100368936","wikidata":"https://www.wikidata.org/wiki/Q1411725","display_name":"Crawling","level":2,"score":0.5485122},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.5154827},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.5037102},{"id":"https://openalex.org/C2777382242","wikidata":"https://www.wikidata.org/wiki/Q6017816","display_name":"Index (typography)","level":2,"score":0.44333822},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.4161417},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.38376716},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.3338253},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.21706027},{"id":"https://openalex.org/C173576120","wikidata":"https://www.wikidata.org/wiki/Q2641220","display_name":"Static web page","level":4,"score":0.12756053},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.12549219},{"id":"https://openalex.org/C61096286","wikidata":"https://www.wikidata.org/wiki/Q7978592","display_name":"Web navigation","level":3,"score":0.09891257},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.06988281},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C105702510","wikidata":"https://www.wikidata.org/wiki/Q514","display_name":"Anatomy","level":1,"score":0.0},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/3331184.3331241","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":22,"referenced_works":["https://openalex.org/W1566984846","https://openalex.org/W1854214752","https://openalex.org/W1970841368","https://openalex.org/W1976624301","https://openalex.org/W1977522747","https://openalex.org/W1987272746","https://openalex.org/W2016122268","https://openalex.org/W2018928332","https://openalex.org/W2030453570","https://openalex.org/W2034778995","https://openalex.org/W2038378248","https://openalex.org/W2059713800","https://openalex.org/W2118129497","https://openalex.org/W2125596588","https://openalex.org/W2127536142","https://openalex.org/W2151932833","https://openalex.org/W2164542999","https://openalex.org/W2165862792","https://openalex.org/W2258998514","https://openalex.org/W2888023058","https://openalex.org/W2911388033","https://openalex.org/W4251434705"],"related_works":["https://openalex.org/W4385695127","https://openalex.org/W2375180657","https://openalex.org/W2358310581","https://openalex.org/W2352686120","https://openalex.org/W2274831913","https://openalex.org/W2137810919","https://openalex.org/W2042034567","https://openalex.org/W2026132847","https://openalex.org/W2019080882","https://openalex.org/W1506122440"],"abstract_inverted_index":{"A":[0],"Web":[1,25],"crawler":[2,39],"is":[3,26],"an":[4,154,171],"essential":[5],"part":[6],"of":[7,93,134,175,189],"a":[8,38,114,168,177,199],"search":[9,18,48,98],"engine":[10,19],"that":[11],"procures":[12],"information":[13],"subsequently":[14],"served":[15],"by":[16,58,202],"the":[17,24,47,55,61,76,89,97,105,123,132,145,165,186],"to":[20,33,41,53,68,84,170],"its":[21,159],"users.":[22],"As":[23],"becoming":[27],"increasingly":[28],"more":[29],"dynamic,":[30],"in":[31,46,51,131,164],"addition":[32],"discovering":[34,167],"new":[35],"web":[36],"pages":[37,70,109],"needs":[40],"keep":[42,54],"revisiting":[43],"those":[44],"already":[45],"engine's":[49,99],"index,":[50],"order":[52],"index":[56],"fresh":[57],"picking":[59],"up":[60],"pages'":[62,77],"changed":[63],"content.":[64],"Determining":[65],"how":[66],"often":[67],"recrawl":[69],"requires":[71],"making":[72],"tradeoffs":[73],"based":[74],"on":[75,96,198],"relative":[78,187],"importance":[79,142],"and":[80,101,144,163,191],"change":[81],"rates,":[82],"subject":[83],"multiple":[85],"resource":[86],"constraints":[87,103,136,197],"-":[88],"limited":[90],"daily":[91,207],"budget":[92],"crawl":[94,129,148,182],"requests":[95],"end":[100],"politeness":[102,135,196],"restricting":[104],"rate":[106],"at":[107],"which":[108],"can":[110],"be":[111],"requested":[112],"from":[113],"given":[115],"host.":[116],"In":[117],"this":[118],"paper,":[119],"we":[120],"introduce":[121],"PoliteBinaryLambdaCrawl,":[122],"first":[124],"optimal":[125],"algorithm":[126],"for":[127,156,180,194],"freshness":[128,181],"scheduling":[130],"presence":[133],"as":[137,139,176],"well":[138],"non-uniform":[140],"page":[141],"scores":[143],"crawler's":[146],"own":[147],"request":[149],"limit.":[150],"We":[151,184],"also":[152],"propose":[153],"approximation":[155],"it,":[157],"stating":[158],"theoretical":[160],"optimality":[161],"conditions":[162],"process":[166],"connection":[169],"approach":[172],"previously":[173],"thought":[174],"mere":[178],"heuristic":[179],"scheduling.":[183],"explore":[185],"performance":[188],"PoliteBinaryLambdaCrawl":[190],"other":[192],"methods":[193],"handling":[195],"dataset":[200],"collected":[201],"crawling":[203],"over":[204,208],"18.5M":[205],"URLs":[206],"14":[209],"weeks.":[210]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2949213912","counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":7},{"year":2020,"cited_by_count":12},{"year":2019,"cited_by_count":3}],"updated_date":"2025-02-15T23:26:58.018457","created_date":"2019-06-27"}