Merge pull request #278 from d00rman/blacklist · wikimedia/mediawiki-services-change-propagation@a66b4c6 · GitHub
Skip to content

Commit

Permalink
Merge pull request #278 from d00rman/blacklist
Browse files Browse the repository at this point in the history
Allow for events to be blacklisted
  • Loading branch information
Pchelolo authored Jul 2, 2018
2 parents 06178c2 + 3c213dc commit a66b4c6
Showing 4 changed files with 157 additions and 0 deletions.
34 changes: 34 additions & 0 deletions config.example.wikimedia.yaml
Original file line number Diff line number Diff line change
@@ -75,6 +75,40 @@ spec: &spec
queue.buffering.max.messages: "10"
compression.codec: snappy
concurrency: 250
enable_blacklist: true
blacklist:
en.wikipedia.org:
- 'User:B-bot/Event_log'
- 'User:DeltaQuad/UAA/Wait'
- 'User:JamesR/AdminStats'
- 'User:Kudpung/Dashboard'
# Various dashboards
- 'User:Breawycker/Wikipedia'
- 'User:Sonia/dashboard'
- 'User:Ocaasi/dashboard'
- 'User:Nolelover'
- 'User:Calmer_Waters'
- '/User%3ARedwolf24\//'
- 'User:Technical_13/dashboard'
- 'Template:Cratstats'
# Cyberbot is creating 90% of null edits
- '/^User:Cyberbot_I\//'
- '/^User:Cyberbot_II\//'
- '/^User:Cyberpower678\//'
- '/^User:Darts170Darts170\//'
- 'صارف:Cyberbot_I/Run/Adminstats'
- 'Defnyddiwr:Cyberbot_I/Run/Adminstats'
- 'User:Pentjuuu!.!/sandbox'
- 'User:AllyD/CSDlog'
- 'User:Peter_I._Vardy/sandbox-13'
- 'User:I_dream_of_horses/CSD_log'
- 'User:MJ180MJ180/sandbox'
- 'Talk:United_States_presidential_election,_2016'
- 'Wikipedia:Reference_desk/Humanities'
- 'Wikipedia:WikiProject_Deletion_sorting/People'
- 'Wikipedia:WikiProject_Deletion_sorting/United_States_of_America'
- 'Wikipedia:Articles_for_creation/Redirects'
- 'Wikipedia:Administrators%27_noticeboard/Incidents'
templates:

summary_definition_rerender: &summary_definition_rerender_spec
36 changes: 36 additions & 0 deletions lib/base_executor.js
Original file line number Diff line number Diff line change
@@ -92,6 +92,14 @@ class BaseExecutor {
this.disable_delayed_execution = rule.spec.disable_delayed_execution
|| this.options.disable_delayed_execution
|| false;
/* eslint-disable multiline-ternary */
this.enable_blacklist =
rule.spec.enable_blacklist !== undefined ?
rule.spec.enable_blacklist : (this.options.enable_blacklist || false);
/* eslint-enable multiline-ternary */
this.blacklist = BaseExecutor._compileBlacklist(
rule.spec.blacklist || this.options.blacklist || {}
);

this._commitTimeout = null;
// In order to filter out the pending messages faster, make them an offset->msg map
@@ -352,6 +360,13 @@ class BaseExecutor {
this._logger.log('trace/sample', sampleLog);
}

_isBlacklisted(event) {
if (!this.enable_blacklist || !this.blacklist[event.meta.domain]) {
return false;
}
return this.blacklist[event.meta.domain].test(event.meta.uri);
}

_exec(origEvent, handler, statDelayStartTime, retryEvent) {
const startTime = Date.now();

@@ -360,6 +375,17 @@ class BaseExecutor {
match: handler.expand(origEvent)
};

if (this._isBlacklisted(origEvent)) {
this._logger.log('trace/blacklist', {
msg: 'Event was blacklisted',
event_str: utils.stringify(origEvent),
topic: origEvent.meta.topic,
uri: origEvent.meta.uri
});
this._hyper.metrics.increment(`${origEvent.meta.topic}_blacklist`);
return P.resolve({ status: 200 });
}

if (origEvent.delay_until && !this.disable_delayed_execution) {
// The delay_until in the job schema is a timestamp in seconds
const delayUntil = origEvent.delay_until * 1000;
@@ -620,6 +646,16 @@ class BaseExecutor {
}
return e;
}

static _compileBlacklist(blacklist) {
const result = {};
blacklist = blacklist || {};
Object.keys(blacklist).forEach((domain) => {
result[domain] = utils.constructRegex(blacklist[domain]);
});
return result;
}

}

module.exports = BaseExecutor;
24 changes: 24 additions & 0 deletions lib/utils.js
Original file line number Diff line number Diff line change
@@ -20,6 +20,7 @@ utils.triggeredBy = (event) => {
};

utils.requestId = () => TimeUUID.now().toString();

/**
* Safely stringifies the event to JSON string.
* @param {Object} event the event to stringify
@@ -33,4 +34,27 @@ utils.stringify = (event) => {
}
};

/**
 * From a list of regexes and strings, constructs a single regex that
 * matches any item in the list.
 *
 * Items wrapped in slashes (e.g. '/^User:Foo\//') are treated as regex
 * sources and embedded verbatim. Any other item is treated as a literal:
 * it is percent-decoded, regex-escaped, prefixed with '/' (unless it
 * already starts with one) and anchored to the end of the tested string.
 * Null/undefined items are ignored and other non-string items are
 * converted to strings.
 *
 * @param {Array} list the list of regexes and strings to unify
 * @return {RegExp|undefined} the compiled regex, or undefined if the list
 *   is missing, not an array, or contains no usable items
 */
utils.constructRegex = (list) => {
    if (!Array.isArray(list)) {
        return undefined;
    }
    // Empty list entries in a config file may surface as null; dropping them
    // here avoids both a crash and an accidental match-everything regex.
    const items = list.filter((item) => item !== null && item !== undefined);
    if (items.length === 0) {
        return undefined;
    }
    const alternatives = items.map((item) => {
        // Config values may be parsed as non-strings (e.g. a numeric title)
        const pattern = String(item).trim();
        if (/^\/.+\/$/.test(pattern)) {
            // Regex entry: strip the enclosing slashes and use it as is
            return `(?:${pattern.slice(1, -1)})`;
        }
        // Compare strings, instead
        let literal;
        try {
            literal = decodeURIComponent(pattern);
        } catch (e) {
            // Malformed percent-sequence (e.g. a literal '%' in the title):
            // match the entry exactly as written instead of failing.
            literal = pattern;
        }
        const slash = pattern.startsWith('/') ? '' : '/';
        return `(?:${slash}${literal.replace(/[-[\]/{}()*+?.\\^$|]/g, '\\$&')}$)`;
    });
    return new RegExp(alternatives.join('|'));
};

module.exports = utils;
63 changes: 63 additions & 0 deletions test/feature/update_rules.js
Original file line number Diff line number Diff line change
@@ -119,6 +119,35 @@ describe('RESTBase update rules', function() {
.finally(() => nock.cleanAll());
});

// Produces a resource_change event for the title User:Cyberbot_I/Test and
// checks the state of the mocked summary request afterwards.
// NOTE(review): this relies on the test configuration blacklisting that
// title for en.wikipedia.org — the config is not visible here, confirm.
it('Should not update summary for a blacklisted title', () => {
// Mock of the summary request, including the headers it would carry
const mwAPI = nock('https://en.wikipedia.org', {
reqheaders: {
'cache-control': 'no-cache',
'x-triggered-by': `req:${common.SAMPLE_REQUEST_ID},resource_change:https://en.wikipedia.org/wiki/User:Cyberbot_I/Test`,
'user-agent': 'SampleChangePropInstance'
}
})
.get('/api/rest_v1/page/summary/User%3ACyberbot_I%2FTest')
.query({ redirect: false })
.reply(200, { });

// Publish the triggering event to the test_dc.resource_change topic
return P.try(() => producer.produce('test_dc.resource_change', 0,
Buffer.from(JSON.stringify({
meta: {
topic: 'resource_change',
schema_uri: 'resource_change/1',
uri: 'https://en.wikipedia.org/api/rest_v1/page/html/User:Cyberbot_I/Test',
request_id: common.SAMPLE_REQUEST_ID,
id: uuid.now(),
dt: new Date().toISOString(),
domain: 'en.wikipedia.org'
},
tags: ['restbase']
}))))
// NOTE(review): presumably asserts that exactly one mock is still pending,
// i.e. the summary request was never made — confirm in the `common` helper.
.then(() => common.checkPendingMocks(mwAPI, 1))
// Always remove the registered mocks, whatever the outcome
.finally(() => nock.cleanAll());
});

it('Should update definition endpoint', () => {
const mwAPI = nock('https://en.wiktionary.org', {
reqheaders: {
@@ -294,6 +323,40 @@ describe('RESTBase update rules', function() {
.finally(() => nock.cleanAll());
});

// Produces a mediawiki.revision-create event for the title User:Nolelover
// and checks the state of the mocked RESTBase HTML request afterwards.
// NOTE(review): this relies on the test configuration blacklisting that
// title for en.wikipedia.org — the config is not visible here, confirm.
it('Should not update RESTBase on revision create for a blacklisted title', () => {
// Mock of the HTML re-render request, including the headers it would carry
const mwAPI = nock('https://en.wikipedia.org', {
reqheaders: {
'cache-control': 'no-cache',
'x-triggered-by': `req:${common.SAMPLE_REQUEST_ID},mediawiki.revision-create:https://en.wikipedia.org/wiki/User:Nolelover`,
'x-restbase-parentrevision': '1233',
'if-unmodified-since': 'Thu, 01 Jan 1970 00:00:01 +0000',
'user-agent': 'SampleChangePropInstance'
}
})
.get('/api/rest_v1/page/html/User%3ANolelover/1234')
.query({ redirect: false })
.reply(200, { });

// Publish the triggering event to the test_dc.mediawiki.revision-create topic
return P.try(() => producer.produce('test_dc.mediawiki.revision-create', 0,
Buffer.from(JSON.stringify({
meta: {
topic: 'mediawiki.revision-create',
schema_uri: 'revision-create/1',
uri: 'https://en.wikipedia.org/wiki/User:Nolelover',
request_id: common.SAMPLE_REQUEST_ID,
id: uuid.now(),
// Fixed event time of 1000 ms after the epoch; it corresponds to the
// 'if-unmodified-since' value in the mocked headers above
dt: new Date(1000).toISOString(),
domain: 'en.wikipedia.org'
},
page_title: 'User:Nolelover',
rev_id: 1234,
rev_timestamp: new Date().toISOString(),
rev_parent_id: 1233,
rev_content_changed: true
}))))
// NOTE(review): presumably asserts that exactly one mock is still pending,
// i.e. the HTML request was never made — confirm in the `common` helper.
.then(() => common.checkPendingMocks(mwAPI, 1))
// Always remove the registered mocks, whatever the outcome
.finally(() => nock.cleanAll());
});

it('Should not update RESTBase on revision create for wikidata', () => {
const mwAPI = nock('https://www.wikidata.org')

0 comments on commit a66b4c6

Please sign in to comment.