Faster User ID reports processing: use ranking query by diosmosis · Pull Request #14551 · matomo-org/matomo · GitHub
Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Faster User ID reports processing: use ranking query #14551

Merged
merged 6 commits into from
Jun 26, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions core/DataAccess/LogAggregator.php
Original file line number Diff line number Diff line change
Expand Up @@ -318,13 +318,14 @@ public function getMetricsFromVisitByDimension($dimension)
* @param bool|\Piwik\RankingQuery $rankingQuery
* A pre-configured ranking query instance that will be used to limit the result.
* If set, the return value is the array returned by {@link Piwik\RankingQuery::execute()}.
*
* @return mixed A Zend_Db_Statement if `$rankingQuery` isn't supplied, otherwise the result of
* {@link Piwik\RankingQuery::execute()}. Read {@link queryVisitsByDimension() this}
* to see what aggregate data is calculated by the query.
* @api
*/
public function queryVisitsByDimension(array $dimensions = array(), $where = false, array $additionalSelects = array(),
$metrics = false, $rankingQuery = false)
$metrics = false, $rankingQuery = false, $orderBy = false)
{
$tableName = self::LOG_VISIT_TABLE;
$availableMetrics = $this->getVisitsMetricFields();
Expand All @@ -333,16 +334,22 @@ public function queryVisitsByDimension(array $dimensions = array(), $where = fal
$from = array($tableName);
$where = $this->getWhereStatement($tableName, self::VISIT_DATETIME_FIELD, $where);
$groupBy = $this->getGroupByStatement($dimensions, $tableName);
$orderBy = false;
$orderBys = $orderBy ? [$orderBy] : [];

if ($rankingQuery) {
$orderBy = '`' . Metrics::INDEX_NB_VISITS . '` DESC';
$orderBys[] = '`' . Metrics::INDEX_NB_VISITS . '` DESC';
}

$query = $this->generateQuery($select, $from, $where, $groupBy, $orderBy);
$query = $this->generateQuery($select, $from, $where, $groupBy, implode(', ', $orderBys));

if ($rankingQuery) {
unset($availableMetrics[Metrics::INDEX_MAX_ACTIONS]);

// INDEX_NB_UNIQ_FINGERPRINTS is only processed if specifically asked for
if (!$this->isMetricRequested(Metrics::INDEX_NB_UNIQ_FINGERPRINTS, $metrics)) {
unset($availableMetrics[Metrics::INDEX_NB_UNIQ_FINGERPRINTS]);
}

$sumColumns = array_keys($availableMetrics);

if ($metrics) {
Expand Down
48 changes: 44 additions & 4 deletions plugins/UserId/Archiver.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
use Piwik\Config;
use Piwik\DataArray;
use Piwik\DataTable;
use Piwik\Metrics;
use Piwik\Metrics as PiwikMetrics;
use Piwik\RankingQuery;

/**
* Archiver that aggregates metrics per user ID (user_id field).
Expand Down Expand Up @@ -77,19 +78,32 @@ protected function aggregateUsers()
$userIdFieldName = self::USER_ID_FIELD;
$visitorIdFieldName = self::VISITOR_ID_FIELD;

$rankingQueryLimit = $this->getRankingQueryLimit();

$rankingQuery = false;
if ($rankingQueryLimit > 0) {
$rankingQuery = new RankingQuery($rankingQueryLimit);
$rankingQuery->setOthersLabel(DataTable::LABEL_SUMMARY_ROW);
$rankingQuery->addLabelColumn($userIdFieldName);
$rankingQuery->addLabelColumn($visitorIdFieldName);
}

/** @var \Zend_Db_Statement $query */
$query = $this->getLogAggregator()->queryVisitsByDimension(
array(self::USER_ID_FIELD),
"log_visit.$userIdFieldName IS NOT NULL AND log_visit.$userIdFieldName != ''",
array("LOWER(HEX($visitorIdFieldName)) as $visitorIdFieldName")
array("LOWER(HEX($visitorIdFieldName)) as $visitorIdFieldName"),
$metrics = false,
$rankingQuery,
self::USER_ID_FIELD . ' ASC'
);

if ($query === false) {
return;
}

$rowsCount = 0;
while ($row = $query->fetch()) {
foreach ($query as $row) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

didn't know that worked 👍

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Neither did I :)

$rowsCount++;
$this->arrays->sumMetricsVisits($row[$userIdFieldName], $row);
$this->rememberVisitorId($row);
Expand All @@ -105,8 +119,23 @@ protected function insertDayReports()
{
/** @var DataTable $dataTable */
$dataTable = $this->arrays->asDataTable();

// deal w/ ranking query summary row
$rankingQuerySummaryRow = $dataTable->getRowFromLabel(DataTable::LABEL_SUMMARY_ROW);
if ($rankingQuerySummaryRow) {
$rankingQuerySummaryRowId = $dataTable->getRowIdFromLabel(DataTable::LABEL_SUMMARY_ROW);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you explain why this code is needed, and do we do this kind of logic in other archivers too (which ones)? if not, why is it needed in this archiver?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ranking query creates a row w/ label -1, this is added to the DataArray which is just an array, not a datatable so it has no concept of a summary row. When DataArray::asDataTable() is called, it adds the row as a regular row and not as a summary row.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, wondering why is this logic not needed for other plugins?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is required, Actions does it here: https://github.com/matomo-org/matomo/blob/3.x-dev/plugins/Actions/ArchivingHelper.php#L383

It's not used in Events/Contents, but that's likely a bug that wasn't dealt with since it's not tested in BlobReportLimitingTest. I'll add it to my list to fix that.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@diosmosis do you mind creating an issue for this bug in case it later slips off the list? unless you will open the PR this week or so. Thanks!

$dataTable->deleteRow($rankingQuerySummaryRowId);

$actualSummaryRow = $dataTable->getRowFromId(DataTable::ID_SUMMARY_ROW);
if ($actualSummaryRow) {
$actualSummaryRow->sumRow($rankingQuerySummaryRow);
} else {
$dataTable->addSummaryRow($rankingQuerySummaryRow);
}
}

$this->setVisitorIds($dataTable);
$report = $dataTable->getSerialized($this->maximumRowsInDataTableLevelZero, null, Metrics::INDEX_NB_VISITS);
$report = $dataTable->getSerialized($this->maximumRowsInDataTableLevelZero, null, PiwikMetrics::INDEX_NB_VISITS);
$this->getProcessor()->insertBlobRecord(self::USERID_ARCHIVE_RECORD, $report);
}

Expand Down Expand Up @@ -137,4 +166,15 @@ private function setVisitorIds(DataTable $dataTable)
}
}

private function getRankingQueryLimit()
{
$configGeneral = Config::getInstance()->General;
$configLimit = $configGeneral['archiving_ranking_query_row_limit'];
$limit = $configLimit == 0 ? 0 : max(
$configLimit,
$this->maximumRowsInDataTableLevelZero
);
return $limit;
}

}
2 changes: 2 additions & 0 deletions tests/PHPUnit/Fixtures/ManyVisitsWithMockLocationProvider.php
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ private function trackActions(\PiwikTracker $t, &$visitorCounter, $actionType, $
$visitDate = Date::factory($this->dateTime);

$t->setNewVisitorId();
$t->setUserId('user' . $visitorCounter);
$t->setIp("156.5.3.$visitorCounter");

$t->setUserAgent($userAgents[$visitorCounter]);
Expand Down Expand Up @@ -193,6 +194,7 @@ private function trackOrders($t)
$cat = $i % 5;

$t->setNewVisitorId();
$t->setUserId('user' . ($i + 10000));
$t->setIp("155.5.4.$i");
$t->setEcommerceView("id_book$i", "Book$i", "Books Cat #$cat", 7.50);
self::checkResponse($t->doTrackPageView('bought book'));
Expand Down
3 changes: 3 additions & 0 deletions tests/PHPUnit/System/BlobReportLimitingTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ public function getApiForTesting()
'Resolution.getResolution', 'Resolution.getConfiguration', 'DevicesDetection.getOsVersions',
'DevicesDetection.getBrowserVersions',
'UserCountry.getRegion', 'UserCountry.getCity',
'UserId.getUsers',
);

$ecommerceApi = array('Goals.getItemsSku', 'Goals.getItemsName', 'Goals.getItemsCategory');
Expand Down Expand Up @@ -156,6 +157,7 @@ public function testApiWithRankingQueryDisabled()
$generalConfig['datatable_archiving_maximum_rows_subtable_custom_variables'] = 500;
$generalConfig['archiving_ranking_query_row_limit'] = 0;
$generalConfig['datatable_archiving_maximum_rows_site_search'] = 500;
$generalConfig['datatable_archiving_maximum_rows_userid_users'] = 500;

foreach ($this->getRankingQueryDisabledApiForTesting() as $pair) {
list($apiToCall, $params) = $pair;
Expand Down Expand Up @@ -184,6 +186,7 @@ protected static function setUpConfigOptions()
$generalConfig['datatable_archiving_maximum_rows_subtable_custom_variables'] = 2;
$generalConfig['datatable_archiving_maximum_rows_subtable_actions'] = 2;
$generalConfig['datatable_archiving_maximum_rows_standard'] = 3;
$generalConfig['datatable_archiving_maximum_rows_userid_users'] = 3;
$generalConfig['archiving_ranking_query_row_limit'] = 50000;
// Should be more than the datatable_archiving_maximum_rows_actions as code will take the max of these two
$generalConfig['datatable_archiving_maximum_rows_site_search'] = 5;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<nb_uniq_visitors>3</nb_uniq_visitors>
<nb_visits>15</nb_visits>
<nb_actions>15</nb_actions>
<nb_users>0</nb_users>
<nb_users>3</nb_users>
<max_actions>1</max_actions>
<sum_visit_length>0</sum_visit_length>
<bounce_count>15</bounce_count>
Expand All @@ -18,7 +18,7 @@
<nb_uniq_visitors>3</nb_uniq_visitors>
<nb_visits>15</nb_visits>
<nb_actions>30</nb_actions>
<nb_users>0</nb_users>
<nb_users>3</nb_users>
<max_actions>2</max_actions>
<sum_visit_length>15</sum_visit_length>
<bounce_count>0</bounce_count>
Expand All @@ -31,7 +31,7 @@
<nb_uniq_visitors>9</nb_uniq_visitors>
<nb_visits>45</nb_visits>
<nb_actions>80</nb_actions>
<nb_users>0</nb_users>
<nb_users>9</nb_users>
<max_actions>2</max_actions>
<sum_visit_length>35</sum_visit_length>
<bounce_count>10</bounce_count>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<nb_uniq_visitors>4</nb_uniq_visitors>
<nb_visits>20</nb_visits>
<nb_actions>25</nb_actions>
<nb_users>0</nb_users>
<nb_users>4</nb_users>
<max_actions>2</max_actions>
<sum_visit_length>5</sum_visit_length>
<bounce_count>15</bounce_count>
Expand All @@ -18,7 +18,7 @@
<nb_uniq_visitors>3</nb_uniq_visitors>
<nb_visits>15</nb_visits>
<nb_actions>30</nb_actions>
<nb_users>0</nb_users>
<nb_users>3</nb_users>
<max_actions>2</max_actions>
<sum_visit_length>15</sum_visit_length>
<bounce_count>0</bounce_count>
Expand All @@ -31,7 +31,7 @@
<nb_uniq_visitors>8</nb_uniq_visitors>
<nb_visits>40</nb_visits>
<nb_actions>70</nb_actions>
<nb_users>0</nb_users>
<nb_users>8</nb_users>
<max_actions>2</max_actions>
<sum_visit_length>30</sum_visit_length>
<bounce_count>10</bounce_count>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<nb_uniq_visitors>3</nb_uniq_visitors>
<nb_visits>4</nb_visits>
<nb_actions>4</nb_actions>
<nb_users>0</nb_users>
<nb_users>3</nb_users>
<max_actions>1</max_actions>
<sum_visit_length>0</sum_visit_length>
<bounce_count>4</bounce_count>
Expand All @@ -17,7 +17,7 @@
<nb_uniq_visitors>3</nb_uniq_visitors>
<nb_visits>3</nb_visits>
<nb_actions>3</nb_actions>
<nb_users>0</nb_users>
<nb_users>3</nb_users>
<max_actions>1</max_actions>
<sum_visit_length>0</sum_visit_length>
<bounce_count>3</bounce_count>
Expand All @@ -29,7 +29,7 @@
<nb_uniq_visitors>2</nb_uniq_visitors>
<nb_visits>2</nb_visits>
<nb_actions>2</nb_actions>
<nb_users>0</nb_users>
<nb_users>2</nb_users>
<max_actions>1</max_actions>
<sum_visit_length>0</sum_visit_length>
<bounce_count>2</bounce_count>
Expand All @@ -41,7 +41,7 @@
<nb_uniq_visitors>2</nb_uniq_visitors>
<nb_visits>2</nb_visits>
<nb_actions>2</nb_actions>
<nb_users>0</nb_users>
<nb_users>2</nb_users>
<max_actions>1</max_actions>
<sum_visit_length>0</sum_visit_length>
<bounce_count>2</bounce_count>
Expand All @@ -53,7 +53,7 @@
<nb_uniq_visitors>9</nb_uniq_visitors>
<nb_visits>9</nb_visits>
<nb_actions>9</nb_actions>
<nb_users>0</nb_users>
<nb_users>9</nb_users>
<max_actions>2</max_actions>
<sum_visit_length>0</sum_visit_length>
<bounce_count>9</bounce_count>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<nb_uniq_visitors>3</nb_uniq_visitors>
<nb_visits>3</nb_visits>
<nb_actions>3</nb_actions>
<nb_users>0</nb_users>
<nb_users>3</nb_users>
<max_actions>1</max_actions>
<sum_visit_length>0</sum_visit_length>
<bounce_count>3</bounce_count>
Expand All @@ -17,7 +17,7 @@
<nb_uniq_visitors>1</nb_uniq_visitors>
<nb_visits>1</nb_visits>
<nb_actions>1</nb_actions>
<nb_users>0</nb_users>
<nb_users>1</nb_users>
<max_actions>1</max_actions>
<sum_visit_length>0</sum_visit_length>
<bounce_count>1</bounce_count>
Expand All @@ -28,7 +28,7 @@
<nb_uniq_visitors>2</nb_uniq_visitors>
<nb_visits>2</nb_visits>
<nb_actions>2</nb_actions>
<nb_users>0</nb_users>
<nb_users>2</nb_users>
<max_actions>1</max_actions>
<sum_visit_length>0</sum_visit_length>
<bounce_count>2</bounce_count>
Expand All @@ -41,7 +41,7 @@
<nb_uniq_visitors>2</nb_uniq_visitors>
<nb_visits>2</nb_visits>
<nb_actions>2</nb_actions>
<nb_users>0</nb_users>
<nb_users>2</nb_users>
<max_actions>1</max_actions>
<sum_visit_length>0</sum_visit_length>
<bounce_count>2</bounce_count>
Expand All @@ -53,7 +53,7 @@
<nb_uniq_visitors>1</nb_uniq_visitors>
<nb_visits>1</nb_visits>
<nb_actions>1</nb_actions>
<nb_users>0</nb_users>
<nb_users>1</nb_users>
<max_actions>1</max_actions>
<sum_visit_length>0</sum_visit_length>
<bounce_count>1</bounce_count>
Expand All @@ -64,7 +64,7 @@
<nb_uniq_visitors>1</nb_uniq_visitors>
<nb_visits>1</nb_visits>
<nb_actions>1</nb_actions>
<nb_users>0</nb_users>
<nb_users>1</nb_users>
<max_actions>1</max_actions>
<sum_visit_length>0</sum_visit_length>
<bounce_count>1</bounce_count>
Expand All @@ -77,7 +77,7 @@
<nb_uniq_visitors>7</nb_uniq_visitors>
<nb_visits>7</nb_visits>
<nb_actions>7</nb_actions>
<nb_users>0</nb_users>
<nb_users>7</nb_users>
<max_actions>1</max_actions>
<sum_visit_length>0</sum_visit_length>
<bounce_count>7</bounce_count>
Expand Down
Loading