-
-
Notifications
You must be signed in to change notification settings - Fork 2.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Faster User ID reports processing: use ranking query #14551
Changes from all commits
d4877bf
a0a5cf3
1996b6c
b010839
63407fb
b28a116
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,7 +10,8 @@ | |
use Piwik\Config; | ||
use Piwik\DataArray; | ||
use Piwik\DataTable; | ||
use Piwik\Metrics; | ||
use Piwik\Metrics as PiwikMetrics; | ||
use Piwik\RankingQuery; | ||
|
||
/** | ||
* Archiver that aggregates metrics per user ID (user_id field). | ||
|
@@ -77,19 +78,32 @@ protected function aggregateUsers() | |
$userIdFieldName = self::USER_ID_FIELD; | ||
$visitorIdFieldName = self::VISITOR_ID_FIELD; | ||
|
||
$rankingQueryLimit = $this->getRankingQueryLimit(); | ||
|
||
$rankingQuery = false; | ||
if ($rankingQueryLimit > 0) { | ||
$rankingQuery = new RankingQuery($rankingQueryLimit); | ||
$rankingQuery->setOthersLabel(DataTable::LABEL_SUMMARY_ROW); | ||
$rankingQuery->addLabelColumn($userIdFieldName); | ||
$rankingQuery->addLabelColumn($visitorIdFieldName); | ||
} | ||
|
||
/** @var \Zend_Db_Statement $query */ | ||
$query = $this->getLogAggregator()->queryVisitsByDimension( | ||
array(self::USER_ID_FIELD), | ||
"log_visit.$userIdFieldName IS NOT NULL AND log_visit.$userIdFieldName != ''", | ||
array("LOWER(HEX($visitorIdFieldName)) as $visitorIdFieldName") | ||
array("LOWER(HEX($visitorIdFieldName)) as $visitorIdFieldName"), | ||
$metrics = false, | ||
$rankingQuery, | ||
self::USER_ID_FIELD . ' ASC' | ||
); | ||
|
||
if ($query === false) { | ||
return; | ||
} | ||
|
||
$rowsCount = 0; | ||
while ($row = $query->fetch()) { | ||
foreach ($query as $row) { | ||
$rowsCount++; | ||
$this->arrays->sumMetricsVisits($row[$userIdFieldName], $row); | ||
$this->rememberVisitorId($row); | ||
|
@@ -105,8 +119,23 @@ protected function insertDayReports() | |
{ | ||
/** @var DataTable $dataTable */ | ||
$dataTable = $this->arrays->asDataTable(); | ||
|
||
// deal w/ ranking query summary row | ||
$rankingQuerySummaryRow = $dataTable->getRowFromLabel(DataTable::LABEL_SUMMARY_ROW); | ||
if ($rankingQuerySummaryRow) { | ||
$rankingQuerySummaryRowId = $dataTable->getRowIdFromLabel(DataTable::LABEL_SUMMARY_ROW); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you explain why this code is needed, and do we do this kind of logic in other archivers too (which ones)? if not, why is it needed in this archiver? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ranking query creates a row w/ label -1, this is added to the DataArray which is just an array, not a datatable so it has no concept of a summary row. When DataArray::asDataTable() is called, it adds the row as a regular row and not as a summary row. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok, wondering why is this logic not needed for other plugins? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is required, Actions does it here: https://github.com/matomo-org/matomo/blob/3.x-dev/plugins/Actions/ArchivingHelper.php#L383 It's not used in Events/Contents, but that's likely a bug that wasn't dealt with since it's not tested in BlobReportLimitingTest. I'll add it to my list to fix that. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @diosmosis do you mind creating an issue for this bug in case it later slips off the list? unless you will open the PR this week or so. Thanks! |
||
$dataTable->deleteRow($rankingQuerySummaryRowId); | ||
|
||
$actualSummaryRow = $dataTable->getRowFromId(DataTable::ID_SUMMARY_ROW); | ||
if ($actualSummaryRow) { | ||
$actualSummaryRow->sumRow($rankingQuerySummaryRow); | ||
} else { | ||
$dataTable->addSummaryRow($rankingQuerySummaryRow); | ||
} | ||
} | ||
|
||
$this->setVisitorIds($dataTable); | ||
$report = $dataTable->getSerialized($this->maximumRowsInDataTableLevelZero, null, Metrics::INDEX_NB_VISITS); | ||
$report = $dataTable->getSerialized($this->maximumRowsInDataTableLevelZero, null, PiwikMetrics::INDEX_NB_VISITS); | ||
$this->getProcessor()->insertBlobRecord(self::USERID_ARCHIVE_RECORD, $report); | ||
} | ||
|
||
|
@@ -137,4 +166,15 @@ private function setVisitorIds(DataTable $dataTable) | |
} | ||
} | ||
|
||
private function getRankingQueryLimit() | ||
{ | ||
$configGeneral = Config::getInstance()->General; | ||
$configLimit = $configGeneral['archiving_ranking_query_row_limit']; | ||
$limit = $configLimit == 0 ? 0 : max( | ||
$configLimit, | ||
$this->maximumRowsInDataTableLevelZero | ||
); | ||
return $limit; | ||
} | ||
|
||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
didn't know that worked 👍
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Neither did I :)