mirror of
https://github.com/appwrite/appwrite
synced 2026-04-21 13:37:16 +00:00
548 lines
22 KiB
PHP
548 lines
22 KiB
PHP
<?php
|
|
|
|
namespace Appwrite\Platform\Workers;
|
|
|
|
use Exception;
|
|
use Throwable;
|
|
use Utopia\Console;
|
|
use Utopia\Database\Database;
|
|
use Utopia\Database\DateTime;
|
|
use Utopia\Database\Document;
|
|
use Utopia\Platform\Action;
|
|
use Utopia\Queue\Message;
|
|
use Utopia\Registry\Registry;
|
|
use Utopia\System\System;
|
|
|
|
class StatsUsage extends Action
|
|
{
|
|
/**
|
|
* In memory per project metrics calculation
|
|
*/
|
|
protected array $stats = [];
|
|
protected int $lastTriggeredTime = 0;
|
|
protected int $keys = 0;
|
|
protected const INFINITY_PERIOD = '_inf_';
|
|
protected const BATCH_SIZE_DEVELOPMENT = 1;
|
|
protected const BATCH_SIZE_PRODUCTION = 10_000;
|
|
|
|
/**
|
|
* Stats for batch write separated per project
|
|
* @var array
|
|
*/
|
|
protected array $projects = [];
|
|
|
|
/**
|
|
* Array of stat documents to batch write to logsDB
|
|
* @var array
|
|
*/
|
|
protected array $statDocuments = [];
|
|
|
|
protected Registry $register;
|
|
|
|
/**
|
|
* Metrics to skip writing to logsDB
|
|
* As these metrics are calculated separately
|
|
* by logs DB
|
|
* @var array
|
|
*/
|
|
protected array $skipBaseMetrics = [
|
|
METRIC_DATABASES => true,
|
|
METRIC_BUCKETS => true,
|
|
METRIC_USERS => true,
|
|
METRIC_FUNCTIONS => true,
|
|
METRIC_TEAMS => true,
|
|
METRIC_MESSAGES => true,
|
|
METRIC_MAU => true,
|
|
METRIC_WEBHOOKS => true,
|
|
METRIC_PLATFORMS => true,
|
|
METRIC_PROVIDERS => true,
|
|
METRIC_TOPICS => true,
|
|
METRIC_KEYS => true,
|
|
METRIC_FILES => true,
|
|
METRIC_FILES_STORAGE => true,
|
|
METRIC_DEPLOYMENTS_STORAGE => true,
|
|
METRIC_BUILDS_STORAGE => true,
|
|
METRIC_DEPLOYMENTS => true,
|
|
METRIC_BUILDS => true,
|
|
METRIC_COLLECTIONS => true,
|
|
METRIC_DOCUMENTS => true,
|
|
METRIC_DATABASES_STORAGE => true,
|
|
];
|
|
|
|
/**
|
|
* Skip metrics associated with parent IDs
|
|
* these need to be checked individually with `str_ends_with`
|
|
*/
|
|
protected array $skipParentIdMetrics = [
|
|
'.files',
|
|
'.files.storage',
|
|
'.collections',
|
|
'.documents',
|
|
'.deployments',
|
|
'.deployments.storage',
|
|
'.builds',
|
|
'.builds.storage',
|
|
'.databases.storage'
|
|
];
|
|
|
|
/**
|
|
* @var callable(): Database
|
|
*/
|
|
protected mixed $getLogsDB;
|
|
|
|
protected array $periods = [
|
|
'1h' => 'Y-m-d H:00',
|
|
'1d' => 'Y-m-d 00:00',
|
|
'inf' => '0000-00-00 00:00'
|
|
];
|
|
|
|
public static function getName(): string
|
|
{
|
|
return 'stats-usage';
|
|
}
|
|
|
|
protected function getBatchSize(): int
|
|
{
|
|
return System::getEnv('_APP_ENV', 'development') === 'development'
|
|
? self::BATCH_SIZE_DEVELOPMENT
|
|
: self::BATCH_SIZE_PRODUCTION;
|
|
}
|
|
/**
|
|
* @throws Exception
|
|
*/
|
|
public function __construct()
|
|
{
|
|
|
|
$this
|
|
->desc('Stats usage worker')
|
|
->inject('message')
|
|
->inject('getProjectDB')
|
|
->inject('getLogsDB')
|
|
->inject('register')
|
|
->callback($this->action(...));
|
|
|
|
$this->lastTriggeredTime = time();
|
|
}
|
|
|
|
/**
|
|
* @param Message $message
|
|
* @param callable(): Database $getProjectDB
|
|
* @param callable(): Database $getLogsDB
|
|
* @param Registry $register
|
|
* @return void
|
|
* @throws \Utopia\Database\Exception
|
|
* @throws Exception
|
|
*/
|
|
public function action(Message $message, callable $getProjectDB, callable $getLogsDB, Registry $register): void
|
|
{
|
|
$this->getLogsDB = $getLogsDB;
|
|
$this->register = $register;
|
|
$payload = $message->getPayload() ?? [];
|
|
if (empty($payload)) {
|
|
throw new Exception('Missing payload');
|
|
}
|
|
//Todo Figure out way to preserve keys when the container is being recreated @shimonewman
|
|
|
|
$aggregationInterval = (int) System::getEnv('_APP_USAGE_AGGREGATION_INTERVAL', '20');
|
|
$project = new Document($payload['project'] ?? []);
|
|
$projectId = $project->getSequence();
|
|
foreach ($payload['reduce'] ?? [] as $document) {
|
|
if (empty($document)) {
|
|
continue;
|
|
}
|
|
|
|
$this->reduce(
|
|
project: $project,
|
|
document: new Document($document),
|
|
metrics: $payload['metrics'],
|
|
getProjectDB: $getProjectDB
|
|
);
|
|
}
|
|
|
|
$this->stats[$projectId]['project'] = $project;
|
|
$this->stats[$projectId]['receivedAt'] = DateTime::format(new \DateTime('@' . $message->getTimestamp()));
|
|
foreach ($payload['metrics'] ?? [] as $metric) {
|
|
$this->keys++;
|
|
if (!isset($this->stats[$projectId]['keys'][$metric['key']])) {
|
|
$this->stats[$projectId]['keys'][$metric['key']] = $metric['value'];
|
|
continue;
|
|
}
|
|
|
|
$this->stats[$projectId]['keys'][$metric['key']] += $metric['value'];
|
|
}
|
|
|
|
// If keys crossed threshold or X time passed since the last send and there are some keys in the array ($this->stats)
|
|
if (
|
|
$this->keys >= $this->getBatchSize() ||
|
|
(time() - $this->lastTriggeredTime > $aggregationInterval && $this->keys > 0)
|
|
) {
|
|
Console::warning('[' . DateTime::now() . '] Aggregated ' . $this->keys . ' keys');
|
|
|
|
$this->commitToDB($getProjectDB);
|
|
|
|
$this->stats = [];
|
|
$this->keys = 0;
|
|
$this->lastTriggeredTime = time();
|
|
}
|
|
}
|
|
|
|
/**
|
|
* On Documents that tied by relations like functions>deployments>build || documents>collection>database || buckets>files.
|
|
* When we remove a parent document we need to deduct his children aggregation from the project scope.
|
|
* @param Document $project
|
|
* @param Document $document
|
|
* @param array $metrics
|
|
* @param callable(): Database $getProjectDB
|
|
* @return void
|
|
*/
|
|
protected function reduce(Document $project, Document $document, array &$metrics, callable $getProjectDB): void
|
|
{
|
|
$dbForProject = $getProjectDB($project);
|
|
|
|
try {
|
|
switch (true) {
|
|
case $document->getCollection() === 'users': // users
|
|
$sessions = count($document->getAttribute(METRIC_SESSIONS, 0));
|
|
if (!empty($sessions)) {
|
|
$metrics[] = [
|
|
'key' => METRIC_SESSIONS,
|
|
'value' => ($sessions * -1),
|
|
];
|
|
}
|
|
break;
|
|
case $document->getCollection() === 'databases': // databases
|
|
$collections = $dbForProject->getDocument('stats', md5(self::INFINITY_PERIOD . str_replace('{databaseInternalId}', $document->getSequence(), METRIC_DATABASE_ID_COLLECTIONS)));
|
|
$documents = $dbForProject->getDocument('stats', md5(self::INFINITY_PERIOD . str_replace('{databaseInternalId}', $document->getSequence(), METRIC_DATABASE_ID_DOCUMENTS)));
|
|
if (!empty($collections['value'])) {
|
|
$metrics[] = [
|
|
'key' => METRIC_COLLECTIONS,
|
|
'value' => ($collections['value'] * -1),
|
|
];
|
|
}
|
|
|
|
if (!empty($documents['value'])) {
|
|
$metrics[] = [
|
|
'key' => METRIC_DOCUMENTS,
|
|
'value' => ($documents['value'] * -1),
|
|
];
|
|
}
|
|
break;
|
|
case str_starts_with($document->getCollection(), 'database_') && !str_contains($document->getCollection(), 'collection'): //collections
|
|
$parts = explode('_', $document->getCollection());
|
|
$databaseInternalId = $parts[1] ?? 0;
|
|
$documents = $dbForProject->getDocument('stats', md5(self::INFINITY_PERIOD . str_replace(
|
|
['{databaseInternalId}', '{collectionInternalId}'],
|
|
[$databaseInternalId, $document->getSequence()],
|
|
METRIC_DATABASE_ID_COLLECTION_ID_DOCUMENTS
|
|
)));
|
|
|
|
if (!empty($documents['value'])) {
|
|
$metrics[] = [
|
|
'key' => METRIC_DOCUMENTS,
|
|
'value' => ($documents['value'] * -1),
|
|
];
|
|
$metrics[] = [
|
|
'key' => str_replace('{databaseInternalId}', $databaseInternalId, METRIC_DATABASE_ID_DOCUMENTS),
|
|
'value' => ($documents['value'] * -1),
|
|
];
|
|
}
|
|
break;
|
|
|
|
case $document->getCollection() === 'buckets':
|
|
$files = $dbForProject->getDocument('stats', md5(self::INFINITY_PERIOD . str_replace('{bucketInternalId}', $document->getSequence(), METRIC_BUCKET_ID_FILES)));
|
|
$storage = $dbForProject->getDocument('stats', md5(self::INFINITY_PERIOD . str_replace('{bucketInternalId}', $document->getSequence(), METRIC_BUCKET_ID_FILES_STORAGE)));
|
|
|
|
if (!empty($files['value'])) {
|
|
$metrics[] = [
|
|
'key' => METRIC_FILES,
|
|
'value' => ($files['value'] * -1),
|
|
];
|
|
}
|
|
|
|
if (!empty($storage['value'])) {
|
|
$metrics[] = [
|
|
'key' => METRIC_FILES_STORAGE,
|
|
'value' => ($storage['value'] * -1),
|
|
];
|
|
}
|
|
break;
|
|
|
|
case $document->getCollection() === 'functions' || $document->getCollection() === 'sites':
|
|
$deployments = $dbForProject->getDocument('stats', md5(self::INFINITY_PERIOD . str_replace(['{resourceType}', '{resourceInternalId}'], [$document->getCollection(), $document->getSequence()], METRIC_RESOURCE_TYPE_ID_DEPLOYMENTS)));
|
|
$deploymentsStorage = $dbForProject->getDocument('stats', md5(self::INFINITY_PERIOD . str_replace(['{resourceType}', '{resourceInternalId}'], [$document->getCollection(), $document->getSequence()], METRIC_RESOURCE_TYPE_ID_DEPLOYMENTS_STORAGE)));
|
|
$builds = $dbForProject->getDocument('stats', md5(self::INFINITY_PERIOD . str_replace(['{resourceType}', '{resourceInternalId}'], [$document->getCollection(), $document->getSequence()], METRIC_RESOURCE_TYPE_ID_BUILDS)));
|
|
$buildsStorage = $dbForProject->getDocument('stats', md5(self::INFINITY_PERIOD . str_replace(['{resourceType}', '{resourceInternalId}'], [$document->getCollection(), $document->getSequence()], METRIC_RESOURCE_TYPE_ID_BUILDS_STORAGE)));
|
|
$buildsCompute = $dbForProject->getDocument('stats', md5(self::INFINITY_PERIOD . str_replace(['{resourceType}', '{resourceInternalId}'], [$document->getCollection(), $document->getSequence()], METRIC_RESOURCE_TYPE_ID_BUILDS_COMPUTE)));
|
|
$executions = $dbForProject->getDocument('stats', md5(self::INFINITY_PERIOD . str_replace(['{resourceType}', '{resourceInternalId}'], [$document->getCollection(), $document->getSequence()], METRIC_RESOURCE_TYPE_ID_EXECUTIONS)));
|
|
$executionsCompute = $dbForProject->getDocument('stats', md5(self::INFINITY_PERIOD . str_replace(['{resourceType}', '{resourceInternalId}'], [$document->getCollection(), $document->getSequence()], METRIC_RESOURCE_TYPE_ID_EXECUTIONS_COMPUTE)));
|
|
|
|
if (!empty($deployments['value'])) {
|
|
$metrics[] = [
|
|
'key' => METRIC_DEPLOYMENTS,
|
|
'value' => ($deployments['value'] * -1),
|
|
];
|
|
$metrics[] = [
|
|
'key' => str_replace("{resourceType}", $document->getCollection(), METRIC_RESOURCE_TYPE_DEPLOYMENTS),
|
|
'value' => ($deployments['value'] * -1),
|
|
];
|
|
}
|
|
|
|
if (!empty($deploymentsStorage['value'])) {
|
|
$metrics[] = [
|
|
'key' => METRIC_DEPLOYMENTS_STORAGE,
|
|
'value' => ($deploymentsStorage['value'] * -1),
|
|
];
|
|
$metrics[] = [
|
|
'key' => str_replace("{resourceType}", $document->getCollection(), METRIC_RESOURCE_TYPE_DEPLOYMENTS_STORAGE),
|
|
'value' => ($deploymentsStorage['value'] * -1),
|
|
];
|
|
}
|
|
|
|
if (!empty($builds['value'])) {
|
|
$metrics[] = [
|
|
'key' => METRIC_BUILDS,
|
|
'value' => ($builds['value'] * -1),
|
|
];
|
|
$metrics[] = [
|
|
'key' => str_replace("{resourceType}", $document->getCollection(), METRIC_RESOURCE_TYPE_BUILDS),
|
|
'value' => ($builds['value'] * -1),
|
|
];
|
|
}
|
|
|
|
if (!empty($buildsStorage['value'])) {
|
|
$metrics[] = [
|
|
'key' => METRIC_BUILDS_STORAGE,
|
|
'value' => ($buildsStorage['value'] * -1),
|
|
];
|
|
$metrics[] = [
|
|
'key' => str_replace("{resourceType}", $document->getCollection(), METRIC_RESOURCE_TYPE_BUILDS_STORAGE),
|
|
'value' => ($buildsStorage['value'] * -1),
|
|
];
|
|
}
|
|
|
|
if (!empty($buildsCompute['value'])) {
|
|
$metrics[] = [
|
|
'key' => METRIC_BUILDS_COMPUTE,
|
|
'value' => ($buildsCompute['value'] * -1),
|
|
];
|
|
$metrics[] = [
|
|
'key' => str_replace("{resourceType}", $document->getCollection(), METRIC_RESOURCE_TYPE_BUILDS_COMPUTE),
|
|
'value' => ($buildsCompute['value'] * -1),
|
|
];
|
|
}
|
|
|
|
if (!empty($executions['value'])) {
|
|
$metrics[] = [
|
|
'key' => METRIC_EXECUTIONS,
|
|
'value' => ($executions['value'] * -1),
|
|
];
|
|
$metrics[] = [
|
|
'key' => str_replace("{resourceType}", $document->getCollection(), METRIC_RESOURCE_TYPE_EXECUTIONS),
|
|
'value' => ($executions['value'] * -1),
|
|
];
|
|
}
|
|
|
|
if (!empty($executionsCompute['value'])) {
|
|
$metrics[] = [
|
|
'key' => METRIC_EXECUTIONS_COMPUTE,
|
|
'value' => ($executionsCompute['value'] * -1),
|
|
];
|
|
$metrics[] = [
|
|
'key' => str_replace("{resourceType}", $document->getCollection(), METRIC_RESOURCE_TYPE_EXECUTIONS_COMPUTE),
|
|
'value' => ($executionsCompute['value'] * -1),
|
|
];
|
|
}
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
} catch (Throwable $e) {
|
|
Console::error("[reducer] " . " {DateTime::now()} " . " {$project->getSequence()} " . " {$e->getMessage()}");
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Commit stats to DB
|
|
* @param callable(): Database $getProjectDB
|
|
* @return void
|
|
*/
|
|
public function commitToDb(callable $getProjectDB): void
|
|
{
|
|
foreach ($this->stats as $stats) {
|
|
$project = $stats['project'] ?? new Document([]);
|
|
$numberOfKeys = !empty($stats['keys']) ? count($stats['keys']) : 0;
|
|
$receivedAt = $stats['receivedAt'] ?? null;
|
|
if ($numberOfKeys === 0) {
|
|
continue;
|
|
}
|
|
|
|
Console::log('['.DateTime::now().'] Id: '.$project->getId(). ' InternalId: '.$project->getSequence(). ' Db: '.$project->getAttribute('database').' ReceivedAt: '.$receivedAt. ' Keys: '.$numberOfKeys);
|
|
|
|
try {
|
|
foreach ($stats['keys'] ?? [] as $key => $value) {
|
|
if ($value == 0) {
|
|
continue;
|
|
}
|
|
|
|
foreach ($this->periods as $period => $format) {
|
|
$time = null;
|
|
|
|
if ($period !== 'inf') {
|
|
$time = !empty($receivedAt) ? (new \DateTime($receivedAt))->format($format) : date($format, time());
|
|
}
|
|
$id = \md5("{$time}_{$period}_{$key}");
|
|
|
|
$document = new Document([
|
|
'$id' => $id,
|
|
'period' => $period,
|
|
'time' => $time,
|
|
'metric' => $key,
|
|
'value' => $value,
|
|
'region' => System::getEnv('_APP_REGION', 'default'),
|
|
]);
|
|
|
|
|
|
$this->projects[$project->getSequence()]['project'] = new Document([
|
|
'$id' => $project->getId(),
|
|
'$sequence' => $project->getSequence(),
|
|
'database' => $project->getAttribute('database'),
|
|
]);
|
|
$this->projects[$project->getSequence()]['stats'][] = $document;
|
|
|
|
$this->prepareForLogsDB($project, $document);
|
|
}
|
|
}
|
|
} catch (Exception $e) {
|
|
Console::error('[' . DateTime::now() . '] project [' . $project->getSequence() . '] database [' . $project['database'] . '] ' . ' ' . $e->getMessage());
|
|
}
|
|
}
|
|
|
|
foreach ($this->projects as $sequence => $projectStats) {
|
|
if (empty($sequence)) {
|
|
continue;
|
|
}
|
|
try {
|
|
$dbForProject = $getProjectDB($projectStats['project']);
|
|
Console::log('Processing batch with ' . count($projectStats['stats']) . ' stats');
|
|
|
|
/**
|
|
* Sort by unique index key reduce locks/deadlocks
|
|
*/
|
|
usort($projectStats['stats'], function ($a, $b) use ($sequence) {
|
|
// Metric DESC
|
|
$cmp = strcmp($b['metric'], $a['metric']);
|
|
if ($cmp !== 0) {
|
|
return $cmp;
|
|
}
|
|
|
|
// Period ASC
|
|
$cmp = strcmp($a['period'], $b['period']);
|
|
if ($cmp !== 0) {
|
|
return $cmp;
|
|
}
|
|
|
|
// Time ASC, NULLs first
|
|
if ($a['time'] === null) {
|
|
return ($b['time'] === null) ? 0 : -1;
|
|
}
|
|
if ($b['time'] === null) {
|
|
return 1;
|
|
}
|
|
|
|
return strcmp($a['time'], $b['time']);
|
|
});
|
|
|
|
$dbForProject->upsertDocumentsWithIncrease('stats', 'value', $projectStats['stats']);
|
|
Console::success('Batch successfully written to DB');
|
|
} catch (Throwable $e) {
|
|
Console::error('Error processing stats: ' . $e->getMessage());
|
|
} finally {
|
|
unset($this->projects[$sequence]);
|
|
}
|
|
}
|
|
|
|
$this->writeToLogsDB();
|
|
|
|
}
|
|
|
|
protected function prepareForLogsDB(Document $project, Document $stat): void
|
|
{
|
|
if (System::getEnv('_APP_STATS_USAGE_DUAL_WRITING', 'disabled') === 'disabled') {
|
|
return;
|
|
}
|
|
if (array_key_exists($stat->getAttribute('metric'), $this->skipBaseMetrics)) {
|
|
return;
|
|
}
|
|
foreach ($this->skipParentIdMetrics as $skipMetric) {
|
|
if (str_ends_with($stat->getAttribute('metric'), $skipMetric)) {
|
|
return;
|
|
}
|
|
}
|
|
$documentClone = clone $stat;
|
|
$documentClone->setAttribute('$tenant', (int) $project->getSequence());
|
|
$this->statDocuments[] = $documentClone;
|
|
}
|
|
|
|
protected function writeToLogsDB(): void
|
|
{
|
|
if (System::getEnv('_APP_STATS_USAGE_DUAL_WRITING', 'disabled') === 'disabled') {
|
|
Console::log('Dual Writing is disabled. Skipping...');
|
|
return;
|
|
}
|
|
|
|
$dbForLogs = ($this->getLogsDB)()
|
|
->setTenant(null)
|
|
->setTenantPerDocument(true);
|
|
|
|
try {
|
|
Console::log('Processing batch with ' . count($this->statDocuments) . ' stats');
|
|
|
|
/**
|
|
* Sort by UNIQUE KEY "_key_metric_period_time" ("_tenant","metric" DESC,"period","time")
|
|
* Here we sort by _tenant as well because of setTenantPerDocument
|
|
*/
|
|
|
|
usort($this->statDocuments, function ($a, $b) {
|
|
// Tenant ASC
|
|
$cmp = $a['$tenant'] <=> $b['$tenant'];
|
|
if ($cmp !== 0) {
|
|
return $cmp;
|
|
}
|
|
|
|
// Metric DESC
|
|
$cmp = strcmp($b['metric'], $a['metric']);
|
|
if ($cmp !== 0) {
|
|
return $cmp;
|
|
}
|
|
|
|
// Period ASC
|
|
$cmp = strcmp($a['period'], $b['period']);
|
|
if ($cmp !== 0) {
|
|
return $cmp;
|
|
}
|
|
|
|
// Time ASC, NULLs first
|
|
if ($a['time'] === null) {
|
|
return ($b['time'] === null) ? 0 : -1;
|
|
}
|
|
if ($b['time'] === null) {
|
|
return 1;
|
|
}
|
|
|
|
return strcmp($a['time'], $b['time']);
|
|
});
|
|
|
|
$dbForLogs->upsertDocumentsWithIncrease(
|
|
'stats',
|
|
'value',
|
|
$this->statDocuments
|
|
);
|
|
Console::success('Usage logs pushed to Logs DB');
|
|
} catch (Throwable $th) {
|
|
Console::error($th->getMessage());
|
|
} finally {
|
|
// Clear statDocuments to prevent memory accumulation across batches
|
|
$this->statDocuments = [];
|
|
}
|
|
}
|
|
}
|