diff --git a/src/Audit/Adapter/ClickHouse.php b/src/Audit/Adapter/ClickHouse.php index 8a36bab..f77cf70 100644 --- a/src/Audit/Adapter/ClickHouse.php +++ b/src/Audit/Adapter/ClickHouse.php @@ -475,54 +475,76 @@ private function getTableName(): string /** * Execute a ClickHouse query via HTTP interface using Fetch Client. * - * Uses ClickHouse query parameters (sent as POST multipart form data) to prevent SQL injection. - * This is ClickHouse's native parameter mechanism - parameters are safely - * transmitted separately from the query structure. + * This unified method supports two modes of operation: * - * Parameters are referenced in the SQL using the syntax: {paramName:Type}. - * For example: SELECT * WHERE id = {id:String} + * 1. **Parameterized queries** (when $params is provided): + * Uses ClickHouse query parameters sent as POST multipart form data. + * Parameters are referenced in SQL using syntax: {paramName:Type} + * Example: SELECT * WHERE id = {id:String} * - * ClickHouse handles all parameter escaping and type conversion internally, - * making this approach fully injection-safe without needing manual escaping. + * 2. **JSON body queries** (when $jsonRows is provided): + * Uses JSONEachRow format for optimal INSERT performance. + * SQL is sent via URL query string, JSON data as POST body. + * Each row is a JSON object on a separate line. * - * Using POST body avoids URL length limits for batch operations with many parameters. - * Equivalent to: curl -X POST -F 'query=...' -F 'param_key=value' http://host/ + * ClickHouse handles all parameter escaping and type conversion internally, + * making both approaches fully injection-safe. * - * @param array $params Key-value pairs for query parameters + * @param string $sql The SQL query to execute + * @param array $params Key-value pairs for query parameters (for SELECT/UPDATE/DELETE) + * @param array>|null $jsonRows Array of rows for JSONEachRow INSERT operations + * @return string Response body * @throws Exception */ - private function query(string $sql, array $params = []): string + private function query(string $sql, array $params = [], ?array $jsonRows = null): string { $scheme = $this->secure ? 'https' : 'http'; - $url = "{$scheme}://{$this->host}:{$this->port}/"; // Update the database header for each query (in case setDatabase was called) $this->client->addHeader('X-ClickHouse-Database', $this->database); - // Build multipart form data body with query and parameters - // The Fetch client will automatically encode arrays as multipart/form-data - $body = ['query' => $sql]; - foreach ($params as $key => $value) { - $body['param_' . $key] = $this->formatParamValue($value); - } - try { + if ($jsonRows !== null) { + // JSON body mode for INSERT operations with JSONEachRow format + $url = "{$scheme}://{$this->host}:{$this->port}/?query=" . urlencode($sql); + + // Build JSONEachRow body - each row on a separate line + $jsonLines = []; + foreach ($jsonRows as $row) { + try { + $encoded = json_encode($row, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_THROW_ON_ERROR); + } catch (\JsonException $e) { + throw new Exception('Failed to encode row to JSON: ' . $e->getMessage()); + } + $jsonLines[] = $encoded; + } + $body = implode("\n", $jsonLines); + } else { + // Parameterized query mode using multipart form data + $url = "{$scheme}://{$this->host}:{$this->port}/"; + + // Build multipart form data body with query and parameters + $body = ['query' => $sql]; + foreach ($params as $key => $value) { + $body['param_' . $key] = $this->formatParamValue($value); + } + } + $response = $this->client->fetch( url: $url, method: Client::METHOD_POST, body: $body ); + if ($response->getStatusCode() !== 200) { - $bodyStr = $response->getBody(); - $bodyStr = is_string($bodyStr) ? $bodyStr : ''; - throw new Exception("ClickHouse query failed with HTTP {$response->getStatusCode()}: {$bodyStr}"); + $responseBody = $response->getBody(); + $responseBody = is_string($responseBody) ? $responseBody : ''; + throw new Exception("ClickHouse query failed with HTTP {$response->getStatusCode()}: {$responseBody}"); } - $body = $response->getBody(); - return is_string($body) ? $body : ''; + $responseBody = $response->getBody(); + return is_string($responseBody) ? $responseBody : ''; } catch (Exception $e) { - // Preserve the original exception context for better debugging - // Re-throw with additional context while maintaining the original exception chain throw new Exception( "ClickHouse query execution failed: {$e->getMessage()}", 0, @@ -555,8 +577,11 @@ private function formatParamValue(mixed $value): string } if (is_array($value)) { - $encoded = json_encode($value); - return is_string($encoded) ? $encoded : ''; + try { + return json_encode($value, JSON_THROW_ON_ERROR); + } catch (\JsonException $e) { + throw new Exception('Failed to encode array parameter to JSON: ' . $e->getMessage()); + } } if (is_string($value)) { @@ -726,7 +751,7 @@ private function formatDateTime(\DateTime|string|null $dateTime): string } /** - * Create an audit log entry. + * Create an audit log entry using JSONEachRow format for optimal performance. * * @param array $log The log data * @throws Exception @@ -746,10 +771,8 @@ public function create(array $log): Log /** @var array $logData */ $logData = $log['data'] ?? []; - // Build column list and placeholders dynamically from attributes - $insertColumns = ['id', 'time']; - $valuePlaceholders = ['{id:String}', '{time:String}']; - $queryParams = [ + // Build JSON row for JSONEachRow format + $row = [ 'id' => $logId, 'time' => $formattedTime, ]; @@ -772,10 +795,9 @@ public function create(array $log): Log continue; } - // Get attribute metadata to determine if required and nullable + // Get attribute metadata to determine if required $attributeMetadata = $this->getAttribute($columnName); $isRequiredAttribute = $attributeMetadata !== null && isset($attributeMetadata['required']) && $attributeMetadata['required']; - $isNullableAttribute = $attributeMetadata !== null && (!isset($attributeMetadata['required']) || !$attributeMetadata['required']); // For 'data' column, we'll handle it separately at the end if ($columnName === 'data') { @@ -808,39 +830,26 @@ public function create(array $log): Log } if ($hasAttributeValue) { - $insertColumns[] = $columnName; - $queryParams[$columnName] = $attributeValue; - - // Determine placeholder type based on attribute metadata - if ($isNullableAttribute) { - $valuePlaceholders[] = '{' . $columnName . ':Nullable(String)}'; - } else { - $valuePlaceholders[] = '{' . $columnName . ':String}'; - } + $row[$columnName] = $attributeValue; } } // Add the data column with remaining non-schema attributes - $insertColumns[] = 'data'; - $queryParams['data'] = json_encode($nonSchemaData); - $valuePlaceholders[] = '{data:Nullable(String)}'; + try { + $encodedData = json_encode($nonSchemaData, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_THROW_ON_ERROR); + } catch (\JsonException $e) { + throw new Exception('Failed to encode data column to JSON: ' . $e->getMessage()); + } + $row['data'] = $encodedData; if ($this->sharedTables) { - $insertColumns[] = 'tenant'; - $valuePlaceholders[] = '{tenant:Nullable(UInt64)}'; - $queryParams['tenant'] = $this->tenant; + $row['tenant'] = $this->tenant; } $escapedDatabaseAndTable = $this->escapeIdentifier($this->database) . '.' . $this->escapeIdentifier($tableName); - $insertSql = " - INSERT INTO {$escapedDatabaseAndTable} - (" . implode(', ', $insertColumns) . ") - VALUES ( - " . implode(", ", $valuePlaceholders) . " - ) - "; + $insertSql = "INSERT INTO {$escapedDatabaseAndTable} FORMAT JSONEachRow"; - $this->query($insertSql, $queryParams); + $this->query($insertSql, [], [$row]); // Retrieve the created log using getById to ensure consistency $createdLog = $this->getById($logId); @@ -852,7 +861,7 @@ public function create(array $log): Log } /** - * Get a single log by its ID. + * Get a single log by its ID using JSON format for reliable parsing. * * @param string $id * @return Log|null The log entry or null if not found @@ -870,17 +879,17 @@ public function getById(string $id): ?Log FROM {$escapedTable} WHERE {$escapedId} = {id:String}{$tenantFilter} LIMIT 1 - FORMAT TabSeparated + FORMAT JSON "; $result = $this->query($sql, ['id' => $id]); - $logs = $this->parseResults($result); + $logs = $this->parseJsonResults($result); return $logs[0] ?? null; } /** - * Find logs using Query objects. + * Find logs using Query objects with JSON format for reliable parsing. * * @param array $queries * @return array @@ -921,11 +930,11 @@ public function find(array $queries = []): array $sql = " SELECT {$selectColumns} FROM {$escapedTable}{$whereClause}{$orderClause}{$limitClause}{$offsetClause} - FORMAT TabSeparated + FORMAT JSON "; $result = $this->query($sql, $parsed['params']); - return $this->parseResults($result); + return $this->parseJsonResults($result); } /** @@ -1125,7 +1134,7 @@ private function parseQueries(array $queries): array } /** - * Create multiple audit log entries in batch. + * Create multiple audit log entries in batch using JSONEachRow format for optimal performance. * * @param array> $logs The logs to insert * @throws Exception @@ -1142,8 +1151,9 @@ public function createBatch(array $logs): bool // Get all attribute column names $schemaColumns = $this->getColumnNames(); - // Process each log to extract additional attributes from data - $processedLogs = []; + // Build JSON rows for JSONEachRow format + $rows = []; + foreach ($logs as $log) { /** @var array $logData */ $logData = $log['data'] ?? []; @@ -1176,209 +1186,120 @@ public function createBatch(array $logs): bool } } - // Update data with remaining non-schema attributes - $processedLog['data'] = $nonSchemaData; - $processedLogs[] = $processedLog; - } - - // Build column list starting with id and time - $insertColumns = ['id', 'time']; - - // Determine which attribute columns are present in any log - $presentColumns = []; - foreach ($processedLogs as $processedLog) { - foreach ($schemaColumns as $columnName) { - if ($columnName === 'time') { - continue; // Already in insertColumns - } - if (isset($processedLog[$columnName]) && !in_array($columnName, $presentColumns, true)) { - $presentColumns[] = $columnName; - } - } - } - - // Add present columns in the order they're defined in attributes - foreach ($schemaColumns as $columnName) { - if ($columnName === 'time') { - continue; // Already added - } - if (in_array($columnName, $presentColumns, true)) { - $insertColumns[] = $columnName; - } - } - - if ($this->sharedTables) { - $insertColumns[] = 'tenant'; - } - - $paramCounter = 0; - $queryParams = []; - $valueClauses = []; - - foreach ($processedLogs as $processedLog) { + // Build JSON row $logId = uniqid('', true); - $valuePlaceholders = []; - // Add id - $paramKey = 'id_' . $paramCounter; - $queryParams[$paramKey] = $logId; - $valuePlaceholders[] = '{' . $paramKey . ':String}'; - - // Add time /** @var string|\DateTime|null $providedTime */ $providedTime = $processedLog['time'] ?? null; $formattedTime = $this->formatDateTime($providedTime); - $paramKey = 'time_' . $paramCounter; - $queryParams[$paramKey] = $formattedTime; - $valuePlaceholders[] = '{' . $paramKey . ':String}'; - // Add all other present columns - foreach ($insertColumns as $columnName) { - if ($columnName === 'id' || $columnName === 'time' || $columnName === 'tenant') { + $row = [ + 'id' => $logId, + 'time' => $formattedTime, + ]; + + // Add all other columns + foreach ($schemaColumns as $columnName) { + if ($columnName === 'time') { continue; // Already handled } - $paramKey = $columnName . '_' . $paramCounter; - - // Get attribute metadata to determine if required and nullable + // Get attribute metadata to determine if required $attributeMetadata = $this->getAttribute($columnName); $isRequiredAttribute = $attributeMetadata !== null && isset($attributeMetadata['required']) && $attributeMetadata['required']; - $isNullableAttribute = $attributeMetadata !== null && (!isset($attributeMetadata['required']) || !$attributeMetadata['required']); - - $attributeValue = null; - $hasAttributeValue = false; if ($columnName === 'data') { - // Data column - encode as JSON - /** @var array $dataValue */ - $dataValue = $processedLog['data']; - $attributeValue = json_encode($dataValue); - $hasAttributeValue = true; + // Data column - encode remaining non-schema data as JSON + try { + $encodedData = json_encode($nonSchemaData, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_THROW_ON_ERROR); + } catch (\JsonException $e) { + throw new Exception('Failed to encode data column to JSON: ' . $e->getMessage()); + } + $row['data'] = $encodedData; } elseif (isset($processedLog[$columnName])) { - $attributeValue = $processedLog[$columnName]; - $hasAttributeValue = true; - } - - // Validate required attributes - if ($isRequiredAttribute && !$hasAttributeValue) { + $row[$columnName] = $processedLog[$columnName]; + } elseif ($isRequiredAttribute) { throw new \InvalidArgumentException("Required attribute '{$columnName}' is missing in batch log entry"); } - - $queryParams[$paramKey] = $attributeValue; - - // Determine placeholder type based on attribute metadata - if ($isNullableAttribute) { - $valuePlaceholders[] = '{' . $paramKey . ':Nullable(String)}'; - } else { - $valuePlaceholders[] = '{' . $paramKey . ':String}'; - } } if ($this->sharedTables) { - $paramKey = 'tenant_' . $paramCounter; - $queryParams[$paramKey] = $this->tenant; - $valuePlaceholders[] = '{' . $paramKey . ':Nullable(UInt64)}'; + $row['tenant'] = $this->tenant; } - $valueClauses[] = '(' . implode(', ', $valuePlaceholders) . ')'; - $paramCounter++; + $rows[] = $row; } - $insertSql = " - INSERT INTO {$escapedDatabaseAndTable} - (" . implode(', ', $insertColumns) . ") - VALUES " . implode(', ', $valueClauses); + $insertSql = "INSERT INTO {$escapedDatabaseAndTable} FORMAT JSONEachRow"; - $this->query($insertSql, $queryParams); + $this->query($insertSql, [], $rows); return true; } /** - * Parse ClickHouse query result into Log objects. - * Dynamically maps columns based on current attribute definitions. + * Parse ClickHouse JSON format results into Log objects. + * JSON format provides structured data with automatic type handling. * + * @param string $result The JSON response from ClickHouse * @return array + * @throws Exception If JSON parsing fails */ - private function parseResults(string $result): array + private function parseJsonResults(string $result): array { if (empty(trim($result))) { return []; } - $lines = explode("\n", trim($result)); - $documents = []; - - // Build the expected column order dynamically (matching getSelectColumns order) - $selectColumns = ['id']; - foreach ($this->getAttributes() as $attribute) { - $id = $attribute['$id']; - if ($id !== 'data') { - $selectColumns[] = $id; - } + /** @var array|null $decoded */ + $decoded = json_decode($result, true); + if ($decoded === null && json_last_error() !== JSON_ERROR_NONE) { + throw new Exception('Failed to parse ClickHouse JSON response: ' . json_last_error_msg()); } - $selectColumns[] = 'data'; - if ($this->sharedTables) { - $selectColumns[] = 'tenant'; + if (!is_array($decoded) || !isset($decoded['data']) || !is_array($decoded['data'])) { + return []; } - $expectedColumns = count($selectColumns); - - foreach ($lines as $line) { - if (empty(trim($line))) { - continue; - } + /** @var array> $data */ + $data = $decoded['data']; + $documents = []; - $columns = explode("\t", $line); - if (count($columns) < $expectedColumns) { + foreach ($data as $row) { + if (!is_array($row)) { continue; } - // Helper function to parse nullable string fields - // ClickHouse TabSeparated format uses \N for NULL, but empty strings are also treated as null for nullable fields - $parseNullableString = static function ($value): ?string { - if ($value === '\\N' || $value === '') { - return null; - } - return $value; - }; - - // Build document dynamically by mapping columns to values $document = []; - foreach ($selectColumns as $index => $columnName) { - if (!isset($columns[$index])) { - continue; - } - - $value = $columns[$index]; + foreach ($row as $columnName => $value) { if ($columnName === 'data') { // Decode JSON data column - $document[$columnName] = json_decode($value, true) ?? []; + if (is_string($value)) { + $document[$columnName] = json_decode($value, true) ?? []; + } else { + $document[$columnName] = $value ?? []; + } } elseif ($columnName === 'tenant') { // Parse tenant as integer or null - $document[$columnName] = ($value === '\\N' || $value === '') ? null : (int) $value; + if ($value === null || $value === '') { + $document[$columnName] = null; + } elseif (is_numeric($value)) { + $document[$columnName] = (int) $value; + } else { + $document[$columnName] = null; + } } elseif ($columnName === 'time') { // Convert ClickHouse timestamp format back to ISO 8601 - // ClickHouse: 2025-12-07 23:33:54.493 - // ISO 8601: 2025-12-07T23:33:54.493+00:00 - $parsedTime = $value; - if (strpos($parsedTime, 'T') === false) { + // ClickHouse JSON: "2025-12-07 23:33:54.493" + // ISO 8601: "2025-12-07T23:33:54.493+00:00" + $parsedTime = is_string($value) ? $value : (is_scalar($value) ? (string) $value : ''); + if (strpos($parsedTime, 'T') === false && $parsedTime !== '') { $parsedTime = str_replace(' ', 'T', $parsedTime) . '+00:00'; } $document[$columnName] = $parsedTime; } else { - // Get attribute metadata to check if nullable - $col = $columnName; - /** @var string $col */ - $attribute = $this->getAttribute($col); - if ($attribute && !$attribute['required']) { - // Nullable field - parse null values - $document[$columnName] = $parseNullableString($value); - } else { - // Required field - use value as-is - $document[$columnName] = $value; - } + // For other fields, handle null values + $document[$columnName] = $value; } } @@ -1444,89 +1365,6 @@ private function getTenantFilter(): string return " AND {$escapedTenant} = {$this->tenant}"; } - /** - * Build time WHERE clause and parameters with safe parameter placeholders. - * Escapes column name to prevent SQL injection. - * - * @param \DateTime|null $after - * @param \DateTime|null $before - * @return array{clause: string, params: array} - */ - /** @phpstan-ignore-next-line */ - private function buildTimeClause(?\DateTime $after, ?\DateTime $before): array - { - $params = []; - $conditions = []; - - $afterStr = null; - $beforeStr = null; - - if ($after !== null) { - /** @var \DateTime $after */ - $afterStr = \Utopia\Database\DateTime::format($after); - } - - if ($before !== null) { - /** @var \DateTime $before */ - $beforeStr = \Utopia\Database\DateTime::format($before); - } - - $escapedTime = $this->escapeIdentifier('time'); - - if ($afterStr !== null && $beforeStr !== null) { - $conditions[] = "{$escapedTime} BETWEEN {after:String} AND {before:String}"; - $params['after'] = $afterStr; - $params['before'] = $beforeStr; - - return ['clause' => ' AND ' . $conditions[0], 'params' => $params]; - } - - if ($afterStr !== null) { - $conditions[] = "{$escapedTime} > {after:String}"; - $params['after'] = $afterStr; - } - - if ($beforeStr !== null) { - $conditions[] = "{$escapedTime} < {before:String}"; - $params['before'] = $beforeStr; - } - - if ($conditions === []) { - return ['clause' => '', 'params' => []]; - } - - return [ - 'clause' => ' AND ' . implode(' AND ', $conditions), - 'params' => $params, - ]; - } - - /** - * Build a formatted SQL IN list from an array of events. - * Events are parameterized for safe SQL inclusion. - * - * @param array $events - * @param int $paramOffset Base parameter number for creating unique param names - * @return array{clause: string, params: array} - */ - /** @phpstan-ignore-next-line */ - private function buildEventsList(array $events, int $paramOffset = 0): array - { - $placeholders = []; - $params = []; - - foreach ($events as $index => $event) { - /** @var int $paramNumber */ - $paramNumber = $paramOffset + (int) $index; - $paramName = 'event_' . (string) $paramNumber; - $placeholders[] = '{' . $paramName . ':String}'; - $params[$paramName] = $event; - } - - $clause = implode(', ', $placeholders); - return ['clause' => $clause, 'params' => $params]; - } - /** * Get ClickHouse-specific SQL column definition for a given attribute ID. * diff --git a/tests/Audit/Adapter/ClickHouseTest.php b/tests/Audit/Adapter/ClickHouseTest.php index b6cf831..1374c71 100644 --- a/tests/Audit/Adapter/ClickHouseTest.php +++ b/tests/Audit/Adapter/ClickHouseTest.php @@ -328,6 +328,7 @@ public function testBatchOperationsWithSpecialCharacters(): void ] ]; + $batchEvents = $this->applyRequiredAttributesToBatch($batchEvents); $result = $this->audit->logBatch($batchEvents); $this->assertTrue($result); diff --git a/tests/Audit/AuditBase.php b/tests/Audit/AuditBase.php index cf6b42b..d3d1ab2 100644 --- a/tests/Audit/AuditBase.php +++ b/tests/Audit/AuditBase.php @@ -747,6 +747,7 @@ public function testCount(): void 'time' => $timestamp ]; } + $batchEvents = $this->applyRequiredAttributesToBatch($batchEvents); $this->audit->logBatch($batchEvents); // Test 1: Count with simple filter