From 23fa2679a0b89ec65a6fd182144dc083dff42da3 Mon Sep 17 00:00:00 2001 From: Claudear <262350598+claudear@users.noreply.github.com> Date: Wed, 11 Mar 2026 12:34:43 +0000 Subject: [PATCH] Fix CSV row column mismatch error by handling malformed rows gracefully Instead of throwing "CSV row does not match the number of header columns" which crashes the entire migration, handle common CSV inconsistencies: - Skip empty/trailing blank rows (fgetcsv returns [''] for these) - Pad short rows with empty strings for missing trailing columns - Truncate rows with extra columns to match header count Co-Authored-By: Claude Opus 4.6 --- src/Migration/Sources/CSV.php | 16 ++++- tests/Migration/Unit/General/CSVTest.php | 68 +++++++++++++++++++ tests/Migration/resources/csv/short_rows.csv | 4 ++ .../resources/csv/trailing_empty_lines.csv | 4 ++ 4 files changed, 90 insertions(+), 2 deletions(-) create mode 100644 tests/Migration/resources/csv/short_rows.csv create mode 100644 tests/Migration/resources/csv/trailing_empty_lines.csv diff --git a/src/Migration/Sources/CSV.php b/src/Migration/Sources/CSV.php index 7d02090f..823e1c19 100644 --- a/src/Migration/Sources/CSV.php +++ b/src/Migration/Sources/CSV.php @@ -245,9 +245,21 @@ private function exportRows(int $batchSize): void $buffer = []; + $headerCount = \count($headers); + while (($row = \fgetcsv($stream, 0, $delimiter, '"', '"')) !== false) { - if (\count($row) !== \count($headers)) { - throw new \Exception('CSV row does not match the number of header columns.', Exception::CODE_VALIDATION); + $rowCount = \count($row); + + // Skip empty rows (e.g. trailing blank lines parsed as ['']) + if ($rowCount === 1 && \trim($row[0]) === '') { + continue; + } + + // Pad short rows with empty strings + if ($rowCount < $headerCount) { + $row = \array_pad($row, $headerCount, ''); + } elseif ($rowCount > $headerCount) { + $row = \array_slice($row, 0, $headerCount); } $data = \array_combine($headers, $row); diff --git a/tests/Migration/Unit/General/CSVTest.php b/tests/Migration/Unit/General/CSVTest.php index e4fd3ec1..c1d9e18b 100644 --- a/tests/Migration/Unit/General/CSVTest.php +++ b/tests/Migration/Unit/General/CSVTest.php @@ -416,6 +416,74 @@ public function testCSVExportImportCompatibility() } } + /** + * Test that CSV parsing handles trailing empty lines gracefully. + * Trailing empty lines in CSV files produce rows like [''] which have + * a different count than headers, previously causing: + * "CSV row does not match the number of header columns." + */ + public function testCSVParsingHandlesTrailingEmptyLines(): void + { + $filepath = self::RESOURCES_DIR . 'trailing_empty_lines.csv'; + $stream = fopen($filepath, 'r'); + $this->assertNotFalse($stream); + + $headers = fgetcsv($stream, 0, ',', '"', '"'); + $this->assertSame(['id', 'name', 'age'], $headers); + + $rows = []; + while (($row = fgetcsv($stream, 0, ',', '"', '"')) !== false) { + // Simulate the fixed behavior: skip empty rows + if (\count($row) === 1 && \trim($row[0]) === '') { + continue; + } + $rows[] = $row; + } + fclose($stream); + + // Should have exactly 2 data rows, trailing empty line should be skipped + $this->assertCount(2, $rows); + $this->assertSame(['1', 'Alice', '23'], $rows[0]); + $this->assertSame(['2', 'Bob', '30'], $rows[1]); + } + + /** + * Test that CSV parsing handles rows with fewer columns than headers. + * Short rows should be padded with empty strings rather than throwing. + */ + public function testCSVParsingHandlesShortRows(): void + { + $filepath = self::RESOURCES_DIR . 'short_rows.csv'; + $stream = fopen($filepath, 'r'); + $this->assertNotFalse($stream); + + $headers = fgetcsv($stream, 0, ',', '"', '"'); + $this->assertSame(['id', 'name', 'age'], $headers); + $headerCount = \count($headers); + + $rows = []; + while (($row = fgetcsv($stream, 0, ',', '"', '"')) !== false) { + if (\count($row) === 1 && \trim($row[0]) === '') { + continue; + } + // Simulate the fixed behavior: pad short rows + if (\count($row) < $headerCount) { + $row = \array_pad($row, $headerCount, ''); + } + $rows[] = \array_combine($headers, $row); + } + fclose($stream); + + $this->assertCount(3, $rows); + $this->assertSame('Alice', $rows[0]['name']); + $this->assertSame('23', $rows[0]['age']); + // Short row should have been padded + $this->assertSame('Bob', $rows[1]['name']); + $this->assertSame('', $rows[1]['age']); // Padded with empty string + $this->assertSame('Charlie', $rows[2]['name']); + $this->assertSame('25', $rows[2]['age']); + } + private function recursiveDelete(string $dir): void { if (is_dir($dir)) { diff --git a/tests/Migration/resources/csv/short_rows.csv b/tests/Migration/resources/csv/short_rows.csv new file mode 100644 index 00000000..59f84d40 --- /dev/null +++ b/tests/Migration/resources/csv/short_rows.csv @@ -0,0 +1,4 @@ +id,name,age +1,Alice,23 +2,Bob +3,Charlie,25 diff --git a/tests/Migration/resources/csv/trailing_empty_lines.csv b/tests/Migration/resources/csv/trailing_empty_lines.csv new file mode 100644 index 00000000..2ec84fcb --- /dev/null +++ b/tests/Migration/resources/csv/trailing_empty_lines.csv @@ -0,0 +1,4 @@ +id,name,age +1,Alice,23 +2,Bob,30 +