diff --git a/Sources/Containerization/ContainerManager.swift b/Sources/Containerization/ContainerManager.swift index bbdbd8c0..7ff386eb 100644 --- a/Sources/Containerization/ContainerManager.swift +++ b/Sources/Containerization/ContainerManager.swift @@ -376,6 +376,7 @@ public struct ContainerManager: Sendable { /// - readOnly: Whether to mount the root filesystem as read-only. /// - networking: Whether to create a network interface for this container. Defaults to `true`. /// When `false`, no network resources are allocated and `releaseNetwork`/`delete` remain safe to call. + /// - progress: Optional handler for tracking rootfs unpacking progress. public mutating func create( _ id: String, reference: String, @@ -383,6 +384,7 @@ public struct ContainerManager: Sendable { writableLayerSizeInBytes: UInt64? = nil, readOnly: Bool = false, networking: Bool = true, + progress: ProgressHandler? = nil, configuration: (inout LinuxContainer.Configuration) throws -> Void ) async throws -> LinuxContainer { let image = try await imageStore.get(reference: reference, pull: true) @@ -393,6 +395,7 @@ public struct ContainerManager: Sendable { writableLayerSizeInBytes: writableLayerSizeInBytes, readOnly: readOnly, networking: networking, + progress: progress, configuration: configuration ) } @@ -407,6 +410,7 @@ public struct ContainerManager: Sendable { /// - readOnly: Whether to mount the root filesystem as read-only. /// - networking: Whether to create a network interface for this container. Defaults to `true`. /// When `false`, no network resources are allocated and `releaseNetwork`/`delete` remain safe to call. + /// - progress: Optional handler for tracking rootfs unpacking progress. public mutating func create( _ id: String, image: Image, @@ -414,6 +418,7 @@ public struct ContainerManager: Sendable { writableLayerSizeInBytes: UInt64? = nil, readOnly: Bool = false, networking: Bool = true, + progress: ProgressHandler? 
= nil, configuration: (inout LinuxContainer.Configuration) throws -> Void ) async throws -> LinuxContainer { let path = try createContainerRoot(id) @@ -421,7 +426,8 @@ public struct ContainerManager: Sendable { var rootfs = try await unpack( image: image, destination: path.appendingPathComponent("rootfs.ext4"), - size: rootfsSizeInBytes + size: rootfsSizeInBytes, + progress: progress ) if readOnly { rootfs.options.append("ro") @@ -511,10 +517,10 @@ public struct ContainerManager: Sendable { return path } - private func unpack(image: Image, destination: URL, size: UInt64) async throws -> Mount { + private func unpack(image: Image, destination: URL, size: UInt64, progress: ProgressHandler? = nil) async throws -> Mount { do { let unpacker = EXT4Unpacker(blockSizeInBytes: size) - return try await unpacker.unpack(image, for: .current, at: destination) + return try await unpacker.unpack(image, for: .current, at: destination, progress: progress) } catch let err as ContainerizationError { if err.code == .exists { return .block( diff --git a/Sources/Containerization/Image/Unpacker/EXT4Unpacker.swift b/Sources/Containerization/Image/Unpacker/EXT4Unpacker.swift index 4e588e8d..89313540 100644 --- a/Sources/Containerization/Image/Unpacker/EXT4Unpacker.swift +++ b/Sources/Containerization/Image/Unpacker/EXT4Unpacker.swift @@ -84,27 +84,24 @@ public struct EXT4Unpacker: Unpacker { ) defer { try? 
filesystem.close() } + if let progress { + let totalSize = try await totalRegularFileBytes(in: manifest.layers, image: image) + if totalSize > 0 { + await progress([ProgressEvent(event: "add-total-size", value: totalSize)]) + } + } + for layer in manifest.layers { try Task.checkCancellation() let content = try await image.getContent(digest: layer.digest) - let compression: ContainerizationArchive.Filter - switch layer.mediaType { - case MediaTypes.imageLayer, MediaTypes.dockerImageLayer: - compression = .none - case MediaTypes.imageLayerGzip, MediaTypes.dockerImageLayerGzip: - compression = .gzip - case MediaTypes.imageLayerZstd, MediaTypes.dockerImageLayerZstd: - compression = .zstd - default: - throw ContainerizationError(.unsupported, message: "media type \(layer.mediaType) not supported.") - } - try filesystem.unpack( - source: content.path, + let compression = try compressionFilter(for: layer.mediaType) + let reader = try ArchiveReader( format: .paxRestricted, - compression: compression, - progress: progress + filter: compression, + file: content.path ) + try filesystem.unpack(reader: reader, progress: progress) } return .block( @@ -123,4 +120,50 @@ public struct EXT4Unpacker: Unpacker { } return blockPath } + + #if os(macOS) + private func compressionFilter(for mediaType: String) throws -> ContainerizationArchive.Filter { + switch mediaType { + case MediaTypes.imageLayer, MediaTypes.dockerImageLayer: + return .none + case MediaTypes.imageLayerGzip, MediaTypes.dockerImageLayerGzip: + return .gzip + case MediaTypes.imageLayerZstd, MediaTypes.dockerImageLayerZstd: + return .zstd + default: + throw ContainerizationError(.unsupported, message: "media type \(mediaType) not supported.") + } + } + + private func totalRegularFileBytes(in layers: [Descriptor], image: Image) async throws -> Int64 { + var totalSize: Int64 = 0 + + for layer in layers { + try Task.checkCancellation() + + let compression = try compressionFilter(for: layer.mediaType) + let content = try 
await image.getContent(digest: layer.digest)
+            let reader = try ArchiveReader(
+                format: .paxRestricted,
+                filter: compression,
+                file: content.path
+            )
+
+            for (entry, _) in reader.makeStreamingIterator() {
+                try Task.checkCancellation()
+                guard entry.fileType == .regular, let size = entry.size else {
+                    continue
+                }
+
+                let fileSize = Int64(clamping: size)
+                if totalSize > Int64.max - fileSize {
+                    return Int64.max
+                }
+                totalSize += fileSize
+            }
+        }
+
+        return totalSize
+    }
+    #endif
 }
diff --git a/Sources/ContainerizationEXT4/Formatter+Unpack.swift b/Sources/ContainerizationEXT4/Formatter+Unpack.swift
index fee04cc3..c793a275 100644
--- a/Sources/ContainerizationEXT4/Formatter+Unpack.swift
+++ b/Sources/ContainerizationEXT4/Formatter+Unpack.swift
@@ -122,6 +122,28 @@ extension EXT4.Formatter {
         compression: ContainerizationArchive.Filter = .gzip,
         progress: ProgressHandler? = nil
     ) throws {
+        // Optional first pass: scan headers for total size (metadata only; clamping guards a hostile size > Int64.max, which would trap in Int64.init(_:))
+        if let progress {
+            let sizeReader = try ArchiveReader(
+                format: format,
+                filter: compression,
+                file: source
+            )
+            var totalSize: Int64 = 0
+            for (entry, _) in sizeReader.makeStreamingIterator() {
+                try Task.checkCancellation()
+                if entry.fileType == .regular, let size = entry.size {
+                    totalSize += Int64(clamping: size)
+                }
+            }
+            if totalSize > 0 {
+                Task {  // NOTE(review): fire-and-forget — delivery order vs. later add-size events is not guaranteed; an async overload would allow awaiting here
+                    await progress([ProgressEvent(event: "add-total-size", value: totalSize)])
+                }
+            }
+        }
+
+        // Second pass: unpack
         let reader = try ArchiveReader(
             format: format,
             filter: compression,
diff --git a/Tests/ContainerizationEXT4Tests/TestFormatterUnpack.swift b/Tests/ContainerizationEXT4Tests/TestFormatterUnpack.swift
index 0407950a..0a152b2c 100644
--- a/Tests/ContainerizationEXT4Tests/TestFormatterUnpack.swift
+++ b/Tests/ContainerizationEXT4Tests/TestFormatterUnpack.swift
@@ -18,6 +18,7 @@
 #if os(macOS)
 import ContainerizationArchive
+import ContainerizationExtras
 import Foundation
 import Testing
 import SystemPackage
@@ -130,6 +131,200 @@ struct
Tar2EXT4Test: ~Copyable { } } +/// Collects progress events in a thread-safe manner. +private actor ProgressCollector { + var events: [ProgressEvent] = [] + + func append(_ newEvents: [ProgressEvent]) { + events.append(contentsOf: newEvents) + } + + func allEvents() -> [ProgressEvent] { + events + } +} + +struct UnpackProgressTest { + @Test func progressReportsAccurateSizes() async throws { + // Create an archive with files of known sizes + let tempDir = FileManager.default.uniqueTemporaryDirectory() + let archivePath = tempDir.appendingPathComponent("test.tar.gz", isDirectory: false) + let fsPath = FilePath(tempDir.appendingPathComponent("test.ext4.img", isDirectory: false)) + + defer { + try? FileManager.default.removeItem(at: tempDir) + } + + // Create test data with specific sizes + let file1Data = Data(repeating: 0xAA, count: 1024) // 1 KiB + let file2Data = Data(repeating: 0xBB, count: 4096) // 4 KiB + let file3Data = Data(repeating: 0xCC, count: 512) // 512 bytes + let expectedTotalSize: Int64 = 1024 + 4096 + 512 // 5632 bytes + + // Build the archive + let archiver = try ArchiveWriter( + configuration: ArchiveWriterConfiguration(format: .paxRestricted, filter: .gzip)) + try archiver.open(file: archivePath) + + try archiver.writeEntry(entry: WriteEntry.dir(path: "/data", permissions: 0o755), data: nil) + try archiver.writeEntry( + entry: WriteEntry.file(path: "/data/file1.bin", permissions: 0o644, size: Int64(file1Data.count)), + data: file1Data) + try archiver.writeEntry( + entry: WriteEntry.file(path: "/data/file2.bin", permissions: 0o644, size: Int64(file2Data.count)), + data: file2Data) + try archiver.writeEntry( + entry: WriteEntry.file(path: "/data/file3.bin", permissions: 0o644, size: Int64(file3Data.count)), + data: file3Data) + // Include an empty file to verify it doesn't break size calculations + try archiver.writeEntry( + entry: WriteEntry.file(path: "/data/empty.bin", permissions: 0o644, size: 0), + data: Data()) + try archiver.finishEncoding() 
+ + // Set up progress collection + let collector = ProgressCollector() + let shouldPrintProgress = ProcessInfo.processInfo.environment["PRINT_UNPACK_PROGRESS"] == "1" + let progressHandler: ProgressHandler = { events in + if shouldPrintProgress { + for event in events { + print("unpack-progress \(event.event): \(event.value)") + } + } + await collector.append(events) + } + + // Unpack with progress tracking + let formatter = try EXT4.Formatter(fsPath) + try formatter.unpack(source: archivePath, progress: progressHandler) + try formatter.close() + + // Allow async progress tasks to complete + try await Task.sleep(for: .milliseconds(100)) + + // Analyze collected events + let allEvents = await collector.allEvents() + + var reportedTotalSize: Int64 = 0 + var cumulativeSize: Int64 = 0 + var itemCount: Int64 = 0 + + for event in allEvents { + switch event.event { + case "add-total-size": + let value = try #require(event.value as? Int64, "add-total-size value should be Int64") + reportedTotalSize += value + case "add-size": + let value = try #require(event.value as? Int64, "add-size value should be Int64") + cumulativeSize += value + case "add-items": + let value = try #require(event.value as? 
Int, "add-items value should be Int") + itemCount += Int64(value) + default: + break + } + } + + // Verify the progress contract + #expect( + reportedTotalSize == expectedTotalSize, + "Total size should be \(expectedTotalSize) bytes, got \(reportedTotalSize)") + #expect( + cumulativeSize == expectedTotalSize, + "Cumulative size should equal total size (\(expectedTotalSize)), got \(cumulativeSize)") + #expect( + itemCount == 5, + "Should have processed 5 entries (1 dir + 4 files), got \(itemCount)") + + // Verify incremental progress: we should get separate add-size events for each file + let addSizeEvents = allEvents.filter { $0.event == "add-size" } + #expect( + addSizeEvents.count == 4, + "Should have 4 add-size events (one per file, including empty), got \(addSizeEvents.count)") + + // Verify individual file sizes were reported correctly + let reportedSizes = addSizeEvents.compactMap { $0.value as? Int64 }.sorted() + #expect( + reportedSizes == [0, 512, 1024, 4096], + "Individual file sizes should be [0, 512, 1024, 4096], got \(reportedSizes)") + + // Verify event-by-event behavior expected by clients: + // total remains stable and written bytes are monotonic as progress updates arrive. + var runningTotal: Int64? + var runningWritten: Int64 = 0 + var previousSnapshot: (written: Int64, total: Int64?)? + var progressSnapshotCount = 0 + + for event in allEvents { + switch event.event { + case "add-total-size": + let value = try #require(event.value as? Int64, "add-total-size value should be Int64") + runningTotal = (runningTotal ?? 0) + value + case "add-size": + let value = try #require(event.value as? 
Int64, "add-size value should be Int64") + runningWritten += value + let currentSnapshot = (written: runningWritten, total: runningTotal) + if let previousSnapshot { + #expect( + currentSnapshot.written >= previousSnapshot.written, + "Written bytes should be monotonic: \(currentSnapshot.written) < \(previousSnapshot.written)") + #expect( + currentSnapshot.total == previousSnapshot.total, + "Total bytes should remain stable across progress updates") + } + previousSnapshot = currentSnapshot + progressSnapshotCount += 1 + default: + break + } + } + + #expect( + progressSnapshotCount == addSizeEvents.count, + "Should produce one monotonic snapshot per add-size update") + + // Verify add-total-size comes before add-size events (first pass before second pass) + if let totalSizeIndex = allEvents.firstIndex(where: { $0.event == "add-total-size" }), + let firstAddSizeIndex = allEvents.firstIndex(where: { $0.event == "add-size" }) { + #expect( + totalSizeIndex < firstAddSizeIndex, + "add-total-size should be reported before add-size events") + } + } + + @Test func progressHandlerIsOptional() throws { + // Verify that unpacking works without a progress handler (existing behavior) + let tempDir = FileManager.default.uniqueTemporaryDirectory() + let archivePath = tempDir.appendingPathComponent("test.tar.gz", isDirectory: false) + let fsPath = FilePath(tempDir.appendingPathComponent("test.ext4.img", isDirectory: false)) + + defer { + try? 
FileManager.default.removeItem(at: tempDir) + } + + let archiver = try ArchiveWriter( + configuration: ArchiveWriterConfiguration(format: .paxRestricted, filter: .gzip)) + try archiver.open(file: archivePath) + try archiver.writeEntry(entry: WriteEntry.dir(path: "/test", permissions: 0o755), data: nil) + let data = Data(repeating: 0x42, count: 100) + try archiver.writeEntry( + entry: WriteEntry.file(path: "/test/file.bin", permissions: 0o644, size: Int64(data.count)), + data: data) + try archiver.finishEncoding() + + // Unpack without progress handler - should not throw + let formatter = try EXT4.Formatter(fsPath) + try formatter.unpack(source: archivePath) + try formatter.close() + + // Verify the file was unpacked correctly + let reader = try EXT4.EXT4Reader(blockDevice: fsPath) + let children = try reader.children(of: EXT4.RootInode) + let childNames = Set(children.map { $0.0 }) + #expect(childNames.contains("test"), "Directory 'test' should exist in unpacked filesystem") + } +} + extension ContainerizationArchive.WriteEntry { static func dir(path: String, permissions: UInt16) -> WriteEntry { let entry = WriteEntry()