import Foundation
#if canImport(FoundationNetworking)
import FoundationNetworking
#endif

/// A client that connects to a Selenium WebDriver running Chromium to fetch HTML page content.
///
/// Every fetch opens its own WebDriver session, loads the page, optionally polls for a
/// CSS selector, reads the page source and then deletes the session again.
public struct PageFetcher {
    /// Base URL of the WebDriver server; `wd/hub/...` paths are appended to it.
    private let webDriverURL: URL
    private let session = URLSession.shared

    /// How many times `fetchHTML` polls for the requested element.
    private static let elementPollAttempts = 5
    /// Pause between two polls for an element (one second).
    private static let elementPollIntervalNanoseconds: UInt64 = 1_000_000_000

    // MARK: - Initialization

    /// Initialize PageFetcher with WebDriver URL.
    ///
    /// - Parameter webDriverURL: The URL where Selenium WebDriver is running
    ///   (default: http://localhost:4444).
    /// - Precondition: `webDriverURL` must be parseable by `URL(string:)`.
    public init(webDriverURL: String = "http://localhost:4444") {
        guard let parsedURL = URL(string: webDriverURL) else {
            // An unparseable server address is a configuration error; fail with a
            // message that names the offending value instead of a bare force-unwrap trap.
            preconditionFailure("PageFetcher: invalid WebDriver URL: \(webDriverURL)")
        }
        self.webDriverURL = parsedURL
    }

    // MARK: - Public Methods

    /// Fetch the raw HTML content from the specified URL.
    ///
    /// - Parameters:
    ///   - url: The URL to fetch content from.
    ///   - element: Optional CSS selector. When given, the page is polled for a matching
    ///     element before the source is read. Polling is best-effort: if the element never
    ///     shows up, the source is read anyway.
    /// - Returns: The raw HTML content as a string.
    /// - Throws: `PageFetcherError` for WebDriver failures, or the underlying transport error.
    public func fetchHTML(from url: String, waitFor element: String?) async throws -> String {
        let sessionId = try await startSession()

        let source: String
        do {
            try await navigateToURL(url, sessionId: sessionId)
            if let element {
                try await waitUntilElement(
                    sessionId: sessionId,
                    selector: element,
                    attempts: Self.elementPollAttempts
                )
            }
            source = try await getPageSource(sessionId: sessionId)
        } catch {
            // Do not leave the browser session open on the server when a step fails.
            // Cleanup is best-effort so that the original error is the one reported.
            try? await endSession(sessionId: sessionId)
            throw error
        }

        try await endSession(sessionId: sessionId)
        return source
    }

    // MARK: - Private Methods

    /// Start a new WebDriver session with Chrome capabilities.
    ///
    /// - Returns: The `sessionId` found under `value` in the server's JSON response.
    /// - Throws: `PageFetcherError.invalidSessionResponse` when the response does not
    ///   have that shape, or any error raised by `makeRequest`.
    private func startSession() async throws -> String {
        // Explicitly typed: the nested literal mixes strings, arrays and dictionaries.
        let capabilities: [String: Any] = [
            "capabilities": [
                "alwaysMatch": [
                    "browserName": "chrome",
                    "goog:chromeOptions": [
                        "args": [
                            "--headless",
                            "--no-sandbox",
                            "--disable-dev-shm-usage",
                            "--disable-gpu",
                            "--window-size=1920,1080",
                        ],
                    ],
                ],
            ],
        ]

        let (data, _) = try await makeRequest(verb: "POST", path: "wd/hub/session", body: capabilities)

        guard
            let json = try JSONSerialization.jsonObject(with: data) as? [String: Any],
            let value = json["value"] as? [String: Any],
            let sessionId = value["sessionId"] as? String
        else {
            throw PageFetcherError.invalidSessionResponse
        }
        return sessionId
    }

    /// Ask the browser behind `sessionId` to load `url`.
    private func navigateToURL(_ url: String, sessionId: String) async throws {
        _ = try await makeRequest(verb: "POST", path: "url", sessionId: sessionId, body: ["url": url])
    }

    /// Check whether an element matching the CSS `selector` can currently be found.
    ///
    /// - Returns: `true` when the find-element request succeeds, `false` when the server
    ///   answers with an error status (reported by `makeRequest` as
    ///   `PageFetcherError.seleniumError`).
    /// - Throws: Any other error (for example a transport failure), so that a broken
    ///   connection is not mistaken for "element not present".
    private func findElement(sessionId: String, selector: String) async throws -> Bool {
        do {
            _ = try await makeRequest(
                verb: "POST",
                path: "element",
                sessionId: sessionId,
                body: ["using": "css selector", "value": selector]
            )
            return true
        } catch let error as PageFetcherError {
            guard case .seleniumError = error else { throw error }
            return false
        }
    }

    /// Poll for an element, pausing one second after each unsuccessful attempt.
    ///
    /// Returns as soon as the element is found. When all attempts are used up the function
    /// returns normally as well: waiting is best-effort and the caller proceeds regardless.
    ///
    /// - Parameters:
    ///   - sessionId: The WebDriver session to query.
    ///   - selector: CSS selector of the element to wait for.
    ///   - attempts: Maximum number of polls; values below one result in no polling.
    private func waitUntilElement(sessionId: String, selector: String, attempts: Int) async throws {
        // Clamp so that a negative count cannot form an invalid range (which would trap).
        for _ in 0 ..< max(0, attempts) {
            if try await findElement(sessionId: sessionId, selector: selector) {
                return
            }
            try await Task.sleep(nanoseconds: Self.elementPollIntervalNanoseconds)
        }
    }

    /// Retrieve the current page source of the session.
    ///
    /// - Returns: The string stored under `value` in the server's JSON response.
    /// - Throws: `PageFetcherError.invalidPageSourceResponse` when the response does not
    ///   have that shape, or any error raised by `makeRequest`.
    private func getPageSource(sessionId: String) async throws -> String {
        let (data, _) = try await makeRequest(verb: "GET", path: "source", sessionId: sessionId)

        guard
            let json = try JSONSerialization.jsonObject(with: data) as? [String: Any],
            let value = json["value"] as? String
        else {
            throw PageFetcherError.invalidPageSourceResponse
        }
        return value
    }

    /// Delete the WebDriver session identified by `sessionId`.
    private func endSession(sessionId: String) async throws {
        _ = try await makeRequest(verb: "DELETE", path: "", sessionId: sessionId)
    }

    /// Send a request to the WebDriver server and return the raw response.
    ///
    /// - Parameters:
    ///   - verb: HTTP method. Only `"POST"` requests carry a body.
    ///   - path: Path appended to the WebDriver base URL.
    ///   - body: JSON object to send with a POST; an empty object is sent when `nil`.
    /// - Returns: The response data together with the HTTP response.
    /// - Throws: `PageFetcherError.invalidSessionResponse` when the response is not an HTTP
    ///   response, `PageFetcherError.seleniumError` (status code plus the server's
    ///   `value.message`, or an empty string) for status codes of 400 and above, or the
    ///   error raised by `URLSession` / `JSONSerialization`.
    private func makeRequest(
        verb: String,
        path: String,
        body: [String: Any]? = nil
    ) async throws -> (Data, HTTPURLResponse) {
        var request = URLRequest(url: webDriverURL.appendingPathComponent(path))
        print("\(verb) \(request.url?.absoluteString ?? "")")
        request.httpMethod = verb

        if verb == "POST" {
            request.setValue("application/json", forHTTPHeaderField: "Content-Type")
            // Fall back to an empty JSON object: handing a nil optional to JSONSerialization
            // is an invalid top-level object, which it reports by raising an exception
            // rather than by throwing a Swift error.
            request.httpBody = try JSONSerialization.data(withJSONObject: body ?? [:])
        }

        let (data, response) = try await session.data(for: request)

        guard let httpResponse = response as? HTTPURLResponse else {
            throw PageFetcherError.invalidSessionResponse
        }

        guard httpResponse.statusCode < 400 else {
            // Try to pull the server's error message out of `value.message`.
            let maybeJSON = try? JSONSerialization.jsonObject(with: data) as? [String: Any]
            let value = maybeJSON?["value"] as? [String: Any]
            let message = value?["message"] as? String ?? ""
            throw PageFetcherError.seleniumError(httpResponse.statusCode, message)
        }

        return (data, httpResponse)
    }

    /// Send a request scoped to an existing session.
    ///
    /// Builds `wd/hub/session/<sessionId>[/<path>]`; an empty `path` addresses the session
    /// itself (used when deleting it).
    private func makeRequest(
        verb: String,
        path: String,
        sessionId: String,
        body: [String: Any]? = nil
    ) async throws -> (Data, HTTPURLResponse) {
        let addendum = [sessionId, path].filter { !$0.isEmpty }.joined(separator: "/")
        return try await makeRequest(verb: verb, path: "wd/hub/session/\(addendum)", body: body)
    }
}