putty/Sources/PuttyKit/Fetchers/PageFetcher.swift

138 lines
5.4 KiB
Swift

import Foundation
#if canImport(FoundationNetworking)
import FoundationNetworking
#endif
/// A client that connects to a Selenium WebDriver running Chromium to fetch HTML page content.
///
/// Every call to `fetchHTML(from:waitFor:)` opens its own WebDriver session and closes it
/// again before returning or throwing.
public struct PageFetcher {
    /// Base URL of the WebDriver server; request paths are appended to it.
    private let webDriverURL: URL
    private let session = URLSession.shared

    // MARK: - Initialization

    /// Initialize PageFetcher with WebDriver URL.
    ///
    /// - Parameter webDriverURL: The URL where Selenium WebDriver is running
    ///   (for example `http://localhost:4444`). A string that cannot be parsed as a URL
    ///   is treated as a programmer error and stops the process.
    public init(webDriverURL: String) {
        guard let parsedURL = URL(string: webDriverURL) else {
            preconditionFailure("PageFetcher: invalid WebDriver URL '\(webDriverURL)'")
        }
        self.webDriverURL = parsedURL
    }

    // MARK: - Public Methods

    /// Fetch the raw HTML content from the specified URL.
    ///
    /// - Parameters:
    ///   - url: The URL to fetch content from
    ///   - element: Optional CSS selector to poll for before the page source is read.
    ///     If the selector never matches within the polling budget, the page source is
    ///     still returned.
    /// - Returns: The raw HTML content as a string
    /// - Throws: PageFetcherError for various failure scenarios, or the underlying
    ///   transport error raised by `URLSession`
    public func fetchHTML(from url: String, waitFor element: String?) async throws -> String {
        let sessionId = try await startSession()
        let source: String
        do {
            try await navigateToURL(url, sessionId: sessionId)
            if let element {
                try await waitUntilElement(sessionId: sessionId, selector: element, attempts: 5)
            }
            source = try await getPageSource(sessionId: sessionId)
        } catch {
            // Best-effort cleanup so a failed fetch does not leave a browser session open
            // on the WebDriver server; the original error is the one reported to the caller.
            try? await endSession(sessionId: sessionId)
            throw error
        }
        try await endSession(sessionId: sessionId)
        return source
    }

    // MARK: - Private Methods

    /// Start a new WebDriver session with Chrome capabilities.
    ///
    /// - Returns: The `sessionId` string found under `value` in the server's JSON response.
    /// - Throws: `PageFetcherError.invalidSessionResponse` when the response does not have
    ///   that shape, plus anything thrown by `makeRequest`.
    private func startSession() async throws -> String {
        let capabilities = [
            "capabilities": [
                "alwaysMatch": [
                    "browserName": "chrome",
                    "goog:chromeOptions": [
                        "args": [
                            "--headless",
                            "--no-sandbox",
                            "--disable-dev-shm-usage",
                            "--disable-gpu",
                            "--window-size=1920,1080",
                        ],
                    ],
                ],
            ],
        ]
        let (data, _) = try await makeRequest(verb: "POST", path: "wd/hub/session", body: capabilities)
        guard let json = try JSONSerialization.jsonObject(with: data) as? [String: Any],
              let value = json["value"] as? [String: Any],
              let sessionId = value["sessionId"] as? String
        else {
            throw PageFetcherError.invalidSessionResponse
        }
        return sessionId
    }

    /// Ask the browser behind `sessionId` to load `url`.
    private func navigateToURL(_ url: String, sessionId: String) async throws {
        _ = try await makeRequest(verb: "POST", path: "url", sessionId: sessionId, body: ["url": url])
    }

    /// Probe once for an element matching the CSS `selector`.
    ///
    /// - Returns: `true` when the lookup request succeeds, `false` when the WebDriver server
    ///   answers with an error status.
    /// - Throws: Any failure that is not a WebDriver-reported error (for example a transport
    ///   error), since polling again cannot fix those.
    private func findElement(sessionId: String, selector: String) async throws -> Bool {
        do {
            _ = try await makeRequest(
                verb: "POST",
                path: "element",
                sessionId: sessionId,
                body: ["using": "css selector", "value": selector]
            )
            return true
        } catch PageFetcherError.seleniumError(_, _) {
            return false
        }
    }

    /// Poll for `selector` up to `attempts` times, pausing one second between probes.
    ///
    /// Returns as soon as the element is found. When every attempt fails the method also
    /// returns normally, so the caller continues with whatever the page currently contains.
    private func waitUntilElement(sessionId: String, selector: String, attempts: Int) async throws {
        let probeCount = max(attempts, 0)
        for attempt in 0 ..< probeCount {
            if try await findElement(sessionId: sessionId, selector: selector) {
                return
            }
            // Sleeping after the last probe would only delay the caller.
            if attempt < probeCount - 1 {
                try await Task.sleep(nanoseconds: 1 * 1_000_000_000)
            }
        }
    }

    /// Read the current page source of the session.
    ///
    /// - Returns: The string stored under `value` in the server's JSON response.
    /// - Throws: `PageFetcherError.invalidPageSourceResponse` when the response does not have
    ///   that shape, plus anything thrown by `makeRequest`.
    private func getPageSource(sessionId: String) async throws -> String {
        let (data, _) = try await makeRequest(verb: "GET", path: "source", sessionId: sessionId)
        guard let json = try JSONSerialization.jsonObject(with: data) as? [String: Any],
              let value = json["value"] as? String
        else {
            throw PageFetcherError.invalidPageSourceResponse
        }
        return value
    }

    /// Delete the WebDriver session identified by `sessionId`.
    private func endSession(sessionId: String) async throws {
        _ = try await makeRequest(verb: "DELETE", path: "", sessionId: sessionId)
    }

    /// Send one request to the WebDriver server and return the raw response.
    ///
    /// - Parameters:
    ///   - verb: HTTP method. Only `"POST"` requests carry a JSON body.
    ///   - path: Path appended to the WebDriver base URL.
    ///   - body: JSON object to send with a POST; an empty object is sent when it is `nil`.
    /// - Throws: `PageFetcherError.invalidSessionResponse` when the response is not an HTTP
    ///   response, `PageFetcherError.seleniumError` carrying the status code and the server's
    ///   `value.message` (or an empty string) when the status is 400 or above, plus any
    ///   serialization or transport error.
    private func makeRequest(verb: String, path: String,
                             body: [String: Any]? = nil) async throws -> (Data, HTTPURLResponse)
    {
        var request = URLRequest(url: webDriverURL.appendingPathComponent(path))
        print("\(verb) \(request.url?.absoluteString ?? "")")
        request.httpMethod = verb
        if verb == "POST" {
            request.setValue("application/json", forHTTPHeaderField: "Content-Type")
            // A nil body must not reach JSONSerialization: an Optional wrapped in Any is not a
            // valid top-level JSON object. Send an empty JSON object instead.
            request.httpBody = try JSONSerialization.data(withJSONObject: body ?? [:])
        }
        let (data, response) = try await session.data(for: request)
        guard let httpResponse = response as? HTTPURLResponse else {
            throw PageFetcherError.invalidSessionResponse
        }
        guard httpResponse.statusCode < 400 else {
            // The error body is parsed leniently; a missing or malformed message becomes "".
            let maybeJSON = try? JSONSerialization.jsonObject(with: data) as? [String: Any]
            let value = maybeJSON?["value"] as? [String: Any]
            let message = value?["message"] as? String ?? ""
            throw PageFetcherError.seleniumError(httpResponse.statusCode, message)
        }
        return (data, httpResponse)
    }

    /// Session-scoped variant of `makeRequest`: prefixes `path` with
    /// `wd/hub/session/<sessionId>`. An empty `path` addresses the session itself.
    private func makeRequest(verb: String, path: String, sessionId: String,
                             body: [String: Any]? = nil) async throws -> (Data, HTTPURLResponse)
    {
        let addendum = [sessionId, path].filter { !$0.isEmpty }.joined(separator: "/")
        return try await makeRequest(verb: verb, path: "wd/hub/session/\(addendum)", body: body)
    }
}