putty/Sources/PuttyKit/Fetchers/PageFetcher.swift

126 lines
4.8 KiB
Swift

import Foundation
#if canImport(FoundationNetworking)
import FoundationNetworking
#endif
/// A client that talks to a Selenium WebDriver server driving Chromium in order
/// to fetch the HTML source of a page.
///
/// Every call to `fetchHTML(from:)` creates its own WebDriver session and
/// deletes it again before returning or throwing.
public final class PageFetcher {
    /// Base URL of the WebDriver server; request paths are appended to it.
    private let webDriverURL: URL
    /// URL session used for every HTTP request to the WebDriver server.
    private let session = URLSession.shared

    // MARK: - Initialization

    /// Initialize PageFetcher with WebDriver URL
    ///
    /// Traps with a descriptive message when `webDriverURL` cannot be parsed by
    /// `URL(string:)`; the initializer is intentionally neither failable nor
    /// throwing so existing call sites keep compiling.
    ///
    /// - Parameter webDriverURL: The URL where Selenium WebDriver is running (default: http://localhost:4444)
    public init(webDriverURL: String = "http://localhost:4444") {
        guard let parsedURL = URL(string: webDriverURL) else {
            fatalError("PageFetcher: invalid WebDriver URL \"\(webDriverURL)\"")
        }
        self.webDriverURL = parsedURL
    }

    // MARK: - Public Methods

    /// Fetch the raw HTML content from the specified URL.
    ///
    /// Starts a WebDriver session, navigates to `url`, reads the page source and
    /// deletes the session. If navigation or reading the source fails, the
    /// session is still deleted (best effort) before the original error is
    /// rethrown, so failed fetches do not leave sessions behind on the server.
    ///
    /// - Parameter url: The URL to fetch content from
    /// - Returns: The raw HTML content as a string
    /// - Throws: PageFetcherError for various failure scenarios, or any error
    ///   thrown by the underlying `URLSession` / `JSONSerialization` calls.
    public func fetchHTML(from url: String) async throws -> String {
        let sessionId = try await startSession()

        let source: String
        do {
            try await navigateToURL(url, sessionId: sessionId)
            source = try await getPageSource(sessionId: sessionId)
        } catch {
            // Best-effort cleanup: a failure while deleting the session must not
            // mask the error that actually aborted the fetch.
            _ = try? await endSession(sessionId: sessionId)
            throw error
        }

        // On the success path a failure to delete the session is still reported.
        try await endSession(sessionId: sessionId)
        return source
    }

    // MARK: - Private Methods

    /// Start a new WebDriver session with Chrome capabilities.
    ///
    /// - Returns: The `sessionId` found under `value` in the server's JSON reply.
    /// - Throws: `PageFetcherError.invalidSessionResponse` when the reply does not
    ///   have the expected shape, plus anything `makeRequest` throws.
    private func startSession() async throws -> String {
        // Built level by level with explicit `[String: Any]` annotations because
        // the payload mixes strings, arrays and dictionaries.
        let chromeOptions: [String: Any] = [
            "args": [
                "--headless",
                "--no-sandbox",
                "--disable-dev-shm-usage",
                "--disable-gpu",
                "--window-size=1920,1080",
            ],
        ]
        let alwaysMatch: [String: Any] = [
            "browserName": "chrome",
            "goog:chromeOptions": chromeOptions,
        ]
        let capabilities: [String: Any] = [
            "capabilities": [
                "alwaysMatch": alwaysMatch,
            ],
        ]

        let (data, _) = try await makeRequest(verb: "POST", path: "wd/hub/session", body: capabilities)
        guard let json = try JSONSerialization.jsonObject(with: data) as? [String: Any],
              let value = json["value"] as? [String: Any],
              let sessionId = value["sessionId"] as? String
        else {
            throw PageFetcherError.invalidSessionResponse
        }
        return sessionId
    }

    /// Ask the browser in the given session to navigate to `url`.
    private func navigateToURL(_ url: String, sessionId: String) async throws {
        _ = try await makeRequest(verb: "POST", path: "url", sessionId: sessionId, body: ["url": url])
    }

    /// Read the current page source of the given session.
    ///
    /// - Returns: The string stored under `value` in the server's JSON reply.
    /// - Throws: `PageFetcherError.invalidPageSourceResponse` when the reply does
    ///   not have the expected shape, plus anything `makeRequest` throws.
    private func getPageSource(sessionId: String) async throws -> String {
        let (data, _) = try await makeRequest(verb: "GET", path: "source", sessionId: sessionId)
        guard let json = try JSONSerialization.jsonObject(with: data) as? [String: Any],
              let value = json["value"] as? String
        else {
            throw PageFetcherError.invalidPageSourceResponse
        }
        return value
    }

    /// Delete the given WebDriver session.
    private func endSession(sessionId: String) async throws {
        _ = try await makeRequest(verb: "DELETE", path: "", sessionId: sessionId)
    }

    /// Send one HTTP request to the WebDriver server.
    ///
    /// - Parameters:
    ///   - verb: HTTP method; a JSON body is attached only when this is `"POST"`.
    ///   - path: Path appended to the WebDriver base URL.
    ///   - body: JSON object to send with a POST; `nil` is sent as `{}`.
    /// - Returns: The response payload and the HTTP response.
    /// - Throws: `PageFetcherError.invalidSessionResponse` when the response is not
    ///   an `HTTPURLResponse`; `PageFetcherError.seleniumError` carrying the status
    ///   code and the server's `value.message` (empty if absent) when the status is
    ///   not 200; plus any `URLSession` or `JSONSerialization` error.
    private func makeRequest(verb: String, path: String,
                             body: [String: Any]? = nil) async throws -> (Data, HTTPURLResponse)
    {
        var request = URLRequest(url: webDriverURL.appendingPathComponent(path))
        print("\(verb) \(request.url?.absoluteString ?? "")")
        request.httpMethod = verb
        if verb == "POST" {
            request.setValue("application/json", forHTTPHeaderField: "Content-Type")
            // A missing body is sent as an empty JSON object: a boxed `nil` is not
            // a valid top-level object for JSONSerialization.
            request.httpBody = try JSONSerialization.data(withJSONObject: body ?? [:])
        }

        let (data, response) = try await session.data(for: request)
        guard let httpResponse = response as? HTTPURLResponse else {
            throw PageFetcherError.invalidSessionResponse
        }
        guard httpResponse.statusCode == 200 else {
            // Error replies are expected to carry `{"value": {"message": ...}}`;
            // fall back to an empty message when they do not.
            let maybeJSON = try? JSONSerialization.jsonObject(with: data) as? [String: Any]
            let value = maybeJSON?["value"] as? [String: Any]
            let message = value?["message"] as? String ?? ""
            throw PageFetcherError.seleniumError(httpResponse.statusCode, message)
        }
        return (data, httpResponse)
    }

    /// Send one HTTP request scoped to an existing WebDriver session.
    ///
    /// The request path becomes `wd/hub/session/<sessionId>[/<path>]`; an empty
    /// `path` addresses the session resource itself.
    private func makeRequest(verb: String, path: String, sessionId: String,
                             body: [String: Any]? = nil) async throws -> (Data, HTTPURLResponse)
    {
        let addendum = [sessionId, path].filter { !$0.isEmpty }.joined(separator: "/")
        return try await makeRequest(verb: verb, path: "wd/hub/session/\(addendum)", body: body)
    }
}
// MARK: - Errors
// MARK: - Usage Example
public extension PageFetcher {
    /// Fetches the HTML of the GoComics A-to-Z comics listing page.
    ///
    /// - Returns: The string produced by `fetchHTML(from:)` for the listing URL.
    /// - Throws: Any error thrown by `fetchHTML(from:)`.
    func fetchAToZ() async throws -> String {
        let listingURL = "https://www.gocomics.com/comics/a-to-z"
        let html = try await fetchHTML(from: listingURL)
        return html
    }
}