122 lines
4.7 KiB
Swift
122 lines
4.7 KiB
Swift
import Foundation
|
|
#if canImport(FoundationNetworking)
|
|
import FoundationNetworking
|
|
#endif
|
|
|
|
/// A class that connects to a Selenium WebDriver running Chromium to fetch HTML page content.
///
/// Each call to `fetchHTML(from:)` creates its own WebDriver session, navigates to the
/// requested page, reads the page source, and deletes the session again.
public final class PageFetcher {
    /// Base URL of the WebDriver server; endpoint paths are appended to it.
    private let webDriverURL: URL
    /// URL session used for every WebDriver HTTP request.
    private let session = URLSession.shared

    // MARK: - Initialization

    /// Initialize PageFetcher with WebDriver URL.
    ///
    /// - Parameter webDriverURL: The URL where Selenium WebDriver is running (default: http://localhost:4444)
    /// - Precondition: `webDriverURL` must be accepted by `URL(string:)`; an unparseable
    ///   string is treated as a programmer error and traps with a descriptive message.
    public init(webDriverURL: String = "http://localhost:4444") {
        guard let parsedURL = URL(string: webDriverURL) else {
            preconditionFailure("PageFetcher: invalid WebDriver URL \"\(webDriverURL)\"")
        }
        self.webDriverURL = parsedURL
    }

    // MARK: - Public Methods

    /// Fetch the raw HTML content from the specified URL.
    ///
    /// The WebDriver session created for the fetch is deleted on both the success and
    /// the failure path, so a failed navigation does not leave a session behind.
    ///
    /// - Parameter url: The URL to fetch content from
    /// - Returns: The raw HTML content as a string
    /// - Throws: PageFetcherError for various failure scenarios, or any error raised by
    ///   the underlying URL session or JSON (de)serialization.
    public func fetchHTML(from url: String) async throws -> String {
        let sessionId = try await startSession()
        let source: String
        do {
            try await navigateToURL(url, sessionId: sessionId)
            source = try await getPageSource(sessionId: sessionId)
        } catch {
            // `defer` cannot await, so clean up explicitly. The cleanup is best-effort:
            // the original failure is the error the caller needs to see.
            try? await endSession(sessionId: sessionId)
            throw error
        }
        try await endSession(sessionId: sessionId)
        return source
    }

    // MARK: - Private Methods

    /// Start a new WebDriver session with Chrome capabilities.
    ///
    /// - Returns: The `sessionId` string found under `value` in the server's JSON reply.
    /// - Throws: `PageFetcherError.invalidSessionResponse` when the reply does not have
    ///   that shape, plus anything thrown by `makeRequest`.
    private func startSession() async throws -> String {
        let chromeArguments = [
            "--headless",
            "--no-sandbox",
            "--disable-dev-shm-usage",
            "--disable-gpu",
            "--window-size=1920,1080",
        ]
        // Built from explicitly typed pieces: the payload mixes string and dictionary
        // values, which an un-annotated nested literal cannot express.
        let alwaysMatch: [String: Any] = [
            "browserName": "chrome",
            "goog:chromeOptions": ["args": chromeArguments],
        ]
        let capabilities: [String: Any] = [
            "capabilities": ["alwaysMatch": alwaysMatch],
        ]

        let (data, _) = try await makeRequest(verb: "POST", path: "wd/hub/session", body: capabilities)
        guard let json = try JSONSerialization.jsonObject(with: data) as? [String: Any],
              let value = json["value"] as? [String: Any],
              let sessionId = value["sessionId"] as? String
        else {
            throw PageFetcherError.invalidSessionResponse
        }
        return sessionId
    }

    /// Ask the browser in the given session to load `url`.
    private func navigateToURL(_ url: String, sessionId: String) async throws {
        _ = try await makeRequest(verb: "POST", path: "url", sessionId: sessionId, body: ["url": url])
    }

    /// Read the current page source of the given session.
    ///
    /// - Returns: The string stored under `value` in the server's JSON reply.
    /// - Throws: `PageFetcherError.invalidPageSourceResponse` when the reply does not
    ///   contain a string `value`, plus anything thrown by `makeRequest`.
    private func getPageSource(sessionId: String) async throws -> String {
        let (data, _) = try await makeRequest(verb: "GET", path: "source", sessionId: sessionId)
        guard let json = try JSONSerialization.jsonObject(with: data) as? [String: Any],
              let value = json["value"] as? String
        else {
            throw PageFetcherError.invalidPageSourceResponse
        }

        return value
    }

    /// Delete the given WebDriver session.
    private func endSession(sessionId: String) async throws {
        _ = try await makeRequest(verb: "DELETE", path: "", sessionId: sessionId)
    }

    /// Send one HTTP request to the WebDriver server and return the raw reply.
    ///
    /// - Parameters:
    ///   - verb: HTTP method; a JSON body is attached only when this is `"POST"`.
    ///   - path: Path appended to the WebDriver base URL.
    ///   - body: JSON object to send with a POST. When omitted, an empty JSON object
    ///     is sent so that serialization never receives a `nil` top-level value.
    /// - Returns: The response data together with the HTTP response.
    /// - Throws: `PageFetcherError.invalidSessionResponse` when the reply is not an HTTP
    ///   response, `PageFetcherError.seleniumError` (status code and the server's
    ///   `value.message`, or an empty string) when the status is not 200, plus any
    ///   URL session or JSON serialization error.
    private func makeRequest(verb: String, path: String,
                             body: [String: Any]? = nil) async throws -> (Data, HTTPURLResponse)
    {
        var request = URLRequest(url: webDriverURL.appendingPathComponent(path))
        print("\(verb) \(request.url?.absoluteString ?? "")")
        request.httpMethod = verb
        if verb == "POST" {
            request.setValue("application/json", forHTTPHeaderField: "Content-Type")
            request.httpBody = try JSONSerialization.data(withJSONObject: body ?? [:])
        }
        let (data, response) = try await session.data(for: request)
        guard let httpResponse = response as? HTTPURLResponse else {
            throw PageFetcherError.invalidSessionResponse
        }
        guard httpResponse.statusCode == 200 else {
            // The error body is parsed leniently: a missing or malformed payload
            // simply yields an empty message.
            let maybeJSON = try? JSONSerialization.jsonObject(with: data) as? [String: Any]
            let value = maybeJSON?["value"] as? [String: Any]
            let message = value?["message"] as? String ?? ""
            throw PageFetcherError.seleniumError(httpResponse.statusCode, message)
        }
        return (data, httpResponse)
    }

    /// Send a request scoped to a session: the path becomes
    /// `wd/hub/session/<sessionId>[/<path>]`, skipping empty components.
    private func makeRequest(verb: String, path: String, sessionId: String,
                             body: [String: Any]? = nil) async throws -> (Data, HTTPURLResponse)
    {
        let addendum = [sessionId, path].filter { !$0.isEmpty }.joined(separator: "/")
        return try await makeRequest(verb: verb, path: "wd/hub/session/\(addendum)", body: body)
    }
}
|
|
|
|
public extension PageFetcher {
    /// Convenience wrapper that fetches the GoComics A-to-Z comics page.
    ///
    /// - Returns: The HTML string returned by `fetchHTML(from:)` for that page.
    /// - Throws: Whatever `fetchHTML(from:)` throws.
    func fetchAToZ() async throws -> String {
        let listingURL = "https://www.gocomics.com/comics/a-to-z"
        let html = try await fetchHTML(from: listingURL)
        return html
    }
}
|