refactor!: change the entire purpose of this script
This commit is contained in:
parent
217e176975
commit
71d1050adb
20 changed files with 1124 additions and 872 deletions
108
src/browser.py
Normal file
108
src/browser.py
Normal file
|
|
@ -0,0 +1,108 @@
|
|||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
import urllib.request
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
from playwright.async_api import Browser, Playwright
|
||||
|
||||
logger = logging.getLogger(__name__)

# Command-line switches placed on the Chromium command line, in this order,
# immediately after the executable path. Per-launch switches
# (--user-data-dir, --remote-debugging-port) are appended separately.
# NOTE(review): "--no-sandbox" turns off Chromium's process sandbox; confirm
# the browser is only ever pointed at content you are willing to trust.
CHROME_FLAGS: list[str] = [
    # Startup / first-run behaviour
    "--no-startup-window",
    "--disable-field-trial-config",
    # Background activity and throttling
    "--disable-background-networking",
    "--disable-background-timer-throttling",
    "--disable-backgrounding-occluded-windows",
    "--disable-back-forward-cache",
    "--disable-breakpad",
    "--disable-client-side-phishing-detection",
    "--disable-component-extensions-with-background-pages",
    "--disable-component-update",
    "--no-default-browser-check",
    "--disable-default-apps",
    "--disable-dev-shm-usage",
    "--disable-extensions",
    # Prompts, watchdogs and rate limits
    "--disable-popup-blocking",
    "--disable-prompt-on-repost",
    "--disable-renderer-backgrounding",
    "--disable-hang-monitor",
    "--disable-ipc-flooding-protection",
    # Rendering, metrics, credentials
    "--force-color-profile=srgb",
    "--metrics-recording-only",
    "--no-first-run",
    "--password-store=basic",
    "--use-mock-keychain",
    "--disable-infobars",
    "--disable-sync",
    "--enable-unsafe-swiftshader",
    "--no-sandbox",
    "--disable-search-engine-choice-screen",
]
|
||||
|
||||
|
||||
def _fetch_ws_endpoint(port: int) -> str | None:
|
||||
try:
|
||||
with urllib.request.urlopen(
|
||||
f"http://127.0.0.1:{port}/json/version",
|
||||
timeout=1,
|
||||
) as resp:
|
||||
data = json.loads(resp.read().decode("utf-8"))
|
||||
return data.get("webSocketDebuggerUrl")
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
@dataclass
class ManagedBrowser:
    """A Playwright browser handle bundled with the process and profile behind it.

    ``close()`` releases all three: it closes the Playwright connection, stops
    the child process and deletes the profile directory.
    """

    # Playwright handle used to drive the browser.
    browser: Browser
    # Child process that ``close()`` terminates.
    process: subprocess.Popen
    # Directory that ``close()`` deletes recursively.
    profile_dir: Path

    async def close(self) -> None:
        """Close the browser, stop its process and remove the profile directory.

        Every step is best-effort: a failure while closing the Playwright
        handle is logged at debug level and does not prevent the process from
        being stopped or the directory from being removed.
        """
        try:
            await self.browser.close()
        except Exception:
            # The connection may already be gone (e.g. the browser crashed);
            # the process and profile still have to be cleaned up below.
            logger.debug("Ignoring error while closing browser", exc_info=True)

        # Popen.wait() blocks, so run the shutdown sequence in a worker thread
        # instead of stalling the event loop for up to five seconds.
        await asyncio.to_thread(self._stop_process)

        # ignore_errors=True already covers a missing directory.
        await asyncio.to_thread(shutil.rmtree, self.profile_dir, ignore_errors=True)

    def _stop_process(self) -> None:
        """Terminate the child process, escalating to kill after five seconds."""
        self.process.terminate()
        try:
            self.process.wait(timeout=5)
        except subprocess.TimeoutExpired:
            self.process.kill()
            # Reap the killed process so it does not linger as a zombie and so
            # it has released the profile directory before it is deleted.
            self.process.wait()
|
||||
|
||||
|
||||
def _abort_launch(proc: subprocess.Popen | None, profile_dir: Path) -> None:
    """Stop a partially started browser process and delete its profile directory."""
    if proc is not None:
        proc.terminate()
        try:
            proc.wait(timeout=5)
        except subprocess.TimeoutExpired:
            proc.kill()
            proc.wait()
    shutil.rmtree(profile_dir, ignore_errors=True)


async def launch(playwright: Playwright, cdp_port: int | None = None) -> ManagedBrowser:
    """Start Chromium with remote debugging enabled and attach Playwright over CDP.

    The executable comes from the ``CHROMIUM_PATH`` environment variable, or
    from ``playwright.chromium.executable_path`` when that is unset or empty.
    The browser runs with a fresh temporary profile directory created under
    ``/tmp``.

    Args:
        playwright: Playwright instance used to locate Chromium and to connect
            to it once it is up.
        cdp_port: Remote-debugging port. When ``None`` (or ``0``) the
            ``CDP_PORT`` environment variable is used, defaulting to 9222.

    Returns:
        A ``ManagedBrowser`` holding the connected browser, the child process
        and the profile directory.

    Raises:
        RuntimeError: If the process exits with a non-zero status before the
            DevTools endpoint answers, or the endpoint does not answer within
            60 polls spaced 0.5 s apart.
    """
    chrome_path = os.environ.get("CHROMIUM_PATH") or playwright.chromium.executable_path
    cdp_port = cdp_port or int(os.environ.get("CDP_PORT", "9222"))
    profile_dir = Path(tempfile.mkdtemp(prefix="megapt_profile-", dir="/tmp"))

    args = [
        chrome_path,
        *CHROME_FLAGS,
        f"--user-data-dir={profile_dir}",
        f"--remote-debugging-port={cdp_port}",
    ]

    proc: subprocess.Popen | None = None
    try:
        proc = subprocess.Popen(args, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

        ws_endpoint = None
        for _ in range(60):
            ws_endpoint = await asyncio.to_thread(_fetch_ws_endpoint, cdp_port)
            if ws_endpoint:
                break
            # Truthy only for a non-zero exit status (None means still
            # running). Fail fast instead of polling a dead process for 30 s.
            exit_status = proc.poll()
            if exit_status:
                raise RuntimeError(
                    f"Chromium exited with status {exit_status} before CDP "
                    f"became available on port {cdp_port}"
                )
            await asyncio.sleep(0.5)

        if not ws_endpoint:
            raise RuntimeError(f"CDP websocket not available on port {cdp_port}")

        logger.info("CDP websocket: %s", ws_endpoint)
        browser = await playwright.chromium.connect_over_cdp(ws_endpoint)
    except BaseException:
        # Covers a failed spawn, a timeout, a failed CDP connect and task
        # cancellation: nothing is returned to the caller, so the process and
        # the profile directory must be released here. Run in a worker thread
        # because stopping the process can block for several seconds.
        await asyncio.to_thread(_abort_launch, proc, profile_dir)
        raise

    return ManagedBrowser(browser=browser, process=proc, profile_dir=profile_dir)
|
||||
Loading…
Add table
Add a link
Reference in a new issue