"""Launch a Chrome process with remote debugging enabled and attach to it.

The module starts the Chromium executable reported by Playwright as a plain
subprocess, waits for its DevTools (CDP) HTTP endpoint to come up on a local
port, and then connects Playwright to it over CDP.  The resulting
``ManagedBrowser`` owns the process and its temporary profile directory and
releases both in ``close()``.
"""

import asyncio
import json
import logging
import shutil
import subprocess
import tempfile
import urllib.request
from dataclasses import dataclass
from pathlib import Path

from playwright.async_api import Browser, Playwright

logger = logging.getLogger(__name__)

# Command-line switches passed to every Chrome process started by ``launch``.
CHROME_FLAGS = [
    "--no-startup-window",
    "--disable-field-trial-config",
    "--disable-background-networking",
    "--disable-background-timer-throttling",
    "--disable-backgrounding-occluded-windows",
    "--disable-back-forward-cache",
    "--disable-breakpad",
    "--disable-client-side-phishing-detection",
    "--disable-component-extensions-with-background-pages",
    "--disable-component-update",
    "--no-default-browser-check",
    "--disable-default-apps",
    "--disable-dev-shm-usage",
    "--disable-extensions",
    "--disable-popup-blocking",
    "--disable-prompt-on-repost",
    "--disable-renderer-backgrounding",
    "--disable-hang-monitor",
    "--disable-ipc-flooding-protection",
    "--force-color-profile=srgb",
    "--metrics-recording-only",
    "--no-first-run",
    "--password-store=basic",
    "--use-mock-keychain",
    "--disable-infobars",
    "--disable-sync",
    "--enable-unsafe-swiftshader",
    "--no-sandbox",
    "--disable-search-engine-choice-screen",
]

# Port used for ``--remote-debugging-port`` when the caller does not pick one.
DEFAULT_CDP_PORT = 9222

# Polling budget while waiting for the CDP endpoint: attempts x interval,
# plus up to one second of HTTP timeout per attempt.
_CDP_POLL_ATTEMPTS = 60
_CDP_POLL_INTERVAL = 0.5

# Seconds to wait for the process to exit after terminate() and after kill().
_PROCESS_EXIT_TIMEOUT = 5


def _fetch_ws_endpoint(port: int) -> str | None:
    """Return the CDP websocket URL advertised on ``127.0.0.1:port``.

    Performs one blocking HTTP GET of ``/json/version`` with a one second
    timeout and reads ``webSocketDebuggerUrl`` from the JSON reply.

    Returns:
        The websocket URL, or ``None`` if the request fails, the reply cannot
        be parsed, or the key is absent.
    """
    try:
        with urllib.request.urlopen(
            f"http://127.0.0.1:{port}/json/version",
            timeout=1,
        ) as resp:
            data = json.loads(resp.read().decode("utf-8"))
        return data.get("webSocketDebuggerUrl")
    except Exception:
        # Deliberately broad: this is a readiness probe that the caller
        # retries, so any failure simply means "not available yet".
        return None


def _stop_process(process: subprocess.Popen) -> None:
    """Terminate ``process``, escalate to kill on timeout, and reap it."""
    if process.poll() is not None:
        return  # Already exited and reaped.
    process.terminate()
    try:
        process.wait(timeout=_PROCESS_EXIT_TIMEOUT)
        return
    except subprocess.TimeoutExpired:
        process.kill()
    try:
        # Wait again after kill() so the child is reaped instead of being
        # left behind as a zombie.
        process.wait(timeout=_PROCESS_EXIT_TIMEOUT)
    except subprocess.TimeoutExpired:
        logger.warning("Chrome process %s did not exit after kill()", process.pid)


def _cleanup(process: subprocess.Popen | None, profile_dir: Path) -> None:
    """Stop ``process`` (if one was started) and delete ``profile_dir``.

    Blocking; call it through ``asyncio.to_thread`` from coroutines.
    """
    if process is not None:
        _stop_process(process)
    # ignore_errors=True also covers a directory that is already gone.
    shutil.rmtree(profile_dir, ignore_errors=True)


async def _wait_for_ws_endpoint(process: subprocess.Popen, port: int) -> str:
    """Poll the CDP HTTP endpoint on ``port`` until it reports a websocket URL.

    Raises:
        RuntimeError: if ``process`` exits before the endpoint appears, or the
            polling budget is exhausted.
    """
    for _ in range(_CDP_POLL_ATTEMPTS):
        # The probe is blocking urllib I/O, so keep it off the event loop.
        ws_endpoint = await asyncio.to_thread(_fetch_ws_endpoint, port)
        if ws_endpoint:
            return ws_endpoint
        returncode = process.poll()
        if returncode is not None:
            # The child is gone; further polling can never succeed.
            raise RuntimeError(
                f"Chrome exited with code {returncode} before the CDP "
                f"websocket became available on port {port}"
            )
        await asyncio.sleep(_CDP_POLL_INTERVAL)
    raise RuntimeError(f"CDP websocket not available on port {port}")


@dataclass
class ManagedBrowser:
    """A Playwright browser attached over CDP plus the resources backing it.

    Attributes:
        browser: Playwright ``Browser`` obtained via ``connect_over_cdp``.
        process: The Chrome subprocess serving the CDP endpoint.
        profile_dir: Temporary ``--user-data-dir`` created for that process.
    """

    browser: Browser
    process: subprocess.Popen
    profile_dir: Path

    async def close(self) -> None:
        """Close the browser, stop the Chrome process, and delete the profile.

        Errors from ``browser.close()`` are logged at debug level and
        otherwise ignored so that process and profile cleanup always run.
        """
        try:
            await self.browser.close()
        except Exception:
            logger.debug("Ignoring error while closing browser", exc_info=True)
        finally:
            # Waiting on the process can block for several seconds; run the
            # blocking cleanup in a worker thread to keep the loop responsive.
            await asyncio.to_thread(_cleanup, self.process, self.profile_dir)


async def launch(
    playwright: Playwright, cdp_port: int = DEFAULT_CDP_PORT
) -> ManagedBrowser:
    """Start Chrome with remote debugging and connect Playwright to it.

    A fresh temporary profile directory is created under ``/tmp`` and passed
    as ``--user-data-dir``.  Chrome is started with ``CHROME_FLAGS`` and
    ``--remote-debugging-port=cdp_port``; its output is discarded.

    Args:
        playwright: Playwright instance; supplies the Chromium executable
            path and performs the CDP connection.
        cdp_port: Local TCP port for Chrome's remote debugging endpoint.

    Returns:
        A ``ManagedBrowser`` owning the browser, process, and profile dir.

    Raises:
        RuntimeError: if the CDP websocket does not become available.

    On any failure (including cancellation) the Chrome process is stopped and
    the profile directory removed before the exception propagates.

    NOTE(review): the readiness probe only checks the port, so if another
    process already serves CDP on ``cdp_port`` its endpoint would be used.
    """
    chrome_path = playwright.chromium.executable_path
    profile_dir = Path(tempfile.mkdtemp(prefix="megapt_profile-", dir="/tmp"))
    args = [
        chrome_path,
        *CHROME_FLAGS,
        f"--user-data-dir={profile_dir}",
        f"--remote-debugging-port={cdp_port}",
    ]

    proc: subprocess.Popen | None = None
    try:
        proc = subprocess.Popen(
            args, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
        )
        ws_endpoint = await _wait_for_ws_endpoint(proc, cdp_port)
        logger.info("CDP websocket: %s", ws_endpoint)
        browser = await playwright.chromium.connect_over_cdp(ws_endpoint)
    except BaseException:
        # BaseException so that task cancellation also releases the child
        # process and the temporary profile directory.
        await asyncio.to_thread(_cleanup, proc, profile_dir)
        raise

    return ManagedBrowser(browser=browser, process=proc, profile_dir=profile_dir)