1
0
Fork 0
gibidy/src/browser.py

110 lines
3.1 KiB
Python

import asyncio
import json
import logging
import shutil
import subprocess
import tempfile
import urllib.request
from dataclasses import dataclass
from pathlib import Path
from playwright.async_api import Browser, Playwright
logger = logging.getLogger(__name__)

# Command-line switches placed on the browser's argv, in this exact order,
# ahead of the per-launch --user-data-dir / --remote-debugging-port options.
# NOTE(review): "--no-sandbox" is included; confirm that is acceptable for
# every environment this module runs in.
CHROME_FLAGS: list[str] = [
    "--no-startup-window",
    "--disable-field-trial-config",
    "--disable-background-networking",
    "--disable-background-timer-throttling",
    "--disable-backgrounding-occluded-windows",
    "--disable-back-forward-cache",
    "--disable-breakpad",
    "--disable-client-side-phishing-detection",
    "--disable-component-extensions-with-background-pages",
    "--disable-component-update",
    "--no-default-browser-check",
    "--disable-default-apps",
    "--disable-dev-shm-usage",
    "--disable-extensions",
    "--disable-popup-blocking",
    "--disable-prompt-on-repost",
    "--disable-renderer-backgrounding",
    "--disable-hang-monitor",
    "--disable-ipc-flooding-protection",
    "--force-color-profile=srgb",
    "--metrics-recording-only",
    "--no-first-run",
    "--password-store=basic",
    "--use-mock-keychain",
    "--disable-infobars",
    "--disable-sync",
    "--enable-unsafe-swiftshader",
    "--no-sandbox",
    "--disable-search-engine-choice-screen",
]

# Remote-debugging port used when the caller does not supply one.
DEFAULT_CDP_PORT: int = 9222
def _fetch_ws_endpoint(port: int) -> str | None:
try:
with urllib.request.urlopen(
f"http://127.0.0.1:{port}/json/version",
timeout=1,
) as resp:
data = json.loads(resp.read().decode("utf-8"))
return data.get("webSocketDebuggerUrl")
except Exception:
return None
@dataclass
class ManagedBrowser:
    """Bundle of a connected browser, its OS process and its profile directory.

    Attributes:
        browser: Playwright browser handle; closed first by :meth:`close`.
        process: The subprocess to terminate when closing.
        profile_dir: Directory that is deleted once the process has stopped.
    """

    browser: Browser
    process: subprocess.Popen
    profile_dir: Path

    async def close(self) -> None:
        """Close the browser, stop the process and delete the profile directory.

        Every step is best-effort: a failure to close the Playwright handle is
        logged at debug level and does not prevent the process from being
        stopped or the profile directory from being removed.
        """
        try:
            await self.browser.close()
        except Exception:
            logger.debug("Ignoring error while closing browser", exc_info=True)

        # Waiting on the child and deleting the profile are blocking calls;
        # run them off the event-loop thread so other tasks keep running.
        await asyncio.to_thread(self._stop_process)
        await asyncio.to_thread(shutil.rmtree, self.profile_dir, ignore_errors=True)

    def _stop_process(self) -> None:
        """Terminate the process, escalating to kill after five seconds."""
        self.process.terminate()
        try:
            self.process.wait(timeout=5)
        except subprocess.TimeoutExpired:
            self.process.kill()
            # Reap the killed child so it does not linger as a zombie.
            self.process.wait()
def _abort_launch(proc: subprocess.Popen | None, profile_dir: Path) -> None:
    """Stop a partially started browser process and delete its profile directory."""
    if proc is not None:
        proc.terminate()
        try:
            proc.wait(timeout=5)
        except subprocess.TimeoutExpired:
            proc.kill()
            # Reap the killed child so it does not linger as a zombie.
            proc.wait()
    shutil.rmtree(profile_dir, ignore_errors=True)


async def launch(
    playwright: Playwright, cdp_port: int = DEFAULT_CDP_PORT
) -> ManagedBrowser:
    """Start Chromium with remote debugging and attach Playwright to it over CDP.

    A fresh profile directory is created under ``/tmp``, the Chromium binary
    reported by Playwright is started with ``CHROME_FLAGS`` plus that profile
    and ``--remote-debugging-port``, and the DevTools endpoint is polled (up to
    60 attempts, 0.5 s apart) until it reports a websocket URL.

    Args:
        playwright: Playwright instance supplying the Chromium executable path
            and the CDP connection.
        cdp_port: Port passed to ``--remote-debugging-port`` and polled for
            the websocket URL.

    Returns:
        A ``ManagedBrowser`` owning the connected browser, the subprocess and
        the profile directory.

    Raises:
        RuntimeError: If the process exits before the endpoint appears, or the
            endpoint is still unavailable after all polling attempts.

    On any failure (including task cancellation) the subprocess is stopped and
    the profile directory is removed before the error propagates.
    """
    chrome_path = playwright.chromium.executable_path
    # NOTE(review): "/tmp" is hard-coded; confirm this module only targets
    # platforms where that directory exists.
    profile_dir = Path(tempfile.mkdtemp(prefix="megapt_profile-", dir="/tmp"))
    proc: subprocess.Popen | None = None
    try:
        args = [
            chrome_path,
            *CHROME_FLAGS,
            f"--user-data-dir={profile_dir}",
            f"--remote-debugging-port={cdp_port}",
        ]
        proc = subprocess.Popen(
            args, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
        )

        ws_endpoint = None
        for _ in range(60):
            ws_endpoint = await asyncio.to_thread(_fetch_ws_endpoint, cdp_port)
            # If our process is already gone there is no point in polling on,
            # and any endpoint answering on the port was not started by us.
            if proc.poll() is not None:
                raise RuntimeError(
                    f"Browser process exited with code {proc.returncode} "
                    f"before CDP became available on port {cdp_port}"
                )
            if ws_endpoint:
                break
            await asyncio.sleep(0.5)

        if not ws_endpoint:
            raise RuntimeError(f"CDP websocket not available on port {cdp_port}")

        logger.info("CDP websocket: %s", ws_endpoint)
        browser = await playwright.chromium.connect_over_cdp(ws_endpoint)
    except (Exception, asyncio.CancelledError):
        # Do not leak the child process or the temp profile on a failed launch.
        await asyncio.to_thread(_abort_launch, proc, profile_dir)
        raise

    return ManagedBrowser(browser=browser, process=proc, profile_dir=profile_dir)