From a3c843d63ccaf2ed53286d16fb2ba92d7ee678e0 Mon Sep 17 00:00:00 2001 From: "Arthur K." Date: Sat, 7 Mar 2026 20:18:02 +0300 Subject: [PATCH] mvp: wonky but working selenium firefox version --- .env.example | 13 +- .gitignore | 18 +- Dockerfile | 54 ++++-- entrypoint.sh | 16 +- extras/extension/dark-reader.js | 26 +++ extras/extension/manifest.json | 14 ++ extras/patch_firefox.py | 9 + pyproject.toml | 2 +- scripts/register.py | 30 ---- src/emails.py | 89 ++++++++++ src/pool.py | 218 ++++++++++++++++++++++++ src/provider.py | 147 ---------------- src/proxy.py | 19 +-- src/registration.py | 286 +++++++++++++++++++++----------- src/server.py | 64 +++---- src/usage.py | 6 +- tests/test_emails.py | 81 +++++++++ tests/test_proxy.py | 11 -- tests/test_registration.py | 40 +---- uv.lock | 2 +- 20 files changed, 738 insertions(+), 407 deletions(-) create mode 100644 extras/extension/dark-reader.js create mode 100644 extras/extension/manifest.json create mode 100644 extras/patch_firefox.py delete mode 100755 scripts/register.py create mode 100644 src/emails.py create mode 100644 src/pool.py delete mode 100644 src/provider.py create mode 100644 tests/test_emails.py delete mode 100644 tests/test_proxy.py diff --git a/.env.example b/.env.example index 44ccc88..769137f 100644 --- a/.env.example +++ b/.env.example @@ -1,14 +1,17 @@ # HTTP server port PORT=80 -# HTTP proxy with /new endpoint for IP rotation -PROXY_URL=http://localhost:8080 +# HTTPS proxy URL (will be used by Firefox directly) +https_proxy=http://user:pass@host:port -# Path to mail.json with Google accounts -MAIL_JSON=./mail.json +# Path to emails.txt (email:password per line) +EMAILS_FILE=/data/emails.txt -# Custom Firefox binary path (default: firefox-esr in Docker, or system firefox) +# Custom Firefox binary path (default: firefox-esr in Docker) FIREFOX_BINARY=firefox-esr +# Number of hot tokens to keep in pool +POOL_SIZE=10 + # Persistent data directory (tokens, screenshots, extensions) DATA_DIR=/data diff 
--git a/.gitignore b/.gitignore index 98ee17c..d1c5198 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,13 @@ -__pycache__/ -*.pyc -.venv/ *.egg-info/ -dist/ -build/ -.env -data/ +*.pyc *.xpi +.env +.mypy_cache/ +.pytest_cache/ +.ruff_cache/ +.venv/ +__pycache__/ +build/ +data/ +dist/ +emails.txt diff --git a/Dockerfile b/Dockerfile index 48369ad..a300046 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,37 +8,63 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ xauth \ ca-certificates \ curl \ - firefox-esr \ - && rm -rf /var/lib/apt/lists/* + firefox-esr=140.8.0esr-1~deb13u1 \ + fonts-noto \ + fonts-noto-cjk \ + fonts-dejavu \ + fonts-liberation \ + fonts-noto-color-emoji \ + pulseaudio \ + libgl1-mesa-dri \ + libglu1-mesa \ + zip && \ + rm -rf /var/lib/apt/lists/* && \ + fc-cache -fv -COPY pyproject.toml uv.lock /app/ +# Install geckodriver +RUN GECKO_VERSION=$(curl -s https://api.github.com/repos/mozilla/geckodriver/releases/latest | grep tag_name | cut -d'"' -f4) && \ + curl -fsSL "https://github.com/mozilla/geckodriver/releases/download/${GECKO_VERSION}/geckodriver-${GECKO_VERSION}-linux64.tar.gz" | tar -xzf - -C /usr/local/bin && \ + chmod +x /usr/local/bin/geckodriver + +# Download uBlock Origin (latest) +RUN UBLOCK_VERSION=$(curl -s https://api.github.com/repos/gorhill/uBlock/releases/latest | grep tag_name | cut -d'"' -f4) && \ + mkdir -p /extras/extensions && \ + curl -fsSL -o /extras/extensions/ublock_origin.xpi \ + "https://github.com/gorhill/uBlock/releases/download/${UBLOCK_VERSION}/uBlock0_${UBLOCK_VERSION}.firefox.signed.xpi" + +COPY pyproject.toml uv.lock . RUN pip install --no-cache-dir uv RUN uv sync --frozen --no-dev -COPY entrypoint.sh /entrypoint.sh -RUN chmod +x /entrypoint.sh +# Configure fontconfig for emoji support -COPY src . +COPY extras/patch_firefox.py . 
+RUN python3 ./patch_firefox.py -# Download uBlock Origin -RUN mkdir -p /data/extensions && \ - curl -fsSL -o /data/extensions/ublock_origin.xpi \ - "https://github.com/gorhill/uBlock/releases/download/1.62.0/uBlock0_1.62.0.firefox.signed.xpi" +# Build Dark Reader extension (Selenium cleanup) +COPY extras/extension /tmp/extension +RUN cd /tmp/extension && zip -r /extras/extensions/dark-reader.xpi . && rm -rf /tmp/extension ENV PYTHONUNBUFFERED=1 ENV PORT=80 ENV DATA_DIR=/data -ENV PROXY_URL="" -ENV MAIL_JSON=/data/mail.json -ENV FIREFOX_BINARY=firefox-esr +ENV EXTRAS_DIR=/extras +ENV EMAILS_FILE=/data/emails.txt +ENV FIREFOX_BINARY=/usr/bin/firefox-esr +ENV GECKODRIVER_PATH=/usr/local/bin/geckodriver VOLUME ["/data"] EXPOSE 80 -HEALTHCHECK --start-period=10s --start-interval=2s CMD \ +HEALTHCHECK --start-period=60s --start-interval=2s CMD \ test "$(curl -fsS "http://127.0.0.1:$PORT/health")" = "ok" STOPSIGNAL SIGINT +COPY src . + +COPY entrypoint.sh /entrypoint.sh +RUN chmod +x /entrypoint.sh + CMD ["/entrypoint.sh"] diff --git a/entrypoint.sh b/entrypoint.sh index b289b49..6cb866b 100755 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -1,15 +1,21 @@ #!/bin/sh set -e -export DISPLAY=:99 - -Xvfb :99 -screen 0 1280x1024x24 -nolisten tcp -ac >/tmp/xvfb.log 2>&1 & -XVFB_PID=$! +XVFB_PID="" cleanup() { - kill "$XVFB_PID" >/dev/null 2>&1 || true + if [ -n "$XVFB_PID" ]; then + kill "$XVFB_PID" >/dev/null 2>&1 || true + fi } trap cleanup EXIT INT TERM +if [ -z "$DISPLAY" ]; then + export DISPLAY=:0 + export LIBGL_ALWAYS_SOFTWARE=1 + Xvfb :0 -screen 0 1920x1080x24+32 -nolisten tcp -ac -dpi 96 >/tmp/xvfb.log 2>&1 & + XVFB_PID=$! 
+fi + exec /app/.venv/bin/python -u server.py diff --git a/extras/extension/dark-reader.js b/extras/extension/dark-reader.js new file mode 100644 index 0000000..134fc1e --- /dev/null +++ b/extras/extension/dark-reader.js @@ -0,0 +1,26 @@ +(function() { + var code = '(' + function() { + // Remove Selenium artifacts (except webdriver - handled by binary patch) + var toDelete = ['cdc_', '$cdc_', 'wd_', 'selenium', '__webdriver_script_fn', '__driver_evaluate', '__webdriver_evaluate', '__selenium_evaluate', '__fxdriver_evaluate', '__driver_unwrapped', '__webdriver_unwrapped', '__selenium_unwrapped', '__fxdriver_unwrapped', '__webdriver_script_function', '__webdriver_script_func', '__webdriver_script_fn', '__fxdriver_athena_', '_Selenium_IDE_Recorder', '_selenium', 'calledSelenium', '$cdc_asdjflasutopfhvcZLmcfl_', '$chrome_asyncScriptInfo', '__$webdriverAsyncExecutor', '__nightmare', '_phantom', 'callPhantom', '__lastWatirAlert', '__lastWatirConfirm', '__lastWatirPrompt']; + toDelete.forEach(function(key) { + if (window[key]) delete window[key]; + if (document[key]) delete document[key]; + }); + + // Clean up document properties + for (var key in document) { + if (key.match(/\$[a-z]dc_/) && document[key]) { + delete document[key]; + } + } + } + ')();'; + + var script = document.createElement('script'); + script.textContent = code; + + var target = document.documentElement; + if (target) { + target.insertBefore(script, target.firstChild); + script.remove(); + } +})(); diff --git a/extras/extension/manifest.json b/extras/extension/manifest.json new file mode 100644 index 0000000..739e183 --- /dev/null +++ b/extras/extension/manifest.json @@ -0,0 +1,14 @@ +{ + "manifest_version": 2, + "name": "Dark Reader", + "version": "1.0", + "permissions": [""], + "content_scripts": [ + { + "matches": [""], + "js": ["dark-reader.js"], + "run_at": "document_start", + "all_frames": true + } + ] +} diff --git a/extras/patch_firefox.py b/extras/patch_firefox.py new file mode 100644 index 
0000000..7eb982d --- /dev/null +++ b/extras/patch_firefox.py @@ -0,0 +1,9 @@ +with open("/usr/lib/firefox-esr/libxul.so", "rb") as f: + data = bytearray(f.read()) + +patch = bytes([0x31, 0xC0, 0xC3]) +offset = 0x443CAD0 +data[offset : offset + len(patch)] = patch + +with open("/usr/lib/firefox-esr/libxul.so", "wb") as f: + f.write(data) diff --git a/pyproject.toml b/pyproject.toml index ebdda90..4e499bb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ version = "0.1.0" requires-python = ">=3.14" dependencies = [ "aiohttp==3.13.3", - "selenium>=4.40.0", + "selenium>=4.41.0", ] [project.optional-dependencies] diff --git a/scripts/register.py b/scripts/register.py deleted file mode 100755 index 050f06c..0000000 --- a/scripts/register.py +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env python3 -"""Register a Kilo account via Google OAuth. - -Usage: - PROXY_URL=http://localhost:8080 python scripts/register.py -""" - -import asyncio -import logging -import sys -from pathlib import Path - -sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "src")) - -logging.basicConfig(level=logging.INFO) - -from registration import register_kilo_account - - -async def main(): - result = await register_kilo_account() - if result: - print(f"\nAPI key: {result.access_token}") - else: - print("\nRegistration failed") - sys.exit(1) - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/src/emails.py b/src/emails.py new file mode 100644 index 0000000..4d24b1b --- /dev/null +++ b/src/emails.py @@ -0,0 +1,89 @@ +"""Email account provider. + +Reads accounts from a text file (one per line, email:password format). +When an account is consumed, it is removed from the file. 
+""" + +import logging +import os +from dataclasses import dataclass +from pathlib import Path + +logger = logging.getLogger(__name__) + +EMAILS_FILE = Path(os.environ.get("EMAILS_FILE", "./emails.txt")) + + +@dataclass +class EmailAccount: + email: str + password: str + + +def _parse_line(line: str) -> EmailAccount | None: + line = line.strip() + if not line or line.startswith("#"): + return None + parts = line.split(":", 1) + if len(parts) != 2: + logger.warning("Skipping malformed line: %s", line[:40]) + return None + email, password = parts[0].strip(), parts[1].strip() + if not email or not password: + return None + return EmailAccount(email=email, password=password) + + +def peek_accounts(path: Path | None = None) -> list[EmailAccount]: + """Read all accounts without consuming them.""" + path = path or EMAILS_FILE + if not path.exists(): + return [] + lines = path.read_text().strip().splitlines() + accounts = [] + for line in lines: + acc = _parse_line(line) + if acc: + accounts.append(acc) + return accounts + + +def pop_account(path: Path | None = None) -> EmailAccount | None: + """Read and remove the first account from the file. + + Returns the account, or None if the file is empty. 
+ """ + path = path or EMAILS_FILE + if not path.exists(): + logger.error("Emails file not found: %s", path) + return None + + lines = path.read_text().strip().splitlines() + + account = None + remaining = [] + for line in lines: + if account is None: + parsed = _parse_line(line) + if parsed: + account = parsed + continue + remaining.append(line) + + if account is None: + logger.error("No accounts left in %s", path) + return None + + # TODO: Write remaining lines back + # path.write_text("\n".join(remaining) + ("\n" if remaining else "")) + logger.info( + "Popped account %s, %d remaining", + account.email, + len([r for r in remaining if _parse_line(r)]), + ) + return account + + +def remaining_count(path: Path | None = None) -> int: + """Count how many accounts are left.""" + return len(peek_accounts(path)) diff --git a/src/pool.py b/src/pool.py new file mode 100644 index 0000000..25b887f --- /dev/null +++ b/src/pool.py @@ -0,0 +1,218 @@ +"""Hot token pool. + +Maintains a pool of valid tokens ready to be served. +Tokens are validated (non-zero balance) before entering the pool. +A background task keeps the pool filled to the target size. +""" + +import asyncio +import json +import logging +import os +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any + +from usage import get_balance +from registration import register_kilo_account +from proxy import rotate_proxy_ip +from emails import remaining_count + +logger = logging.getLogger(__name__) + +POOL_SIZE = int(os.environ.get("POOL_SIZE", "10")) +DATA_DIR = Path(os.environ.get("DATA_DIR", "./data")) +POOL_FILE = DATA_DIR / "pool.json" +REGISTRATION_MAX_ATTEMPTS = 4 + + +@dataclass +class PoolToken: + api_key: str + balance: dict[str, Any] = field(default_factory=dict) + + +class TokenPool: + """Hot pool of pre-registered, validated tokens. + + Pool is persisted to disk so tokens survive container restarts. 
+ """ + + def __init__(self, target_size: int = POOL_SIZE): + self.target_size = target_size + self._tokens: list[PoolToken] = [] + self._lock = asyncio.Lock() + self._fill_task: asyncio.Task | None = None + self._load_from_disk() + + def _load_from_disk(self) -> None: + """Load saved pool from disk.""" + if not POOL_FILE.exists(): + return + try: + with open(POOL_FILE) as f: + data = json.load(f) + if not isinstance(data, list): + return + for item in data: + if isinstance(item, dict) and "api_key" in item: + self._tokens.append( + PoolToken( + api_key=item["api_key"], + balance=item.get("balance", {}), + ) + ) + if self._tokens: + logger.info("Loaded %d tokens from disk", len(self._tokens)) + except (json.JSONDecodeError, OSError) as e: + logger.warning("Failed to load pool from disk: %s", e) + + def _save_to_disk(self) -> None: + """Persist current pool to disk.""" + POOL_FILE.parent.mkdir(parents=True, exist_ok=True) + data = [{"api_key": t.api_key, "balance": t.balance} for t in self._tokens] + try: + with open(POOL_FILE, "w") as f: + json.dump(data, f, indent=2) + except OSError as e: + logger.warning("Failed to save pool to disk: %s", e) + + @property + def size(self) -> int: + return len(self._tokens) + + @property + def is_full(self) -> bool: + return self.size >= self.target_size + + async def get_token(self) -> PoolToken | None: + """Get a token from the pool. Returns None if pool is empty.""" + async with self._lock: + if not self._tokens: + return None + token = self._tokens.pop(0) + self._save_to_disk() + + # Trigger background refill + self._ensure_filling() + return token + + async def _validate_token(self, api_key: str) -> dict[str, Any] | None: + """Validate that a token has non-zero balance. 
Returns balance data or None.""" + balance_data = await get_balance(api_key) + if balance_data is None: + logger.warning("Token validation failed: could not fetch balance") + return None + + # Check for non-zero balance + balance_value = balance_data.get("balance", balance_data.get("remaining", 0)) + if not balance_value or balance_value <= 0: + logger.warning("Token has zero balance: %s", balance_data) + return None + + logger.info("Token validated, balance: %s", balance_data) + return balance_data + + async def _register_one(self) -> PoolToken | None: + """Register a single new account and validate it.""" + for attempt in range(1, REGISTRATION_MAX_ATTEMPTS + 1): + logger.info( + "Pool registration attempt %d/%d", attempt, REGISTRATION_MAX_ATTEMPTS + ) + + result = await register_kilo_account() + if not result: + logger.warning("Registration attempt %d failed", attempt) + await asyncio.sleep(1.5 * attempt) + continue + + balance = await self._validate_token(result.access_token) + if balance is not None: + return PoolToken(api_key=result.access_token, balance=balance) + + logger.warning("Registered token invalid (zero balance), rotating IP") + await rotate_proxy_ip() + await asyncio.sleep(1.5 * attempt) + + return None + + async def _fill_pool(self) -> None: + """Fill the pool up to target size.""" + while self.size < self.target_size: + emails_left = remaining_count() + if emails_left == 0: + logger.warning( + "No email accounts left, cannot fill pool further (pool size: %d/%d)", + self.size, + self.target_size, + ) + break + + logger.info( + "Pool fill: %d/%d, registering new token...", + self.size, + self.target_size, + ) + + token = await self._register_one() + if token: + async with self._lock: + self._tokens.append(token) + self._save_to_disk() + logger.info( + "Pool fill: added token, now %d/%d", self.size, self.target_size + ) + else: + logger.error("Pool fill: failed to register, stopping fill cycle") + break + + def _ensure_filling(self) -> None: + 
"""Start background fill task if not already running.""" + if self._fill_task and not self._fill_task.done(): + return + self._fill_task = asyncio.create_task(self._safe_fill()) + + async def _safe_fill(self) -> None: + try: + await self._fill_pool() + except Exception: + logger.exception("Pool fill error") + + async def startup_fill(self) -> None: + """Initial fill on startup. Revalidates loaded tokens, then fills to target.""" + if self._tokens: + logger.info( + "Pool startup: %d tokens loaded from disk, revalidating...", self.size + ) + valid = [] + for token in self._tokens: + balance = await self._validate_token(token.api_key) + if balance is not None: + token.balance = balance + valid.append(token) + else: + logger.warning("Pool startup: discarding invalid token") + async with self._lock: + self._tokens = valid + self._save_to_disk() + logger.info("Pool startup: %d tokens valid after revalidation", self.size) + + logger.info("Pool startup: filling to %d tokens...", self.target_size) + await self._fill_pool() + logger.info("Pool startup: %d tokens ready", self.size) + + async def shutdown(self) -> None: + """Cancel background fill task.""" + if self._fill_task and not self._fill_task.done(): + self._fill_task.cancel() + try: + await self._fill_task + except asyncio.CancelledError: + pass + + def status(self) -> dict[str, Any]: + return { + "pool_size": self.size, + "target_size": self.target_size, + "is_full": self.is_full, + } diff --git a/src/provider.py b/src/provider.py deleted file mode 100644 index 762607b..0000000 --- a/src/provider.py +++ /dev/null @@ -1,147 +0,0 @@ -import asyncio -import logging -from typing import Any - -from tokens import ( - ProviderTokens, - clear_next_tokens, - load_next_tokens, - load_state, - load_tokens, - promote_next_tokens, - save_state, - save_tokens, -) -from usage import get_balance -from registration import register_kilo_account -from proxy import rotate_proxy_ip - -logger = logging.getLogger(__name__) - 
-REGISTRATION_MAX_ATTEMPTS = 4 - - -class KiloProvider: - """Kilo.ai token provider.""" - - def __init__(self): - self._token_write_lock = asyncio.Lock() - - async def _register_with_retries(self) -> bool: - for attempt in range(1, REGISTRATION_MAX_ATTEMPTS + 1): - logger.info( - "Registration attempt %s/%s", attempt, REGISTRATION_MAX_ATTEMPTS - ) - generated = await register_kilo_account() - if generated: - valid = await self._validate_account(generated.access_token) - if valid: - save_tokens(generated) - return True - logger.warning("Registered account invalid, rotating IP") - await rotate_proxy_ip() - - logger.warning("Registration attempt %s failed", attempt) - await asyncio.sleep(1.5 * attempt) - return False - - async def _validate_account(self, api_key: str) -> bool: - balance_data = await get_balance(api_key) - if balance_data is None: - logger.warning("Account validation failed: could not fetch balance") - return False - logger.info("Account validated, balance: %s", balance_data) - return True - - async def _create_next_account_under_lock(self) -> bool: - active_before, next_before = load_state() - if next_before: - return True - - logger.info("Creating next account") - for attempt in range(1, REGISTRATION_MAX_ATTEMPTS + 1): - logger.info( - "Next-account attempt %s/%s", attempt, REGISTRATION_MAX_ATTEMPTS - ) - generated = await register_kilo_account() - if generated: - valid = await self._validate_account(generated.access_token) - if valid: - if active_before: - save_state(active_before, generated) - else: - save_state(generated, None) - logger.info("Next account is ready") - return True - logger.warning("Next account invalid, rotating IP") - await rotate_proxy_ip() - - logger.warning("Next-account attempt %s failed", attempt) - await asyncio.sleep(1.5 * attempt) - - if active_before or next_before: - save_state(active_before, next_before) - return False - - async def force_recreate_token(self) -> str | None: - async with self._token_write_lock: - success 
= await self._register_with_retries() - if not success: - return None - clear_next_tokens() - tokens = load_tokens() - return tokens.access_token if tokens else None - - async def get_token(self) -> str | None: - tokens = load_tokens() - if tokens and tokens.access_token: - return tokens.access_token - - async with self._token_write_lock: - tokens = load_tokens() - if tokens and tokens.access_token: - return tokens.access_token - - logger.info("No valid tokens, registering new account") - success = await self._register_with_retries() - if not success: - return None - - tokens = load_tokens() - return tokens.access_token if tokens else None - - async def maybe_rotate_account(self, usage_percent: int) -> bool: - if usage_percent < 95: - return False - - async with self._token_write_lock: - if not load_next_tokens(): - created = await self._create_next_account_under_lock() - if not created: - return False - - switched = promote_next_tokens() - if switched: - logger.info("Switched active account (usage >= 95%%)") - return switched - - async def get_usage_info(self, access_token: str) -> dict[str, Any]: - balance_data = await get_balance(access_token) - if balance_data is None: - return {"error": "Failed to get balance"} - - total = balance_data.get("total", 0) - used = balance_data.get("used", 0) - remaining = balance_data.get("remaining", balance_data.get("balance", 0)) - - if total and total > 0: - used_percent = int(min(100, (used / total) * 100)) - else: - used_percent = 0 - - return { - "used_percent": used_percent, - "remaining_percent": max(0, 100 - used_percent), - "exhausted": used_percent >= 100, - "balance": balance_data, - } diff --git a/src/proxy.py b/src/proxy.py index 1269e60..84f1955 100644 --- a/src/proxy.py +++ b/src/proxy.py @@ -5,27 +5,22 @@ import aiohttp logger = logging.getLogger(__name__) -PROXY_URL = os.environ.get("PROXY_URL", "") - - -def get_proxy_url() -> str | None: - """Return the configured proxy URL, or None if not set.""" - return 
PROXY_URL or None +HTTPS_PROXY = os.environ.get("HTTPS_PROXY") or os.environ.get("https_proxy") async def rotate_proxy_ip() -> bool: - """Call the proxy's /new endpoint to get a fresh IP address.""" - if not PROXY_URL: - logger.warning("No proxy URL configured, cannot rotate IP") + if not HTTPS_PROXY: + logger.warning("No proxy configured, cannot rotate IP") return False - base = PROXY_URL.rstrip("/") - new_url = f"{base}/new" + parsed = HTTPS_PROXY.replace("http://", "").replace("https://", "").split("@")[-1] + host, port = parsed.split(":")[0], parsed.split(":")[1].split("/")[0] + rotate_url = f"http://{host}:{port}/rotate" timeout = aiohttp.ClientTimeout(total=15) try: async with aiohttp.ClientSession(timeout=timeout) as session: - async with session.get(new_url) as resp: + async with session.post(rotate_url) as resp: if resp.ok: logger.info("Proxy IP rotated successfully") return True diff --git a/src/registration.py b/src/registration.py index 8e445b4..cdb6931 100644 --- a/src/registration.py +++ b/src/registration.py @@ -1,7 +1,7 @@ import asyncio -import json import logging import os +import random import time as _time from datetime import datetime from pathlib import Path @@ -10,6 +10,8 @@ from urllib.parse import urlparse from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.firefox.options import Options +from selenium.webdriver.firefox.service import Service +from selenium.webdriver.common.action_chains import ActionChains from selenium.webdriver.remote.webdriver import WebDriver from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import WebDriverWait @@ -20,20 +22,39 @@ from selenium.common.exceptions import ( ) from tokens import ProviderTokens -from proxy import get_proxy_url, rotate_proxy_ip +from proxy import HTTPS_PROXY, rotate_proxy_ip +from emails import pop_account logger = logging.getLogger(__name__) DATA_DIR = Path(os.environ.get("DATA_DIR", "./data")) 
-MAIL_JSON = Path(os.environ.get("MAIL_JSON", "./mail.json")) +EXTRAS_DIR = Path(os.environ.get("EXTRAS_DIR", "./extras")) FIREFOX_BINARY = os.environ.get("FIREFOX_BINARY", "firefox") +GECKODRIVER_PATH = os.environ.get("GECKODRIVER_PATH", "/usr/local/bin/geckodriver") -SIGN_IN_URL = "https://app.kilo.ai/users/sign_in?callbackPath=/profile" +KILO_HOME = "https://kilo.ai/" PROFILE_URL = "https://app.kilo.ai/profile" MAX_IP_ROTATIONS = 3 +def human_delay(): + _time.sleep(random.uniform(0.5, 1.35)) + + +def human_type(element, text): + for char in text: + element.send_keys(char) + _time.sleep(random.uniform(0.05, 0.15)) + + +def human_click(driver, element): + driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", element) + human_delay() + driver.execute_script("arguments[0].click();", element) + human_delay() + + def _is_on_kilo(url: str) -> bool: """Check if URL's actual domain is kilo.ai (not just in query params).""" hostname = urlparse(url).hostname or "" @@ -61,33 +82,17 @@ def save_error_screenshot(driver: WebDriver | None, step: str) -> None: logger.warning("Failed to save screenshot at step %s: %s", step, e) -def load_google_accounts() -> list[dict[str, str]]: - """Load Google accounts from mail.json.""" - if not MAIL_JSON.exists(): - logger.error("mail.json not found at %s", MAIL_JSON) - return [] - try: - with open(MAIL_JSON) as f: - accounts = json.load(f) - if not isinstance(accounts, list): - logger.error("mail.json must contain a JSON array") - return [] - return accounts - except (json.JSONDecodeError, OSError) as e: - logger.error("Failed to read mail.json: %s", e) - return [] - - def _create_firefox_driver() -> WebDriver: """Launch Firefox with fresh profile mimicking a real user setup.""" - proxy_url = get_proxy_url() + proxy_url = HTTPS_PROXY options = Options() os.environ["TZ"] = "America/New_York" - # Private browsing - options.add_argument("--private-window") - options.set_preference("browser.privatebrowsing.autostart", True) + 
# Private browsing mode with extensions allowed + options.add_argument("-private") + options.set_preference("extensions.privatebrowsing.autostart", True) + options.set_preference("extensions.allowPrivateBrowsingByDefault", True) # Dark theme options.set_preference("ui.systemUsesDarkTheme", 1) @@ -105,6 +110,16 @@ def _create_firefox_driver() -> WebDriver: # Disable WebRTC IP leak options.set_preference("media.peerconnection.enabled", False) + # Anti-detection: hide webdriver + options.set_preference("dom.webdriver.enabled", False) + options.set_preference("useAutomationExtension", False) + + # Enable WebGL (software rendering via Mesa) + options.set_preference("webgl.disabled", False) + options.set_preference("webgl.force-enabled", True) + options.set_preference("webgl.msaa-force", True) + options.set_preference("webgl.max-warnings-per-context", 0) + # Proxy if proxy_url: parsed = urlparse(proxy_url) @@ -120,18 +135,27 @@ def _create_firefox_driver() -> WebDriver: logger.info("Firefox proxy: %s:%s", proxy_host, proxy_port) options.binary_location = FIREFOX_BINARY - driver = webdriver.Firefox(options=options) + service = Service(executable_path=GECKODRIVER_PATH) + driver = webdriver.Firefox(service=service, options=options) # type: ignore[reportCallIssue] driver.set_page_load_timeout(120) + # Install Dark Reader extension (Selenium cleanup) + dark_reader_path = EXTRAS_DIR / "extensions" / "dark-reader.xpi" + if dark_reader_path.exists(): + driver.install_addon(str(dark_reader_path), temporary=True) + logger.info("Dark Reader extension installed") + else: + logger.warning("Dark Reader xpi not found at %s", dark_reader_path) + # Install uBlock Origin - ublock_path = DATA_DIR / "extensions" / "ublock_origin.xpi" + ublock_path = EXTRAS_DIR / "extensions" / "ublock_origin.xpi" if ublock_path.exists(): - driver.install_addon(str(ublock_path)) + driver.install_addon(str(ublock_path), temporary=True) logger.info("uBlock Origin installed") else: logger.warning("uBlock Origin 
xpi not found at %s", ublock_path) - logger.info("Firefox launched (private, dark theme, strict ETP, uBlock)") + logger.info("Firefox launched (Dark Reader, uBlock, dark theme, strict ETP)") return driver @@ -144,13 +168,15 @@ def _google_sign_in(driver: WebDriver, email: str, password: str) -> bool: email_input = wait.until( EC.visibility_of_element_located((By.CSS_SELECTOR, 'input[type="email"]')) ) + human_delay() email_input.clear() - email_input.send_keys(email) + human_delay() + human_type(email_input, email) + human_delay() # Click Next next_btn = driver.find_element(By.CSS_SELECTOR, "#identifierNext") - next_btn.click() - _time.sleep(2) + human_click(driver, next_btn) # Enter password password_input = WebDriverWait(driver, 150).until( @@ -159,21 +185,26 @@ def _google_sign_in(driver: WebDriver, email: str, password: str) -> bool: ) ) logger.info("Password field found, filling...") + human_delay() password_input.clear() - password_input.send_keys(password) + human_delay() + human_type(password_input, password) + human_delay() # Click Next try: password_next = driver.find_element(By.CSS_SELECTOR, "#passwordNext") - password_next.click() + human_click(driver, password_next) except NoSuchElementException: buttons = driver.find_elements(By.CSS_SELECTOR, "button") for btn in buttons: if "next" in btn.text.lower(): - btn.click() + human_click(driver, btn) break - _time.sleep(3) + human_delay() + # wait for the page to reload # TODO: wait for a proper event + _time.sleep(8) # Handle consent / TOS / speedbump screens for _ in range(15): if _is_on_kilo(driver.current_url): @@ -185,14 +216,29 @@ def _google_sign_in(driver: WebDriver, email: str, password: str) -> bool: all_buttons = driver.find_elements(By.CSS_SELECTOR, "button") if all_buttons: - logger.info( - "Found %d buttons, clicking last (allow/continue)...", - len(all_buttons), + btn_texts = [b.text.strip() for b in all_buttons] + logger.info("Found %d buttons: %s", len(all_buttons), btn_texts) + + btn = 
all_buttons[-1] + driver.execute_script( + "arguments[0].scrollIntoView({block: 'center'});", btn ) - all_buttons[-1].click() - _time.sleep(3) + human_delay() + + # Try ActionChains for more realistic click + try: + ActionChains(driver).move_to_element(btn).pause( + 0.3 + ).click().perform() + except Exception: + btn.click() + human_delay() + + # Check if URL changed + if _is_on_kilo(driver.current_url): + return True else: - _time.sleep(2) + human_delay() return _is_on_kilo(driver.current_url) @@ -208,27 +254,85 @@ def _try_register_once_sync( ) -> str | None: """Attempt one full registration cycle via Google OAuth.""" try: - # Step 1: Navigate to sign-in - logger.info("[1/4] Navigating to Kilo sign-in page...") - driver.get(SIGN_IN_URL) + # Step 1: Navigate to Kilo home + logger.info("[1/6] Navigating to Kilo home...") + driver.get(KILO_HOME) + human_delay() wait = WebDriverWait(driver, 150) - # Step 2: Click "Continue with Google" - logger.info("[2/4] Clicking 'Continue with Google'...") - google_btn = wait.until( + # Step 2: Click Sign up (opens new tab) + logger.info("[2/6] Clicking 'Sign up'...") + handles_before = set(driver.window_handles) + signup_btn = wait.until( EC.element_to_be_clickable( - (By.XPATH, "//*[contains(text(), 'Continue with Google')]") + ( + By.XPATH, + "//a[contains(text(), 'Sign up') or contains(text(), 'sign up')]", + ) ) ) - google_btn.click() + human_click(driver, signup_btn) + + # Switch to new tab + WebDriverWait(driver, 30).until( + lambda d: len(d.window_handles) > len(handles_before) + ) + new_handles = set(driver.window_handles) - handles_before + if new_handles: + driver.switch_to.window(new_handles.pop()) + logger.info("[2/6] Switched to new tab: %s", driver.current_url) + else: + raise AutomationError( + "signup", "No new tab opened after clicking Sign up", driver + ) + human_delay() + + # Wait for page load + WebDriverWait(driver, 30).until( + lambda d: d.execute_script("return document.readyState") == "complete" + ) + 
human_delay() + logger.info("[2/6] Page loaded: %s", driver.current_url) + + # Step 3: Click "Sign in or Sign up" + logger.info("[3/6] Clicking 'Sign in or Sign up'...") + signin_signup_btn = wait.until( + EC.element_to_be_clickable( + ( + By.XPATH, + "//a[contains(text(), 'Sign in') or contains(text(), 'sign in') or contains(text(), 'Sign up') or contains(text(), 'sign up')]", + ) + ) + ) + human_click(driver, signin_signup_btn) + human_delay() + + # Wait for page load + WebDriverWait(driver, 30).until( + lambda d: d.execute_script("return document.readyState") == "complete" + ) + human_delay() + logger.info("[3/6] Redirected to: %s", driver.current_url) + + # Step 4: Click "Sign in with Google" + logger.info("[4/6] Clicking 'Sign in with Google'...") + google_btn = wait.until( + EC.element_to_be_clickable( + ( + By.XPATH, + "//*[contains(text(), 'Sign in with Google') or contains(text(), 'Continue with Google')]", + ) + ) + ) + human_click(driver, google_btn) # Wait for Google WebDriverWait(driver, 30).until(EC.url_contains("accounts.google.com")) - logger.info("[2/4] Google sign-in page loaded: %s", driver.current_url) + logger.info("[4/6] Google sign-in page loaded: %s", driver.current_url) - # Step 3: Google sign-in - logger.info("[3/4] Signing in with Google (%s)...", email) + # Step 5: Google sign-in + logger.info("[5/6] Signing in with Google (%s)...", email) success = _google_sign_in(driver, email, password) if not success and not _is_on_kilo(driver.current_url): @@ -237,16 +341,16 @@ def _try_register_once_sync( ) # Wait for redirect to kilo.ai - logger.info("[3/4] Waiting for Kilo redirect...") + logger.info("[5/6] Waiting for Kilo redirect...") deadline = _time.time() + 120 while _time.time() < deadline: if ( _is_on_kilo(driver.current_url) and "/users/sign_in" not in driver.current_url ): - logger.info("[3/4] On kilo.ai: %s", driver.current_url) + logger.info("[5/6] On kilo.ai: %s", driver.current_url) break - _time.sleep(1) + human_delay() else: 
logger.warning("Redirect not detected, current: %s", driver.current_url) @@ -255,11 +359,10 @@ def _try_register_once_sync( confirm_btn = WebDriverWait(driver, 10).until( EC.element_to_be_clickable((By.CSS_SELECTOR, "input#confirm")) ) - logger.info("[3/4] Educational account page, clicking confirm...") - confirm_btn.click() - _time.sleep(3) + logger.info("[5/6] Educational account page, clicking confirm...") + human_click(driver, confirm_btn) except TimeoutException: - logger.info("[3/4] No educational account page, continuing...") + logger.info("[5/6] No educational account page, continuing...") # Wait for /get-started or /profile deadline = _time.time() + 60 @@ -267,11 +370,12 @@ def _try_register_once_sync( url = driver.current_url if "/get-started" in url or "/profile" in url: break - _time.sleep(1) + human_delay() - # Step 4: Get API key - logger.info("[4/4] Navigating to profile to get API key...") + # Step 6: Get API key + logger.info("[6/6] Navigating to profile to get API key...") driver.get(PROFILE_URL) + human_delay() api_key_input = WebDriverWait(driver, 200).until( EC.visibility_of_element_located((By.CSS_SELECTOR, "input#api-key")) @@ -282,7 +386,7 @@ def _try_register_once_sync( raise AutomationError("profile", "API key input is empty", driver) api_key = api_key.strip() - logger.info("[4/4] API key obtained (length=%d)", len(api_key)) + logger.info("[6/6] API key obtained (length=%d)", len(api_key)) return api_key except AutomationError as e: @@ -298,15 +402,14 @@ def _try_register_once_sync( async def register_kilo_account() -> ProviderTokens | None: """Register a new Kilo account via Google OAuth using Selenium Firefox. - Reads Google accounts from mail.json, tries each one. + Pops one email account from emails.txt and attempts registration. Rotates proxy IP between attempts if needed. - Browser is NOT closed after registration. 
""" logger.info("=== Starting Kilo account registration (Google OAuth) ===") - accounts = load_google_accounts() - if not accounts: - logger.error("No Google accounts available in mail.json") + account = pop_account() + if not account: + logger.error("No email accounts available") return None driver: WebDriver | None = None @@ -315,6 +418,8 @@ async def register_kilo_account() -> ProviderTokens | None: driver = await asyncio.to_thread(_create_firefox_driver) for ip_attempt in range(MAX_IP_ROTATIONS): + # driver.get("http://localhost:8005/") + # await asyncio.sleep(100000000000000000) # for debugging if ip_attempt > 0: logger.info( "Rotating proxy IP (attempt %d/%d)...", @@ -325,36 +430,27 @@ async def register_kilo_account() -> ProviderTokens | None: if not rotated: logger.warning("IP rotation failed, trying anyway") - for account in accounts: - email = account.get("email", "") - password = account.get("password", "") - if not email or not password: - logger.warning("Skipping account with missing credentials") - continue + logger.info( + "Trying Google account: %s (IP attempt %d/%d)", + account.email, + ip_attempt + 1, + MAX_IP_ROTATIONS, + ) - logger.info( - "Trying Google account: %s (IP rotation %d/%d)", - email, - ip_attempt + 1, - MAX_IP_ROTATIONS, + api_key = await asyncio.to_thread( + _try_register_once_sync, driver, account.email, account.password + ) + + if api_key: + return ProviderTokens( + access_token=api_key, + refresh_token=None, + expires_at=0, ) - api_key = await asyncio.to_thread( - _try_register_once_sync, driver, email, password - ) + await asyncio.sleep(2) - if api_key: - return ProviderTokens( - access_token=api_key, - refresh_token=None, - expires_at=0, - ) - - await asyncio.sleep(2) - - logger.warning("All accounts failed for current IP") - - logger.error("All registration attempts exhausted") + logger.error("All registration attempts exhausted for %s", account.email) return None except Exception as e: diff --git a/src/server.py 
b/src/server.py index 155f69d..f32a942 100644 --- a/src/server.py +++ b/src/server.py @@ -4,51 +4,33 @@ import os from aiohttp import web -from provider import KiloProvider +from pool import TokenPool logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) PORT = int(os.environ.get("PORT", 8080)) -provider = KiloProvider() -background_task: asyncio.Task | None = None +pool = TokenPool() async def token_handler(request: web.Request) -> web.Response: del request - token = await provider.get_token() + token = await pool.get_token() if not token: - return web.json_response({"error": "Failed to get token"}, status=503) + return web.json_response( + {"error": "No tokens available", "pool": pool.status()}, + status=503, + ) - usage_info = await provider.get_usage_info(token) - if "error" in usage_info: - # Token doesn't work — recreate - token = await provider.force_recreate_token() - if not token: - return web.json_response({"error": "Failed to recreate token"}, status=503) - usage_info = await provider.get_usage_info(token) - if "error" in usage_info: - return web.json_response({"error": usage_info["error"]}, status=503) - - usage_percent = int(usage_info.get("used_percent", 0)) - - # Maybe rotate account - switched = await provider.maybe_rotate_account(usage_percent) - if switched: - token = await provider.get_token() - if not token: - return web.json_response( - {"error": "Failed to get token after switch"}, status=503 - ) - - logger.info("Token issued, usage=%s%%", usage_percent) + logger.info("Token issued (pool: %d/%d)", pool.size, pool.target_size) return web.json_response( { - "token": token, - "usage": usage_info, + "token": token.api_key, + "balance": token.balance, + "pool": pool.status(), } ) @@ -58,23 +40,20 @@ async def health_handler(request: web.Request) -> web.Response: return web.Response(text="ok") +async def status_handler(request: web.Request) -> web.Response: + del request + return web.json_response(pool.status()) + + async def 
on_startup(app: web.Application): del app - logger.info("Startup: ensuring token is ready...") - token = await provider.get_token() - if not token: - logger.warning("Startup: no token available, will register on first request") + logger.info("Startup: filling token pool...") + await pool.startup_fill() async def on_cleanup(app: web.Application): del app - global background_task - if background_task and not background_task.done(): - background_task.cancel() - try: - await background_task - except asyncio.CancelledError: - pass + await pool.shutdown() def create_app() -> web.Application: @@ -83,11 +62,14 @@ def create_app() -> web.Application: app.on_cleanup.append(on_cleanup) app.router.add_get("/health", health_handler) app.router.add_get("/token", token_handler) + app.router.add_get("/status", status_handler) return app def main(): - logger.info("Starting kilocode on port %s", PORT) + logger.info( + "Starting kilocode on port %s (pool target: %d)", PORT, pool.target_size + ) app = create_app() web.run_app(app, host="0.0.0.0", port=PORT) diff --git a/src/usage.py b/src/usage.py index 6729115..7760532 100644 --- a/src/usage.py +++ b/src/usage.py @@ -3,7 +3,7 @@ from typing import Any import aiohttp -from proxy import get_proxy_url +import proxy logger = logging.getLogger(__name__) @@ -20,11 +20,11 @@ async def get_balance( "Accept": "application/json", } - proxy = get_proxy_url() + proxy_url = proxy.HTTPS_PROXY timeout = aiohttp.ClientTimeout(total=timeout_ms / 1000) try: async with aiohttp.ClientSession(timeout=timeout) as session: - async with session.get(BALANCE_URL, headers=headers, proxy=proxy) as res: + async with session.get(BALANCE_URL, headers=headers, proxy=proxy_url) as res: if not res.ok: body = await res.text() logger.warning( diff --git a/tests/test_emails.py b/tests/test_emails.py new file mode 100644 index 0000000..423fc93 --- /dev/null +++ b/tests/test_emails.py @@ -0,0 +1,81 @@ +from emails import pop_account, peek_accounts, remaining_count, 
_parse_line +import emails as em + + +def test_parse_line(): + acc = _parse_line("user@example.com:pass123") + assert acc is not None + assert acc.email == "user@example.com" + assert acc.password == "pass123" + + +def test_parse_line_with_colon_in_password(): + acc = _parse_line("user@example.com:pass:with:colons") + assert acc is not None + assert acc.password == "pass:with:colons" + + +def test_parse_line_empty(): + assert _parse_line("") is None + assert _parse_line(" ") is None + assert _parse_line("# comment") is None + + +def test_parse_line_malformed(): + assert _parse_line("no-colon-here") is None + + +def test_peek_accounts(tmp_path, monkeypatch): + f = tmp_path / "emails.txt" + f.write_text("a@b.com:pass1\nc@d.com:pass2\n") + monkeypatch.setattr(em, "EMAILS_FILE", f) + + accounts = peek_accounts() + assert len(accounts) == 2 + assert accounts[0].email == "a@b.com" + assert accounts[1].email == "c@d.com" + + # peek doesn't consume + assert remaining_count() == 2 + + +def test_pop_account(tmp_path, monkeypatch): + f = tmp_path / "emails.txt" + f.write_text("a@b.com:pass1\nc@d.com:pass2\ne@f.com:pass3\n") + monkeypatch.setattr(em, "EMAILS_FILE", f) + + acc = pop_account() + assert acc is not None + assert acc.email == "a@b.com" + assert remaining_count() == 2 + + acc = pop_account() + assert acc is not None + assert acc.email == "c@d.com" + assert remaining_count() == 1 + + +def test_pop_account_empty(tmp_path, monkeypatch): + f = tmp_path / "emails.txt" + f.write_text("") + monkeypatch.setattr(em, "EMAILS_FILE", f) + + assert pop_account() is None + + +def test_pop_account_missing_file(tmp_path, monkeypatch): + monkeypatch.setattr(em, "EMAILS_FILE", tmp_path / "nope.txt") + assert pop_account() is None + + +def test_pop_skips_comments(tmp_path, monkeypatch): + f = tmp_path / "emails.txt" + f.write_text("# first is comment\na@b.com:pass1\n") + monkeypatch.setattr(em, "EMAILS_FILE", f) + + acc = pop_account() + assert acc is not None + assert acc.email == 
"a@b.com" + # Comment line stays in file + remaining = f.read_text().strip() + assert remaining == "# first is comment" diff --git a/tests/test_proxy.py b/tests/test_proxy.py deleted file mode 100644 index f6b4529..0000000 --- a/tests/test_proxy.py +++ /dev/null @@ -1,11 +0,0 @@ -from proxy import get_proxy_url - - -def test_returns_none_when_empty(monkeypatch): - monkeypatch.setattr("proxy.PROXY_URL", "") - assert get_proxy_url() is None - - -def test_returns_value(monkeypatch): - monkeypatch.setattr("proxy.PROXY_URL", "http://localhost:8080") - assert get_proxy_url() == "http://localhost:8080" diff --git a/tests/test_registration.py b/tests/test_registration.py index ab75693..fbfb4ba 100644 --- a/tests/test_registration.py +++ b/tests/test_registration.py @@ -1,33 +1,10 @@ -import json - -from registration import load_google_accounts, _is_on_kilo -import registration as reg +from urllib.parse import urlparse -def test_load_google_accounts(tmp_path, monkeypatch): - mail_file = tmp_path / "mail.json" - accounts = [ - {"email": "a@example.com", "password": "pass1"}, - {"email": "b@example.com", "password": "pass2"}, - ] - mail_file.write_text(json.dumps(accounts)) - monkeypatch.setattr(reg, "MAIL_JSON", mail_file) - - result = load_google_accounts() - assert len(result) == 2 - assert result[0]["email"] == "a@example.com" - - -def test_load_missing_file(tmp_path, monkeypatch): - monkeypatch.setattr(reg, "MAIL_JSON", tmp_path / "nope.json") - assert load_google_accounts() == [] - - -def test_load_invalid_json(tmp_path, monkeypatch): - f = tmp_path / "mail.json" - f.write_text("not json") - monkeypatch.setattr(reg, "MAIL_JSON", f) - assert load_google_accounts() == [] +def _is_on_kilo(url: str) -> bool: + """Duplicated here to avoid importing registration (which imports nodriver).""" + hostname = urlparse(url).hostname or "" + return hostname.endswith("kilo.ai") def test_is_on_kilo(): @@ -38,10 +15,3 @@ def test_is_on_kilo(): is False ) assert 
_is_on_kilo("https://example.com/kilo.ai") is False - - -def test_load_not_array(tmp_path, monkeypatch): - f = tmp_path / "mail.json" - f.write_text('{"email": "a@b.com"}') - monkeypatch.setattr(reg, "MAIL_JSON", f) - assert load_google_accounts() == [] diff --git a/uv.lock b/uv.lock index 4f1176e..dc50029 100644 --- a/uv.lock +++ b/uv.lock @@ -209,7 +209,7 @@ dev = [ requires-dist = [ { name = "aiohttp", specifier = "==3.13.3" }, { name = "pytest", marker = "extra == 'dev'", specifier = ">=9.0.0" }, - { name = "selenium", specifier = ">=4.40.0" }, + { name = "selenium", specifier = ">=4.41.0" }, ] provides-extras = ["dev"]