mvp: wonky but working selenium firefox version
This commit is contained in:
parent
061eefdb24
commit
a3c843d63c
20 changed files with 738 additions and 407 deletions
|
|
@ -1,7 +1,7 @@
|
|||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import random
|
||||
import time as _time
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
|
@ -10,6 +10,8 @@ from urllib.parse import urlparse
|
|||
from selenium import webdriver
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.firefox.options import Options
|
||||
from selenium.webdriver.firefox.service import Service
|
||||
from selenium.webdriver.common.action_chains import ActionChains
|
||||
from selenium.webdriver.remote.webdriver import WebDriver
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
from selenium.webdriver.support.ui import WebDriverWait
|
||||
|
|
@ -20,20 +22,39 @@ from selenium.common.exceptions import (
|
|||
)
|
||||
|
||||
from tokens import ProviderTokens
|
||||
from proxy import get_proxy_url, rotate_proxy_ip
|
||||
from proxy import HTTPS_PROXY, rotate_proxy_ip
|
||||
from emails import pop_account
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
DATA_DIR = Path(os.environ.get("DATA_DIR", "./data"))
|
||||
MAIL_JSON = Path(os.environ.get("MAIL_JSON", "./mail.json"))
|
||||
EXTRAS_DIR = Path(os.environ.get("EXTRAS_DIR", "./extras"))
|
||||
FIREFOX_BINARY = os.environ.get("FIREFOX_BINARY", "firefox")
|
||||
GECKODRIVER_PATH = os.environ.get("GECKODRIVER_PATH", "/usr/local/bin/geckodriver")
|
||||
|
||||
SIGN_IN_URL = "https://app.kilo.ai/users/sign_in?callbackPath=/profile"
|
||||
KILO_HOME = "https://kilo.ai/"
|
||||
PROFILE_URL = "https://app.kilo.ai/profile"
|
||||
|
||||
MAX_IP_ROTATIONS = 3
|
||||
|
||||
|
||||
def human_delay(min_seconds: float = 0.5, max_seconds: float = 1.35) -> None:
    """Block the calling thread for a short, randomly chosen pause.

    Args:
        min_seconds: Lower bound of the pause, in seconds.
        max_seconds: Upper bound of the pause, in seconds.

    The defaults reproduce the original fixed 0.5-1.35 s range, so existing
    ``human_delay()`` call sites behave exactly as before.
    """
    pause = random.uniform(min_seconds, max_seconds)
    # time.sleep() raises ValueError for negative durations; clamp at zero.
    _time.sleep(max(0.0, pause))
|
||||
|
||||
|
||||
def human_type(
    element,
    text,
    min_delay: float = 0.05,
    max_delay: float = 0.15,
) -> None:
    """Send ``text`` to ``element`` one character at a time with random pauses.

    Args:
        element: Object exposing ``send_keys(str)``; called once per character.
        text: The string to type; an empty string sends nothing.
        min_delay: Lower bound of the pause after each character, in seconds.
        max_delay: Upper bound of the pause after each character, in seconds.

    The defaults reproduce the original fixed 0.05-0.15 s per-key range, so
    existing two-argument calls behave exactly as before.
    """
    for char in text:
        element.send_keys(char)
        # time.sleep() raises ValueError for negative durations; clamp at zero.
        _time.sleep(max(0.0, random.uniform(min_delay, max_delay)))
|
||||
|
||||
|
||||
def human_click(driver, element):
    """Scroll ``element`` to the viewport centre, then click it via JavaScript.

    Each of the two scripts is run through ``driver.execute_script`` with
    ``element`` as ``arguments[0]``, and each is followed by a
    ``human_delay()`` pause: scroll, pause, click, pause.
    """
    steps = (
        "arguments[0].scrollIntoView({block: 'center'});",
        "arguments[0].click();",
    )
    for script in steps:
        driver.execute_script(script, element)
        human_delay()
|
||||
|
||||
|
||||
def _is_on_kilo(url: str) -> bool:
|
||||
"""Check if URL's actual domain is kilo.ai (not just in query params)."""
|
||||
hostname = urlparse(url).hostname or ""
|
||||
|
|
@ -61,33 +82,17 @@ def save_error_screenshot(driver: WebDriver | None, step: str) -> None:
|
|||
logger.warning("Failed to save screenshot at step %s: %s", step, e)
|
||||
|
||||
|
||||
def load_google_accounts() -> list[dict[str, str]]:
    """Load Google accounts from mail.json.

    Returns:
        The parsed top-level JSON array from ``MAIL_JSON``. An empty list is
        returned (and an error logged) when the file is missing, cannot be
        read, is not valid UTF-8/JSON, or does not contain a JSON array.
    """
    if not MAIL_JSON.exists():
        logger.error("mail.json not found at %s", MAIL_JSON)
        return []

    try:
        # Decode explicitly as UTF-8 instead of relying on the platform
        # locale's default encoding.
        with open(MAIL_JSON, encoding="utf-8") as f:
            accounts = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError,
        # so a badly encoded file is reported instead of crashing the caller.
        logger.error("Failed to read mail.json: %s", e)
        return []

    if not isinstance(accounts, list):
        logger.error("mail.json must contain a JSON array")
        return []
    return accounts
|
||||
|
||||
|
||||
def _create_firefox_driver() -> WebDriver:
|
||||
"""Launch Firefox with fresh profile mimicking a real user setup."""
|
||||
proxy_url = get_proxy_url()
|
||||
proxy_url = HTTPS_PROXY
|
||||
|
||||
options = Options()
|
||||
os.environ["TZ"] = "America/New_York"
|
||||
|
||||
# Private browsing
|
||||
options.add_argument("--private-window")
|
||||
options.set_preference("browser.privatebrowsing.autostart", True)
|
||||
# Private browsing mode with extensions allowed
|
||||
options.add_argument("-private")
|
||||
options.set_preference("extensions.privatebrowsing.autostart", True)
|
||||
options.set_preference("extensions.allowPrivateBrowsingByDefault", True)
|
||||
|
||||
# Dark theme
|
||||
options.set_preference("ui.systemUsesDarkTheme", 1)
|
||||
|
|
@ -105,6 +110,16 @@ def _create_firefox_driver() -> WebDriver:
|
|||
# Disable WebRTC IP leak
|
||||
options.set_preference("media.peerconnection.enabled", False)
|
||||
|
||||
# Anti-detection: hide webdriver
|
||||
options.set_preference("dom.webdriver.enabled", False)
|
||||
options.set_preference("useAutomationExtension", False)
|
||||
|
||||
# Enable WebGL (software rendering via Mesa)
|
||||
options.set_preference("webgl.disabled", False)
|
||||
options.set_preference("webgl.force-enabled", True)
|
||||
options.set_preference("webgl.msaa-force", True)
|
||||
options.set_preference("webgl.max-warnings-per-context", 0)
|
||||
|
||||
# Proxy
|
||||
if proxy_url:
|
||||
parsed = urlparse(proxy_url)
|
||||
|
|
@ -120,18 +135,27 @@ def _create_firefox_driver() -> WebDriver:
|
|||
logger.info("Firefox proxy: %s:%s", proxy_host, proxy_port)
|
||||
|
||||
options.binary_location = FIREFOX_BINARY
|
||||
driver = webdriver.Firefox(options=options)
|
||||
service = Service(executable_path=GECKODRIVER_PATH)
|
||||
driver = webdriver.Firefox(service=service, options=options) # type: ignore[reportCallIssue]
|
||||
driver.set_page_load_timeout(120)
|
||||
|
||||
# Install Dark Reader extension (Selenium cleanup)
|
||||
dark_reader_path = EXTRAS_DIR / "extensions" / "dark-reader.xpi"
|
||||
if dark_reader_path.exists():
|
||||
driver.install_addon(str(dark_reader_path), temporary=True)
|
||||
logger.info("Dark Reader extension installed")
|
||||
else:
|
||||
logger.warning("Dark Reader xpi not found at %s", dark_reader_path)
|
||||
|
||||
# Install uBlock Origin
|
||||
ublock_path = DATA_DIR / "extensions" / "ublock_origin.xpi"
|
||||
ublock_path = EXTRAS_DIR / "extensions" / "ublock_origin.xpi"
|
||||
if ublock_path.exists():
|
||||
driver.install_addon(str(ublock_path))
|
||||
driver.install_addon(str(ublock_path), temporary=True)
|
||||
logger.info("uBlock Origin installed")
|
||||
else:
|
||||
logger.warning("uBlock Origin xpi not found at %s", ublock_path)
|
||||
|
||||
logger.info("Firefox launched (private, dark theme, strict ETP, uBlock)")
|
||||
logger.info("Firefox launched (Dark Reader, uBlock, dark theme, strict ETP)")
|
||||
return driver
|
||||
|
||||
|
||||
|
|
@ -144,13 +168,15 @@ def _google_sign_in(driver: WebDriver, email: str, password: str) -> bool:
|
|||
email_input = wait.until(
|
||||
EC.visibility_of_element_located((By.CSS_SELECTOR, 'input[type="email"]'))
|
||||
)
|
||||
human_delay()
|
||||
email_input.clear()
|
||||
email_input.send_keys(email)
|
||||
human_delay()
|
||||
human_type(email_input, email)
|
||||
human_delay()
|
||||
|
||||
# Click Next
|
||||
next_btn = driver.find_element(By.CSS_SELECTOR, "#identifierNext")
|
||||
next_btn.click()
|
||||
_time.sleep(2)
|
||||
human_click(driver, next_btn)
|
||||
|
||||
# Enter password
|
||||
password_input = WebDriverWait(driver, 150).until(
|
||||
|
|
@ -159,21 +185,26 @@ def _google_sign_in(driver: WebDriver, email: str, password: str) -> bool:
|
|||
)
|
||||
)
|
||||
logger.info("Password field found, filling...")
|
||||
human_delay()
|
||||
password_input.clear()
|
||||
password_input.send_keys(password)
|
||||
human_delay()
|
||||
human_type(password_input, password)
|
||||
human_delay()
|
||||
|
||||
# Click Next
|
||||
try:
|
||||
password_next = driver.find_element(By.CSS_SELECTOR, "#passwordNext")
|
||||
password_next.click()
|
||||
human_click(driver, password_next)
|
||||
except NoSuchElementException:
|
||||
buttons = driver.find_elements(By.CSS_SELECTOR, "button")
|
||||
for btn in buttons:
|
||||
if "next" in btn.text.lower():
|
||||
btn.click()
|
||||
human_click(driver, btn)
|
||||
break
|
||||
_time.sleep(3)
|
||||
human_delay()
|
||||
|
||||
# wait for the page to reload # TODO: wait for a proper event
|
||||
_time.sleep(8)
|
||||
# Handle consent / TOS / speedbump screens
|
||||
for _ in range(15):
|
||||
if _is_on_kilo(driver.current_url):
|
||||
|
|
@ -185,14 +216,29 @@ def _google_sign_in(driver: WebDriver, email: str, password: str) -> bool:
|
|||
|
||||
all_buttons = driver.find_elements(By.CSS_SELECTOR, "button")
|
||||
if all_buttons:
|
||||
logger.info(
|
||||
"Found %d buttons, clicking last (allow/continue)...",
|
||||
len(all_buttons),
|
||||
btn_texts = [b.text.strip() for b in all_buttons]
|
||||
logger.info("Found %d buttons: %s", len(all_buttons), btn_texts)
|
||||
|
||||
btn = all_buttons[-1]
|
||||
driver.execute_script(
|
||||
"arguments[0].scrollIntoView({block: 'center'});", btn
|
||||
)
|
||||
all_buttons[-1].click()
|
||||
_time.sleep(3)
|
||||
human_delay()
|
||||
|
||||
# Try ActionChains for more realistic click
|
||||
try:
|
||||
ActionChains(driver).move_to_element(btn).pause(
|
||||
0.3
|
||||
).click().perform()
|
||||
except Exception:
|
||||
btn.click()
|
||||
human_delay()
|
||||
|
||||
# Check if URL changed
|
||||
if _is_on_kilo(driver.current_url):
|
||||
return True
|
||||
else:
|
||||
_time.sleep(2)
|
||||
human_delay()
|
||||
|
||||
return _is_on_kilo(driver.current_url)
|
||||
|
||||
|
|
@ -208,27 +254,85 @@ def _try_register_once_sync(
|
|||
) -> str | None:
|
||||
"""Attempt one full registration cycle via Google OAuth."""
|
||||
try:
|
||||
# Step 1: Navigate to sign-in
|
||||
logger.info("[1/4] Navigating to Kilo sign-in page...")
|
||||
driver.get(SIGN_IN_URL)
|
||||
# Step 1: Navigate to Kilo home
|
||||
logger.info("[1/6] Navigating to Kilo home...")
|
||||
driver.get(KILO_HOME)
|
||||
human_delay()
|
||||
|
||||
wait = WebDriverWait(driver, 150)
|
||||
|
||||
# Step 2: Click "Continue with Google"
|
||||
logger.info("[2/4] Clicking 'Continue with Google'...")
|
||||
google_btn = wait.until(
|
||||
# Step 2: Click Sign up (opens new tab)
|
||||
logger.info("[2/6] Clicking 'Sign up'...")
|
||||
handles_before = set(driver.window_handles)
|
||||
signup_btn = wait.until(
|
||||
EC.element_to_be_clickable(
|
||||
(By.XPATH, "//*[contains(text(), 'Continue with Google')]")
|
||||
(
|
||||
By.XPATH,
|
||||
"//a[contains(text(), 'Sign up') or contains(text(), 'sign up')]",
|
||||
)
|
||||
)
|
||||
)
|
||||
google_btn.click()
|
||||
human_click(driver, signup_btn)
|
||||
|
||||
# Switch to new tab
|
||||
WebDriverWait(driver, 30).until(
|
||||
lambda d: len(d.window_handles) > len(handles_before)
|
||||
)
|
||||
new_handles = set(driver.window_handles) - handles_before
|
||||
if new_handles:
|
||||
driver.switch_to.window(new_handles.pop())
|
||||
logger.info("[2/6] Switched to new tab: %s", driver.current_url)
|
||||
else:
|
||||
raise AutomationError(
|
||||
"signup", "No new tab opened after clicking Sign up", driver
|
||||
)
|
||||
human_delay()
|
||||
|
||||
# Wait for page load
|
||||
WebDriverWait(driver, 30).until(
|
||||
lambda d: d.execute_script("return document.readyState") == "complete"
|
||||
)
|
||||
human_delay()
|
||||
logger.info("[2/6] Page loaded: %s", driver.current_url)
|
||||
|
||||
# Step 3: Click "Sign in or Sign up"
|
||||
logger.info("[3/6] Clicking 'Sign in or Sign up'...")
|
||||
signin_signup_btn = wait.until(
|
||||
EC.element_to_be_clickable(
|
||||
(
|
||||
By.XPATH,
|
||||
"//a[contains(text(), 'Sign in') or contains(text(), 'sign in') or contains(text(), 'Sign up') or contains(text(), 'sign up')]",
|
||||
)
|
||||
)
|
||||
)
|
||||
human_click(driver, signin_signup_btn)
|
||||
human_delay()
|
||||
|
||||
# Wait for page load
|
||||
WebDriverWait(driver, 30).until(
|
||||
lambda d: d.execute_script("return document.readyState") == "complete"
|
||||
)
|
||||
human_delay()
|
||||
logger.info("[3/6] Redirected to: %s", driver.current_url)
|
||||
|
||||
# Step 4: Click "Sign in with Google"
|
||||
logger.info("[4/6] Clicking 'Sign in with Google'...")
|
||||
google_btn = wait.until(
|
||||
EC.element_to_be_clickable(
|
||||
(
|
||||
By.XPATH,
|
||||
"//*[contains(text(), 'Sign in with Google') or contains(text(), 'Continue with Google')]",
|
||||
)
|
||||
)
|
||||
)
|
||||
human_click(driver, google_btn)
|
||||
|
||||
# Wait for Google
|
||||
WebDriverWait(driver, 30).until(EC.url_contains("accounts.google.com"))
|
||||
logger.info("[2/4] Google sign-in page loaded: %s", driver.current_url)
|
||||
logger.info("[4/6] Google sign-in page loaded: %s", driver.current_url)
|
||||
|
||||
# Step 3: Google sign-in
|
||||
logger.info("[3/4] Signing in with Google (%s)...", email)
|
||||
# Step 5: Google sign-in
|
||||
logger.info("[5/6] Signing in with Google (%s)...", email)
|
||||
success = _google_sign_in(driver, email, password)
|
||||
|
||||
if not success and not _is_on_kilo(driver.current_url):
|
||||
|
|
@ -237,16 +341,16 @@ def _try_register_once_sync(
|
|||
)
|
||||
|
||||
# Wait for redirect to kilo.ai
|
||||
logger.info("[3/4] Waiting for Kilo redirect...")
|
||||
logger.info("[5/6] Waiting for Kilo redirect...")
|
||||
deadline = _time.time() + 120
|
||||
while _time.time() < deadline:
|
||||
if (
|
||||
_is_on_kilo(driver.current_url)
|
||||
and "/users/sign_in" not in driver.current_url
|
||||
):
|
||||
logger.info("[3/4] On kilo.ai: %s", driver.current_url)
|
||||
logger.info("[5/6] On kilo.ai: %s", driver.current_url)
|
||||
break
|
||||
_time.sleep(1)
|
||||
human_delay()
|
||||
else:
|
||||
logger.warning("Redirect not detected, current: %s", driver.current_url)
|
||||
|
||||
|
|
@ -255,11 +359,10 @@ def _try_register_once_sync(
|
|||
confirm_btn = WebDriverWait(driver, 10).until(
|
||||
EC.element_to_be_clickable((By.CSS_SELECTOR, "input#confirm"))
|
||||
)
|
||||
logger.info("[3/4] Educational account page, clicking confirm...")
|
||||
confirm_btn.click()
|
||||
_time.sleep(3)
|
||||
logger.info("[5/6] Educational account page, clicking confirm...")
|
||||
human_click(driver, confirm_btn)
|
||||
except TimeoutException:
|
||||
logger.info("[3/4] No educational account page, continuing...")
|
||||
logger.info("[5/6] No educational account page, continuing...")
|
||||
|
||||
# Wait for /get-started or /profile
|
||||
deadline = _time.time() + 60
|
||||
|
|
@ -267,11 +370,12 @@ def _try_register_once_sync(
|
|||
url = driver.current_url
|
||||
if "/get-started" in url or "/profile" in url:
|
||||
break
|
||||
_time.sleep(1)
|
||||
human_delay()
|
||||
|
||||
# Step 4: Get API key
|
||||
logger.info("[4/4] Navigating to profile to get API key...")
|
||||
# Step 6: Get API key
|
||||
logger.info("[6/6] Navigating to profile to get API key...")
|
||||
driver.get(PROFILE_URL)
|
||||
human_delay()
|
||||
|
||||
api_key_input = WebDriverWait(driver, 200).until(
|
||||
EC.visibility_of_element_located((By.CSS_SELECTOR, "input#api-key"))
|
||||
|
|
@ -282,7 +386,7 @@ def _try_register_once_sync(
|
|||
raise AutomationError("profile", "API key input is empty", driver)
|
||||
|
||||
api_key = api_key.strip()
|
||||
logger.info("[4/4] API key obtained (length=%d)", len(api_key))
|
||||
logger.info("[6/6] API key obtained (length=%d)", len(api_key))
|
||||
return api_key
|
||||
|
||||
except AutomationError as e:
|
||||
|
|
@ -298,15 +402,14 @@ def _try_register_once_sync(
|
|||
async def register_kilo_account() -> ProviderTokens | None:
|
||||
"""Register a new Kilo account via Google OAuth using Selenium Firefox.
|
||||
|
||||
Reads Google accounts from mail.json, tries each one.
|
||||
Pops one email account from emails.txt and attempts registration.
|
||||
Rotates proxy IP between attempts if needed.
|
||||
Browser is NOT closed after registration.
|
||||
"""
|
||||
logger.info("=== Starting Kilo account registration (Google OAuth) ===")
|
||||
|
||||
accounts = load_google_accounts()
|
||||
if not accounts:
|
||||
logger.error("No Google accounts available in mail.json")
|
||||
account = pop_account()
|
||||
if not account:
|
||||
logger.error("No email accounts available")
|
||||
return None
|
||||
|
||||
driver: WebDriver | None = None
|
||||
|
|
@ -315,6 +418,8 @@ async def register_kilo_account() -> ProviderTokens | None:
|
|||
driver = await asyncio.to_thread(_create_firefox_driver)
|
||||
|
||||
for ip_attempt in range(MAX_IP_ROTATIONS):
|
||||
# driver.get("http://localhost:8005/")
|
||||
# await asyncio.sleep(100000000000000000) # for debugging
|
||||
if ip_attempt > 0:
|
||||
logger.info(
|
||||
"Rotating proxy IP (attempt %d/%d)...",
|
||||
|
|
@ -325,36 +430,27 @@ async def register_kilo_account() -> ProviderTokens | None:
|
|||
if not rotated:
|
||||
logger.warning("IP rotation failed, trying anyway")
|
||||
|
||||
for account in accounts:
|
||||
email = account.get("email", "")
|
||||
password = account.get("password", "")
|
||||
if not email or not password:
|
||||
logger.warning("Skipping account with missing credentials")
|
||||
continue
|
||||
logger.info(
|
||||
"Trying Google account: %s (IP attempt %d/%d)",
|
||||
account.email,
|
||||
ip_attempt + 1,
|
||||
MAX_IP_ROTATIONS,
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"Trying Google account: %s (IP rotation %d/%d)",
|
||||
email,
|
||||
ip_attempt + 1,
|
||||
MAX_IP_ROTATIONS,
|
||||
api_key = await asyncio.to_thread(
|
||||
_try_register_once_sync, driver, account.email, account.password
|
||||
)
|
||||
|
||||
if api_key:
|
||||
return ProviderTokens(
|
||||
access_token=api_key,
|
||||
refresh_token=None,
|
||||
expires_at=0,
|
||||
)
|
||||
|
||||
api_key = await asyncio.to_thread(
|
||||
_try_register_once_sync, driver, email, password
|
||||
)
|
||||
await asyncio.sleep(2)
|
||||
|
||||
if api_key:
|
||||
return ProviderTokens(
|
||||
access_token=api_key,
|
||||
refresh_token=None,
|
||||
expires_at=0,
|
||||
)
|
||||
|
||||
await asyncio.sleep(2)
|
||||
|
||||
logger.warning("All accounts failed for current IP")
|
||||
|
||||
logger.error("All registration attempts exhausted")
|
||||
logger.error("All registration attempts exhausted for %s", account.email)
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue