From 95886e6b50a55a13e0975f3178bcf4b72d33bcd1 Mon Sep 17 00:00:00 2001 From: Vlad Pronsky Date: Fri, 14 Jul 2023 23:50:32 +0300 Subject: [PATCH] check email before login; update logs --- twscrape/accounts_pool.py | 9 ++++++--- twscrape/imap.py | 42 ++++++++++++++++++++++++++------------- twscrape/login.py | 21 ++++++++++---------- twscrape/queue_client.py | 8 +++++--- twscrape/utils.py | 2 +- 5 files changed, 51 insertions(+), 31 deletions(-) diff --git a/twscrape/accounts_pool.py b/twscrape/accounts_pool.py index 51098ae..10710fc 100644 --- a/twscrape/accounts_pool.py +++ b/twscrape/accounts_pool.py @@ -42,6 +42,7 @@ class AccountsPool: if not required.issubset(tokens): raise ValueError(f"Invalid line format: {line_format}") + accounts = [] with open(filepath, "r") as f: lines = f.read().split("\n") lines = [x.strip() for x in lines if x.strip()] @@ -49,12 +50,14 @@ class AccountsPool: for line in lines: data = [x.strip() for x in line.split(line_delim)] if len(data) < len(tokens): - logger.warning(f"Invalid line format: {line}") - continue + raise ValueError(f"Invalid line: {line}") data = data[: len(tokens)] vals = {k: v for k, v in zip(tokens, data) if k != "_"} - await self.add_account(**vals) + accounts.append(vals) + + for x in accounts: + await self.add_account(**x) async def add_account( self, diff --git a/twscrape/imap.py b/twscrape/imap.py index 3009367..8300bde 100644 --- a/twscrape/imap.py +++ b/twscrape/imap.py @@ -50,7 +50,7 @@ def search_email_code(imap: imaplib.IMAP4_SSL, count: int, min_t: datetime | Non msg_time = datetime.strptime(msg.get("Date", ""), "%a, %d %b %Y %H:%M:%S %z") msg_from = str(msg.get("From", "")).lower() msg_subj = str(msg.get("Subject", "")).lower() - logger.debug(f"({i} of {count}) {msg_from} - {msg_time} - {msg_subj}") + logger.info(f"({i} of {count}) {msg_from} - {msg_time} - {msg_subj}") if min_t is not None and msg_time < min_t: return None @@ -62,17 +62,11 @@ def search_email_code(imap: imaplib.IMAP4_SSL, count: int, min_t: datetime | Non return None -async def get_email_code(email: str, password: str, min_t: datetime | None = None) -> str: - domain = get_imap_domain(email) - start_time = time.time() - with imaplib.IMAP4_SSL(domain) as imap: - try: - imap.login(email, password) - except imaplib.IMAP4.error as e: - logger.error(f"Error logging into {email}: {e}") - raise EmailLoginError() from e - - was_count = 0 +async def imap_get_email_code( + imap: imaplib.IMAP4_SSL, email: str, min_t: datetime | None = None +) -> str: + try: + start_time, was_count = time.time(), 0 while True: _, rep = imap.select("INBOX") now_count = int(rep[0].decode("utf-8")) if len(rep) > 0 and rep[0] is not None else 0 @@ -81,8 +75,28 @@ async def get_email_code(email: str, password: str, min_t: datetime | None = Non if code is not None: return code - logger.debug(f"Waiting for confirmation code for {email}, msg_count: {now_count}") + logger.info(f"Waiting for confirmation code for {email}, msg_count: {now_count}") if MAX_WAIT_SEC < time.time() - start_time: - logger.error(f"Timeout waiting for confirmation code for {email}") + logger.info(f"Timeout waiting for confirmation code for {email}") raise EmailCodeTimeoutError() await asyncio.sleep(5) + except Exception as e: + imap.select("INBOX") + imap.close() + logger.error(f"Error getting confirmation code for {email}: {e}") + raise e + + +async def imap_try_login(email: str, password: str): + domain = get_imap_domain(email) + imap = imaplib.IMAP4_SSL(domain) + + try: + imap.login(email, password) + except imaplib.IMAP4.error as e: + logger.error(f"Error logging into {email} on {domain}: {e}") + imap.select("INBOX") + imap.close() + raise EmailLoginError() from e + + return imap diff --git a/twscrape/login.py b/twscrape/login.py index 1cd6fa9..e6324b2 100644 --- a/twscrape/login.py +++ b/twscrape/login.py @@ -4,7 +4,7 @@ from httpx import AsyncClient, HTTPStatusError, Response from .account import Account from .constants import LOGIN_URL -from .imap import get_email_code +from .imap import imap_get_email_code, imap_try_login from .logger import logger from .utils import raise_for_status @@ -100,7 +100,7 @@ async def login_duplication_check(client: AsyncClient, acc: Account, prev: dict) return rep -async def login_confirm_email(client: AsyncClient, acc: Account, prev: dict) -> Response: +async def login_confirm_email(client: AsyncClient, acc: Account, prev: dict, imap) -> Response: payload = { "flow_token": prev["flow_token"], "subtask_inputs": [ @@ -116,9 +116,9 @@ async def login_confirm_email(client: AsyncClient, acc: Account, prev: dict) -> return rep -async def login_confirm_email_code(client: AsyncClient, acc: Account, prev: dict): +async def login_confirm_email_code(client: AsyncClient, acc: Account, prev: dict, imap): now_time = datetime.now(timezone.utc) - timedelta(seconds=30) - value = await get_email_code(acc.email, acc.email_password, now_time) + value = await imap_get_email_code(imap, acc.email, now_time) payload = { "flow_token": prev["flow_token"], @@ -146,7 +146,7 @@ async def login_success(client: AsyncClient, acc: Account, prev: dict) -> Respon return rep -async def next_login_task(client: AsyncClient, acc: Account, rep: Response): +async def next_login_task(client: AsyncClient, acc: Account, rep: Response, imap): ct0 = client.cookies.get("ct0", None) if ct0: client.headers["x-csrf-token"] = ct0 @@ -164,9 +164,8 @@ async def next_login_task(client: AsyncClient, acc: Account, rep: Response): return await login_success(client, acc, prev) if task_id == "LoginAcid": is_code = x["enter_text"]["hint_text"].lower() == "confirmation code" - # logger.debug(f"is login code: {is_code}") fn = login_confirm_email_code if is_code else login_confirm_email - return await fn(client, acc, prev) + return await fn(client, acc, prev, imap) if task_id == "AccountDuplicationCheck": return await login_duplication_check(client, acc, prev) if task_id == "LoginEnterPassword": @@ -186,10 +185,12 @@ async def next_login_task(client: AsyncClient, acc: Account, rep: Response): async def login(acc: Account, fresh=False) -> Account: log_id = f"{acc.username} - {acc.email}" if acc.active and not fresh: - logger.debug(f"account already active {log_id}") + logger.info(f"account already active {log_id}") return acc - logger.debug(f"logging in {log_id}") + # check if email is valid first + imap = await imap_try_login(acc.email, acc.email_password) + client = acc.make_client() guest_token = await get_guest_token(client) client.headers["x-guest-token"] = guest_token @@ -200,7 +201,7 @@ async def login(acc: Account, fresh=False) -> Account: break try: - rep = await next_login_task(client, acc, rep) + rep = await next_login_task(client, acc, rep, imap) except HTTPStatusError as e: if e.response.status_code == 403: logger.error(f"403 error {log_id}") diff --git a/twscrape/queue_client.py b/twscrape/queue_client.py index e50ddff..9af36cf 100644 --- a/twscrape/queue_client.py +++ b/twscrape/queue_client.py @@ -105,7 +105,8 @@ class QueueClient: return self.ctx async def _check_rep(self, rep: httpx.Response): - dump_rep(rep) + if self.debug: + dump_rep(rep) try: res = rep.json() @@ -116,8 +117,9 @@ class QueueClient: if "errors" in res: msg = "; ".join([f'({x.get("code", -1)}) {x["message"]}' for x in res["errors"]]) - fn = logger.info if rep.status_code == 200 else logger.warning - fn(f"{rep.status_code:3d} - {req_id(rep)} - {msg}") + if self.debug: + fn = logger.info if rep.status_code == 200 else logger.warning + fn(f"{rep.status_code:3d} - {req_id(rep)} - {msg}") if msg.startswith("The following features cannot be null"): logger.error(f"Invalid request: {msg}") diff --git a/twscrape/utils.py b/twscrape/utils.py index 582ee92..d1d32e4 100644 --- a/twscrape/utils.py +++ b/twscrape/utils.py @@ -22,7 +22,7 @@ def raise_for_status(rep: Response, label: str): try: rep.raise_for_status() except HTTPStatusError as e: - logger.debug(f"{label} - {rep.status_code} - {rep.text}") + logger.info(f"{label} - {rep.status_code} - {rep.text}") raise e