зеркало из
https://github.com/viginum-datalab/twscrape.git
synced 2025-10-30 05:26:20 +02:00
update ban detection; fix cli relogin command to relogin only selected accounts
Этот коммит содержится в:
родитель
5fc84d8696
Коммит
d78e33d2cc
@ -20,7 +20,7 @@ classifiers = [
|
|||||||
]
|
]
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"aiosqlite>=0.17.0",
|
"aiosqlite>=0.17.0",
|
||||||
"fake-useragent>=1.3.0",
|
"fake-useragent>=1.4.0",
|
||||||
"httpx>=0.24.0",
|
"httpx>=0.24.0",
|
||||||
"loguru>=0.7.0",
|
"loguru>=0.7.0",
|
||||||
]
|
]
|
||||||
|
|||||||
@ -142,10 +142,14 @@ class AccountsPool:
|
|||||||
finally:
|
finally:
|
||||||
await self.save(account)
|
await self.save(account)
|
||||||
|
|
||||||
async def login_all(self, email_first=False):
|
async def login_all(self, email_first=False, usernames: list[str] | None = None):
|
||||||
qs = "SELECT * FROM accounts WHERE active = false AND error_msg IS NULL"
|
if usernames is None:
|
||||||
rs = await fetchall(self._db_file, qs)
|
qs = "SELECT * FROM accounts WHERE active = false AND error_msg IS NULL"
|
||||||
|
else:
|
||||||
|
us = ",".join([f'"{x}"' for x in usernames])
|
||||||
|
qs = f"SELECT * FROM accounts WHERE username IN ({us})"
|
||||||
|
|
||||||
|
rs = await fetchall(self._db_file, qs)
|
||||||
accounts = [Account.from_rs(rs) for rs in rs]
|
accounts = [Account.from_rs(rs) for rs in rs]
|
||||||
# await asyncio.gather(*[login(x) for x in self.accounts])
|
# await asyncio.gather(*[login(x) for x in self.accounts])
|
||||||
|
|
||||||
@ -176,7 +180,7 @@ class AccountsPool:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
await execute(self._db_file, qs)
|
await execute(self._db_file, qs)
|
||||||
await self.login_all(email_first=email_first)
|
await self.login_all(email_first=email_first, usernames=usernames)
|
||||||
|
|
||||||
async def relogin_failed(self, email_first=False):
|
async def relogin_failed(self, email_first=False):
|
||||||
qs = "SELECT username FROM accounts WHERE active = false AND error_msg IS NOT NULL"
|
qs = "SELECT username FROM accounts WHERE active = false AND error_msg IS NOT NULL"
|
||||||
@ -248,13 +252,17 @@ class AccountsPool:
|
|||||||
|
|
||||||
return Account.from_rs(rs) if rs else None
|
return Account.from_rs(rs) if rs else None
|
||||||
|
|
||||||
async def get_for_queue_or_wait(self, queue: str) -> Account:
|
async def get_for_queue_or_wait(self, queue: str) -> Account | None:
|
||||||
msg_shown = False
|
msg_shown = False
|
||||||
while True:
|
while True:
|
||||||
account = await self.get_for_queue(queue)
|
account = await self.get_for_queue(queue)
|
||||||
if not account:
|
if not account:
|
||||||
if not msg_shown:
|
if not msg_shown:
|
||||||
nat = await self.next_available_at(queue)
|
nat = await self.next_available_at(queue)
|
||||||
|
if not nat:
|
||||||
|
logger.warning("No active accounts. Stopping...")
|
||||||
|
return None
|
||||||
|
|
||||||
msg = f'No account available for queue "{queue}". Next available at {nat}'
|
msg = f'No account available for queue "{queue}". Next available at {nat}'
|
||||||
logger.info(msg)
|
logger.info(msg)
|
||||||
msg_shown = True
|
msg_shown = True
|
||||||
@ -283,9 +291,9 @@ class AccountsPool:
|
|||||||
at_local = datetime.now() + (trg - now)
|
at_local = datetime.now() + (trg - now)
|
||||||
return at_local.strftime("%H:%M:%S")
|
return at_local.strftime("%H:%M:%S")
|
||||||
|
|
||||||
return "none"
|
return None
|
||||||
|
|
||||||
async def mark_banned(self, username: str, error_msg: str):
|
async def mark_inactive(self, username: str, error_msg: str | None):
|
||||||
qs = """
|
qs = """
|
||||||
UPDATE accounts SET active = false, error_msg = :error_msg
|
UPDATE accounts SET active = false, error_msg = :error_msg
|
||||||
WHERE username = :username
|
WHERE username = :username
|
||||||
|
|||||||
@ -73,6 +73,7 @@ async def main(args):
|
|||||||
|
|
||||||
if args.command == "add_accounts":
|
if args.command == "add_accounts":
|
||||||
await pool.load_from_file(args.file_path, args.line_format)
|
await pool.load_from_file(args.file_path, args.line_format)
|
||||||
|
print("\nNow run:\ntwscrape login_accounts")
|
||||||
return
|
return
|
||||||
|
|
||||||
if args.command == "del_accounts":
|
if args.command == "del_accounts":
|
||||||
|
|||||||
@ -10,7 +10,7 @@ from .utils import int_or
|
|||||||
|
|
||||||
_env = dict(os.environ)
|
_env = dict(os.environ)
|
||||||
|
|
||||||
LOGIN_CODE_TIMEOUT = int_or(_env, "LOGIN_CODE_TIMEOUT") or 40
|
LOGIN_CODE_TIMEOUT = int_or(_env, "LOGIN_CODE_TIMEOUT") or 30
|
||||||
|
|
||||||
|
|
||||||
class EmailLoginError(Exception):
|
class EmailLoginError(Exception):
|
||||||
|
|||||||
@ -3,7 +3,7 @@ from typing import Literal
|
|||||||
|
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
_LEVELS = Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
|
_LEVELS = Literal["TRACE", "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
|
||||||
_LOG_LEVEL: _LEVELS = "INFO"
|
_LOG_LEVEL: _LEVELS = "INFO"
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -19,11 +19,7 @@ class Ctx:
|
|||||||
self.req_count = 0
|
self.req_count = 0
|
||||||
|
|
||||||
|
|
||||||
class RateLimitError(Exception):
|
class HandledError(Exception):
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class BannedError(Exception):
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
@ -82,7 +78,7 @@ class QueueClient:
|
|||||||
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
||||||
await self._close_ctx()
|
await self._close_ctx()
|
||||||
|
|
||||||
async def _close_ctx(self, reset_at=-1, banned=False, msg=""):
|
async def _close_ctx(self, reset_at=-1, inactive=False, msg: str | None = None):
|
||||||
if self.ctx is None:
|
if self.ctx is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
@ -90,8 +86,8 @@ class QueueClient:
|
|||||||
username = ctx.acc.username
|
username = ctx.acc.username
|
||||||
await ctx.clt.aclose()
|
await ctx.clt.aclose()
|
||||||
|
|
||||||
if banned:
|
if inactive:
|
||||||
await self.pool.mark_banned(username, msg)
|
await self.pool.mark_inactive(username, msg)
|
||||||
return
|
return
|
||||||
|
|
||||||
if reset_at > 0:
|
if reset_at > 0:
|
||||||
@ -123,60 +119,75 @@ class QueueClient:
|
|||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
res: Any = {"_raw": rep.text}
|
res: Any = {"_raw": rep.text}
|
||||||
|
|
||||||
|
limit_remaining = int(rep.headers.get("x-rate-limit-remaining", -1))
|
||||||
|
limit_reset = int(rep.headers.get("x-rate-limit-reset", -1))
|
||||||
|
# limit_max = int(rep.headers.get("x-rate-limit-limit", -1))
|
||||||
|
|
||||||
err_msg = "OK"
|
err_msg = "OK"
|
||||||
if "errors" in res:
|
if "errors" in res:
|
||||||
err_msg = set([f'({x.get("code", -1)}) {x["message"]}' for x in res["errors"]])
|
err_msg = set([f'({x.get("code", -1)}) {x["message"]}' for x in res["errors"]])
|
||||||
err_msg = "; ".join(list(err_msg))
|
err_msg = "; ".join(list(err_msg))
|
||||||
|
|
||||||
if self.debug:
|
log_msg = f"{rep.status_code:3d} - {req_id(rep)} - {err_msg}"
|
||||||
fn = logger.debug if rep.status_code == 200 else logger.warning
|
print(log_msg)
|
||||||
fn(f"{rep.status_code:3d} - {req_id(rep)} - {err_msg}")
|
logger.trace(log_msg)
|
||||||
|
|
||||||
# need to add some features in api.py
|
# for dev: need to add some features in api.py
|
||||||
if err_msg.startswith("(336) The following features cannot be null"):
|
if err_msg.startswith("(336) The following features cannot be null"):
|
||||||
logger.error(f"Update required: {err_msg}")
|
logger.error(f"[DEV] Update required: {err_msg}")
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
# general api rate limit
|
# general api rate limit
|
||||||
if int(rep.headers.get("x-rate-limit-remaining", -1)) == 0:
|
if limit_remaining == 0 and limit_reset > 0:
|
||||||
await self._close_ctx(int(rep.headers.get("x-rate-limit-reset", -1)))
|
logger.debug(f"Rate limited: {log_msg}")
|
||||||
raise RateLimitError(err_msg)
|
await self._close_ctx(limit_reset)
|
||||||
|
raise HandledError()
|
||||||
|
|
||||||
# possible new limits for tweets view per account
|
# no way to check is account banned in direct way, but this check should work
|
||||||
if err_msg.startswith("(88) Rate limit exceeded") or rep.status_code == 429:
|
if err_msg.startswith("(88) Rate limit exceeded") and limit_remaining > 0:
|
||||||
await self._close_ctx(utc.ts() + 60 * 60 * 4) # lock for 4 hours
|
logger.warning(f"Ban detected: {log_msg}")
|
||||||
raise RateLimitError(err_msg)
|
await self._close_ctx(-1, inactive=True, msg=err_msg)
|
||||||
|
raise HandledError()
|
||||||
|
|
||||||
if err_msg.startswith("(326) Authorization: Denied by access control"):
|
if err_msg.startswith("(326) Authorization: Denied by access control"):
|
||||||
await self._close_ctx(-1, banned=True, msg=err_msg)
|
logger.warning(f"Ban detected: {log_msg}")
|
||||||
raise BannedError(err_msg)
|
await self._close_ctx(-1, inactive=True, msg=err_msg)
|
||||||
|
raise HandledError()
|
||||||
|
|
||||||
# Something from twitter side, abort request so it doesn't hang
|
if err_msg.startswith("(32) Could not authenticate you"):
|
||||||
# https://github.com/vladkens/twscrape/pull/80
|
logger.warning(f"Session expired or banned: {log_msg}")
|
||||||
if err_msg.startswith("(131) Dependency: Internal error."):
|
await self._close_ctx(-1, inactive=True, msg=err_msg)
|
||||||
|
raise HandledError()
|
||||||
|
|
||||||
|
if err_msg == "OK" and rep.status_code == 403:
|
||||||
|
logger.warning(f"Session expired or banned: {log_msg}")
|
||||||
|
await self._close_ctx(-1, inactive=True, msg=None)
|
||||||
|
raise HandledError()
|
||||||
|
|
||||||
|
# something from twitter side - abort all queries, see: https://github.com/vladkens/twscrape/pull/80
|
||||||
|
if err_msg.startswith("(131) Dependency: Internal error"):
|
||||||
logger.warning(f"Dependency error (request skipped): {err_msg}")
|
logger.warning(f"Dependency error (request skipped): {err_msg}")
|
||||||
raise AbortReqError()
|
raise AbortReqError()
|
||||||
|
|
||||||
# possible banned by old api flow
|
|
||||||
if rep.status_code in (401, 403):
|
|
||||||
await self._close_ctx(utc.ts() + 60 * 60 * 12) # lock for 12 hours
|
|
||||||
raise RateLimitError(err_msg)
|
|
||||||
|
|
||||||
# content not found
|
# content not found
|
||||||
if rep.status_code == 200 and "_Missing: No status found with that ID." in err_msg:
|
if rep.status_code == 200 and "_Missing: No status found with that ID" in err_msg:
|
||||||
return # ignore this error
|
return # ignore this error
|
||||||
|
|
||||||
# Something from twitter side, just ignore it
|
# something from twitter side - just ignore it, see: https://github.com/vladkens/twscrape/pull/95
|
||||||
# https://github.com/vladkens/twscrape/pull/95
|
|
||||||
if rep.status_code == 200 and "Authorization" in err_msg:
|
if rep.status_code == 200 and "Authorization" in err_msg:
|
||||||
logger.warning(f"Authorization unknown error: {err_msg}")
|
logger.warning(f"Authorization unknown error: {log_msg}")
|
||||||
return
|
return
|
||||||
|
|
||||||
if err_msg != "OK":
|
if err_msg != "OK":
|
||||||
logger.warning(f"API unknown error: {err_msg}")
|
logger.warning(f"API unknown error: {log_msg}")
|
||||||
return # ignore any other unknown errors
|
return # ignore any other unknown errors
|
||||||
|
|
||||||
rep.raise_for_status()
|
try:
|
||||||
|
rep.raise_for_status()
|
||||||
|
except httpx.HTTPStatusError:
|
||||||
|
logger.error(f"Unhandled API response code: {log_msg}")
|
||||||
|
await self._close_ctx(utc.ts() + 60 * 15) # 15 minutes
|
||||||
|
raise HandledError()
|
||||||
|
|
||||||
async def get(self, url: str, params: ReqParams = None):
|
async def get(self, url: str, params: ReqParams = None):
|
||||||
return await self.req("GET", url, params=params)
|
return await self.req("GET", url, params=params)
|
||||||
@ -185,6 +196,8 @@ class QueueClient:
|
|||||||
retry_count = 0
|
retry_count = 0
|
||||||
while True:
|
while True:
|
||||||
ctx = await self._get_ctx()
|
ctx = await self._get_ctx()
|
||||||
|
if ctx is None:
|
||||||
|
return None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
rep = await ctx.clt.request(method, url, params=params)
|
rep = await ctx.clt.request(method, url, params=params)
|
||||||
@ -194,16 +207,17 @@ class QueueClient:
|
|||||||
ctx.req_count += 1 # count only successful
|
ctx.req_count += 1 # count only successful
|
||||||
retry_count = 0
|
retry_count = 0
|
||||||
return rep
|
return rep
|
||||||
except (RateLimitError, BannedError):
|
|
||||||
# already handled
|
|
||||||
continue
|
|
||||||
except AbortReqError:
|
except AbortReqError:
|
||||||
|
# abort all queries
|
||||||
return
|
return
|
||||||
|
except HandledError:
|
||||||
|
# retry with new account
|
||||||
|
continue
|
||||||
except (httpx.ReadTimeout, httpx.ProxyError):
|
except (httpx.ReadTimeout, httpx.ProxyError):
|
||||||
# http transport failed, just retry
|
# http transport failed, just retry with same account
|
||||||
continue
|
continue
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
retry_count += 1
|
retry_count += 1
|
||||||
if retry_count >= 3:
|
if retry_count >= 3:
|
||||||
logger.warning(f"Unknown error {type(e)}: {e}")
|
logger.warning(f"Unhandled error {type(e)}: {e}")
|
||||||
await self._close_ctx(utc.ts() + 60 * 15) # 15 minutes
|
await self._close_ctx(utc.ts() + 60 * 15) # 15 minutes
|
||||||
|
|||||||
Загрузка…
x
Ссылка в новой задаче
Block a user