add exception on rate limit & save it to session dump

Этот коммит содержится в:
Vlad Pronsky 2023-04-23 14:17:01 +03:00
родитель 5f4b0d4c4e
Коммит fa16f206f8
2 изменённых файлов: 48 добавлений и 9 удалений

Просмотреть файл

@ -3,6 +3,8 @@ import imaplib
import json
import os
import time
from collections import defaultdict
from datetime import datetime, timezone
from fake_useragent import UserAgent
from httpx import Client, HTTPStatusError, Response
@ -12,6 +14,12 @@ TOKEN = "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Z
TASK_URL = "https://api.twitter.com/1.1/onboarding/task.json"
class RateLimitExceeded(Exception):
def __init__(self, reset: int, cursor: str | None = None):
self.reset = reset
self.cursor = cursor
def search_email_with_confirmation_code(imap: imaplib.IMAP4_SSL, msg_count: int) -> str | None:
for i in range(msg_count, 0, -1):
_, rep = imap.fetch(str(i), "(RFC822)")
@ -190,11 +198,17 @@ class UserClient:
dirname = os.path.dirname(self.session_path)
os.makedirs(dirname, exist_ok=True)
self.limits = defaultdict(dict)
self.locked = False
def save(self):
cookies = dict(self.client.cookies)
headers = dict(self.client.headers)
data = {
"cookies": dict(self.client.cookies),
"headers": dict(self.client.headers),
"limits": dict(self.limits),
}
with open(self.session_path, "w") as fp:
json.dump({"cookies": cookies, "headers": headers}, fp)
json.dump(data, fp, indent=2)
def restore(self):
try:
@ -202,10 +216,25 @@ class UserClient:
data = json.load(fp)
self.client.cookies.update(data["cookies"])
self.client.headers.update(data["headers"])
self.limits.update(data.get("limits", {}))
return True
except (FileNotFoundError, json.JSONDecodeError):
return False
def _update_limits(self, rep: Response):
for k, v in rep.headers.items():
if k.startswith("x-rate-limit-"):
self.limits[rep.url.path][k] = v
self.save()
def check_limits(self, rep: Response, cursor: str | None):
if rep.status_code == 429:
reset = int(rep.headers.get("x-rate-limit-reset"))
reset_time = datetime.fromtimestamp(reset, tz=timezone.utc)
logger.debug(f"Rate limit exceeded for {self.username} - reset at {reset_time}")
self._update_limits(rep)
raise RateLimitExceeded(reset, cursor)
def print_session(self):
for x in self.client.headers.items():
print(x)

Просмотреть файл

@ -1,7 +1,7 @@
from httpx import AsyncClient
from httpx import AsyncClient, Response
from loguru import logger
from .client import UserClient
from .client import UserClient, raise_for_status
BASIC_SEARCH_PARAMS = """
include_profile_interstitial_type=1
@ -63,6 +63,11 @@ class Search:
logger.debug(e)
return None
def _log(self, rep: Response, msg: str):
limit = int(rep.headers.get("x-rate-limit-limit", -1))
remaining = int(rep.headers.get("x-rate-limit-remaining", -1))
logger.debug(f"[{remaining:3,d}/{limit:3,d}] {self.account.username} - {msg}")
async def query(self, q: str, cursor: str | None = None):
client = AsyncClient()
client.headers.update(self.account.client.headers)
@ -74,13 +79,18 @@ class Search:
params["cursor" if cursor else "requestContext"] = cursor if cursor else "launch"
rep = await client.get(SEARCH_URL, params=params)
rep.raise_for_status()
self.account.check_limits(rep, cursor)
raise_for_status(rep, "search")
data = rep.json()
cursor = self.get_next_cursor(data)
tweets = data.get("globalObjects", {}).get("tweets", [])
cursor = self.get_next_cursor(data)
# logger.debug(rep.text)
total_count += len(tweets)
self._log(rep, f"{total_count:,d} (+{len(tweets):,d}) tweets - {q}")
if not tweets or not cursor:
return
total_count += len(tweets)
yield rep, data, cursor
yield rep.text