# twscrape/twapi/api.py
#
# 327 lines
# 12 KiB
# Python

import time
from contextlib import aclosing
from typing import Awaitable, Callable

from httpx import AsyncClient, HTTPStatusError, Response

from .accounts_pool import AccountsPool
from .constants import GQL_FEATURES, GQL_URL, SEARCH_PARAMS, SEARCH_URL
from .logger import logger
from .models import Tweet, User
from .utils import encode_params, get_by_path, to_old_obj, to_search_like
class API:
def __init__(self, pool: AccountsPool):
    """Keep a reference to the accounts pool used by the request helpers."""
    self.pool = pool
# http helpers
def _limit_msg(self, rep: Response):
lr = rep.headers.get("x-rate-limit-remaining", -1)
ll = rep.headers.get("x-rate-limit-limit", -1)
auth_token = rep.request.headers["cookie"].split("auth_token=")[1].split(";")[0]
username = self.pool.get_username_by_token(auth_token)
return f"{username} {lr}/{ll}"
def _is_end(self, rep: Response, q: str, res: list, cur: str | None, cnt: int, lim: int):
    """Update the running item count and decide whether paging should stop.

    Args:
        rep: response of the page just fetched (used only for the log line).
        q: label written to the log line (query text or queue name).
        res: items found on this page; only its length is used.
        cur: cursor for the next page, or ``None`` when there is none.
        cnt: number of items counted before this page.
        lim: maximum number of items wanted; values ``<= 0`` disable the limit.

    Returns:
        ``(new_total, stop_before_yield, stop_after_yield)`` where the first
        flag is set when the page is empty and the second when there is no
        next cursor or the limit has been reached.
    """
    fetched = len(res)
    total = cnt + fetched

    has_results = fetched > 0
    has_cursor = cur is not None
    limit_hit = lim > 0 and total >= lim

    log_parts = [
        f"{q} {total:,d} (+{fetched:,d})",
        f"res={int(has_results)} cur={int(has_cursor)} lim={int(limit_hit)}",
        self._limit_msg(rep),
    ]
    logger.debug(" ".join(log_parts))

    stop_before_yield = not has_results
    stop_after_yield = (not has_cursor) or limit_hit
    return total, stop_before_yield, stop_after_yield
async def _inf_req(self, queue: str, cb: Callable[[AsyncClient], Awaitable[Response]]):
    """Yield successful responses from ``cb`` forever, rotating accounts.

    An account is requested from the pool for ``queue`` and ``cb`` is awaited
    with that account's client over and over; every response that passes
    ``raise_for_status()`` is yielded.

    * HTTP 429: the account's limit for ``queue`` is updated with the value of
      the ``x-rate-limit-reset`` header and another account is requested. If
      the header is missing, non-numeric or not positive, a fallback reset
      time is used so the account is not treated as immediately available.
    * HTTP 403: the account is parked for one hour and another is requested.
    * Any other HTTP status error is logged and re-raised.

    The account is unlocked for ``queue`` whenever the inner loop is left,
    including when the consumer closes this generator.
    """
    while True:
        account = await self.pool.get_account_or_wait(queue)

        try:
            while True:
                rep = await cb(account.client)
                rep.raise_for_status()
                yield rep
        except HTTPStatusError as e:
            status = e.response.status_code

            if status == 429:
                logger.debug(f"Rate limit for account={account.username} on queue={queue}")
                try:
                    reset_ts = int(e.response.headers.get("x-rate-limit-reset", 0))
                except (TypeError, ValueError):
                    reset_ts = 0
                if reset_ts <= 0:
                    # NOTE(review): 15 minutes is assumed to match the usual
                    # rate-limit window of the remote API -- confirm.
                    reset_ts = int(time.time() + 15 * 60)
                self.pool.update_limit(account, queue, reset_ts)
                continue

            if status == 403:
                logger.debug(f"Account={account.username} is banned on queue={queue}")
                reset_ts = int(time.time() + 60 * 60)  # 1 hour
                self.pool.update_limit(account, queue, reset_ts)
                continue

            logger.error(f"[{e.response.status_code}] {e.request.url}\n{e.response.text}")
            raise
        finally:
            account.unlock(queue)
def _get_search_cursor(self, res: dict) -> str | None:
try:
for x in res["timeline"]["instructions"]:
entry = x.get("replaceEntry", None)
if entry is not None and entry["entryIdToReplace"] == "sq-cursor-bottom":
return entry["entry"]["content"]["operation"]["cursor"]["value"]
for entry in x.get("addEntries", {}).get("entries", []):
if entry["entryId"] == "sq-cursor-bottom":
return entry["content"]["operation"]["cursor"]["value"]
except Exception as e:
logger.debug(e)
return None
def _get_ql_entries(self, obj: dict) -> list[dict]:
    """Return what ``get_by_path(obj, "entries")`` finds, or ``[]`` if falsy."""
    found = get_by_path(obj, "entries")
    if not found:
        return []
    return found
def _get_ql_cursor(self, obj: dict) -> str | None:
try:
for entry in self._get_ql_entries(obj):
if entry["entryId"].startswith("cursor-bottom-"):
return entry["content"]["value"]
return None
except Exception:
return None
async def _ql_items(self, op: str, kv: dict, limit=-1):
queue, cursor, count = op.split("/")[-1], None, 0
async def _get(client: AsyncClient):
params = {"variables": {**kv, "cursor": cursor}, "features": GQL_FEATURES}
return await client.get(f"{GQL_URL}/{op}", params=encode_params(params))
async for rep in self._inf_req(queue, _get):
obj = rep.json()
# cursor-top / cursor-bottom always present
entries = self._get_ql_entries(obj)
entries = [x for x in entries if not x["entryId"].startswith("cursor-")]
cursor = self._get_ql_cursor(obj)
check = self._is_end(rep, queue, entries, cursor, count, limit)
count, end_before, end_after = check
if end_before:
return
yield rep
if end_after:
return
async def _ql_item(self, op: str, kv: dict, ft: dict = {}):
async def _get(client: AsyncClient):
params = {"variables": {**kv}, "features": {**GQL_FEATURES, **ft}}
return await client.get(f"{GQL_URL}/{op}", params=encode_params(params))
queue = op.split("/")[-1]
async for rep in self._inf_req(queue, _get):
logger.debug(f"{queue} {self._limit_msg(rep)}")
return rep
raise Exception("No response") # todo
# search
async def search_raw(self, q: str, limit=-1):
queue, cursor, count = "search", None, 0
async def _get(client: AsyncClient):
params = {**SEARCH_PARAMS, "q": q, "count": 20}
params["cursor" if cursor else "requestContext"] = cursor if cursor else "launch"
return await client.get(SEARCH_URL, params=params)
retries = 0
async for rep in self._inf_req(queue, _get):
data = rep.json()
tweets = data.get("globalObjects", {}).get("tweets", [])
if not tweets and retries < 3:
retries += 1
continue
else:
retries = 0
cursor = self._get_search_cursor(data)
check = self._is_end(rep, q, tweets, cursor, count, limit)
count, end_before, end_after = check
if end_before:
return
yield rep
if end_after:
return
async def search(self, q: str, limit=-1):
    """Yield a parsed ``Tweet`` for every tweet in the search results for ``q``.

    ``limit`` is forwarded unchanged to ``search_raw``.
    """
    async for rep in self.search_raw(q, limit=limit):
        objects = rep.json().get("globalObjects", {})
        # Snapshot the raw tweets before parsing starts.
        raw_tweets = list(objects.get("tweets", {}).values())
        for raw in raw_tweets:
            yield Tweet.parse(raw, objects)
# user_by_id
async def user_by_id_raw(self, uid: int):
    """Return the raw ``UserByRestId`` response for the user id ``uid``."""
    variables = {"userId": str(uid), "withSafetyModeUserFields": True}
    rep = await self._ql_item("GazOglcBvgLigl3ywt6b3Q/UserByRestId", variables)
    return rep
async def user_by_id(self, uid: int):
    """Fetch the user with id ``uid`` and return it parsed as a ``User``."""
    rep = await self.user_by_id_raw(uid)
    payload = rep.json()
    user_node = payload["data"]["user"]["result"]
    return User.parse(to_old_obj(user_node))
# user_by_login
async def user_by_login_raw(self, login: str):
    """Return the raw ``UserByScreenName`` response for the screen name ``login``."""
    variables = {"screen_name": login, "withSafetyModeUserFields": True}
    rep = await self._ql_item("sLVLhk0bGj3MVFEKTdax1w/UserByScreenName", variables)
    return rep
async def user_by_login(self, login: str):
    """Fetch the user with screen name ``login`` and return it parsed as a ``User``."""
    rep = await self.user_by_login_raw(login)
    payload = rep.json()
    user_node = payload["data"]["user"]["result"]
    return User.parse(to_old_obj(user_node))
# tweet_details
async def tweet_details_raw(self, twid: int):
    """Return the raw ``TweetDetail`` response for the tweet id ``twid``."""
    variables = {
        "focalTweetId": str(twid),
        "referrer": "tweet",  # tweet, profile
        "with_rux_injections": False,
        "includePromotedContent": True,
        "withCommunity": True,
        "withQuickPromoteEligibilityTweetFields": True,
        "withBirdwatchNotes": True,
        "withVoice": True,
        "withV2Timeline": True,
        "withDownvotePerspective": False,
        "withReactionsMetadata": False,
        "withReactionsPerspective": False,
        "withSuperFollowsTweetFields": False,
        "withSuperFollowsUserFields": False,
    }
    # Feature flags sent in addition to the defaults for this operation.
    extra_features = {
        "responsive_web_twitter_blue_verified_badge_is_enabled": True,
        "longform_notetweets_richtext_consumption_enabled": True,
    }
    rep = await self._ql_item("zXaXQgfyR4GxE21uwYQSyA/TweetDetail", variables, extra_features)
    return rep
async def tweet_details(self, twid: int):
    """Fetch the tweet with id ``twid`` and return it parsed as a ``Tweet``."""
    rep = await self.tweet_details_raw(twid)
    converted = to_search_like(rep.json())
    raw_tweet = converted["tweets"][str(twid)]
    return Tweet.parse(raw_tweet, converted)
# followers
async def followers_raw(self, uid: int, limit=-1):
    """Yield raw ``Followers`` response pages for the user id ``uid``."""
    variables = {"userId": str(uid), "count": 20, "includePromotedContent": False}
    pages = self._ql_items("djdTXDIk2qhd4OStqlUFeQ/Followers", variables, limit=limit)
    async for page in pages:
        yield page
async def followers(self, uid: int, limit=-1):
    """Yield a parsed ``User`` for every user found in the followers pages of ``uid``."""
    async for rep in self.followers_raw(uid, limit=limit):
        users = to_search_like(rep.json())["users"]
        for raw_user in users.values():
            yield User.parse(raw_user)
# following
async def following_raw(self, uid: int, limit=-1):
    """Yield raw ``Following`` response pages for the user id ``uid``."""
    variables = {"userId": str(uid), "count": 20, "includePromotedContent": False}
    pages = self._ql_items("IWP6Zt14sARO29lJT35bBw/Following", variables, limit=limit)
    async for page in pages:
        yield page
async def following(self, uid: int, limit=-1):
    """Yield a parsed ``User`` for every user found in the following pages of ``uid``."""
    async for rep in self.following_raw(uid, limit=limit):
        users = to_search_like(rep.json())["users"]
        for raw_user in users.values():
            yield User.parse(raw_user)
# retweeters
async def retweeters_raw(self, twid: int, limit=-1):
    """Yield raw ``Retweeters`` response pages for the tweet id ``twid``."""
    variables = {"tweetId": str(twid), "count": 20, "includePromotedContent": True}
    pages = self._ql_items("U5f_jm0CiLmSfI1d4rGleQ/Retweeters", variables, limit=limit)
    async for page in pages:
        yield page
async def retweeters(self, twid: int, limit=-1):
    """Yield a parsed ``User`` for every user found in the retweeters pages of ``twid``."""
    async for rep in self.retweeters_raw(twid, limit=limit):
        users = to_search_like(rep.json())["users"]
        for raw_user in users.values():
            yield User.parse(raw_user)
# favoriters
async def favoriters_raw(self, twid: int, limit=-1):
    """Yield raw ``Favoriters`` response pages for the tweet id ``twid``."""
    variables = {"tweetId": str(twid), "count": 20, "includePromotedContent": True}
    pages = self._ql_items("vcTrPlh9ovFDQejz22q9vg/Favoriters", variables, limit=limit)
    async for page in pages:
        yield page
async def favoriters(self, twid: int, limit=-1):
    """Yield a parsed ``User`` for every user found in the favoriters pages of ``twid``."""
    async for rep in self.favoriters_raw(twid, limit=limit):
        users = to_search_like(rep.json())["users"]
        for raw_user in users.values():
            yield User.parse(raw_user)
# user_tweets
async def user_tweets_raw(self, uid: int, limit=-1):
    """Yield raw ``UserTweets`` response pages for the user id ``uid``."""
    variables = {
        "userId": str(uid),
        "count": 40,
        "includePromotedContent": True,
        "withQuickPromoteEligibilityTweetFields": True,
        "withVoice": True,
        "withV2Timeline": True,
    }
    pages = self._ql_items("CdG2Vuc1v6F5JyEngGpxVw/UserTweets", variables, limit=limit)
    async for page in pages:
        yield page
async def user_tweets(self, uid: int, limit=-1):
    """Yield a parsed ``Tweet`` for every tweet found in the tweet pages of ``uid``."""
    async for rep in self.user_tweets_raw(uid, limit=limit):
        timeline = to_search_like(rep.json())
        for raw_tweet in timeline["tweets"].values():
            yield Tweet.parse(raw_tweet, timeline)
# user_tweets_and_replies
async def user_tweets_and_replies_raw(self, uid: int, limit=-1):
    """Yield raw ``UserTweetsAndReplies`` response pages for the user id ``uid``."""
    variables = {
        "userId": str(uid),
        "count": 40,
        "includePromotedContent": True,
        "withCommunity": True,
        "withVoice": True,
        "withV2Timeline": True,
    }
    pages = self._ql_items("zQxfEr5IFxQ2QZ-XMJlKew/UserTweetsAndReplies", variables, limit=limit)
    async for page in pages:
        yield page
async def user_tweets_and_replies(self, uid: int, limit=-1):
    """Yield a parsed ``Tweet`` for every tweet found in the tweets-and-replies pages of ``uid``."""
    async for rep in self.user_tweets_and_replies_raw(uid, limit=limit):
        timeline = to_search_like(rep.json())
        for raw_tweet in timeline["tweets"].values():
            yield Tweet.parse(raw_tweet, timeline)