add more graphql methods; add parse to model for graphql responses

Этот коммит содержится в:
Vlad Pronsky 2023-04-29 16:19:33 +03:00
родитель 0b94f6feaa
Коммит d4d867aaab
4 изменённых файлов: 337 добавлений и 151 удалений

Просмотреть файл

@ -6,19 +6,15 @@ import time
from datetime import datetime, timezone
from fake_useragent import UserAgent
from httpx import AsyncClient, Client, HTTPStatusError, Response
from httpx import AsyncClient, Client, Response
from loguru import logger
from .utils import raise_for_status
TOKEN = "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA"
TASK_URL = "https://api.twitter.com/1.1/onboarding/task.json"
class RateLimitExceeded(Exception):
def __init__(self, reset: int, cursor: str | None = None):
self.reset = reset
self.cursor = cursor
def search_email_with_confirmation_code(imap: imaplib.IMAP4_SSL, msg_count: int) -> str | None:
for i in range(msg_count, 0, -1):
_, rep = imap.fetch(str(i), "(RFC822)")
@ -52,13 +48,6 @@ def get_verification_code(email: str, password: str, imap_domain: None | str = N
time.sleep(1)
def raise_for_status(rep: Response, label: str):
    """Raise a labelled Exception when *rep* carries an HTTP error status.

    label is prepended so the caller can tell which request failed.
    """
    try:
        rep.raise_for_status()
    except HTTPStatusError as e:
        # Chain the original httpx error so the traceback keeps the real cause
        # (previously the HTTPStatusError context was discarded implicitly).
        raise Exception(f"{label} - {rep.status_code} - {rep.text}") from e
def login_get_guest_token(client: Client) -> str:
rep = client.post("https://api.twitter.com/1.1/guest/activate.json")
raise_for_status(rep, "guest_token")

Просмотреть файл

@ -1,5 +1,5 @@
import email.utils
from dataclasses import dataclass
from dataclasses import asdict, dataclass
from datetime import datetime
from typing import Optional
@ -7,13 +7,29 @@ from .utils import get_or, int_or_none
@dataclass
class Coordinates:
    """A longitude/latitude pair attached to a tweet."""
    longitude: float
    latitude: float
class JSONTrait:
    """Mixin for dataclass models: serialize every field to a plain dict."""

    def json(self) -> dict:
        """Return this dataclass instance as a JSON-serializable dict."""
        return asdict(self)
@dataclass
class Coordinates(JSONTrait):
    """A longitude/latitude point attached to a tweet.

    NOTE(review): this span also contained a stray, body-less `class Place:`
    header left over from the interleaved diff; it was removed (Place is
    defined separately below).
    """

    longitude: float
    latitude: float

    @staticmethod
    def parse(tw_obj: dict):
        """Extract coordinates from a raw tweet object, or None when absent.

        "coordinates" stores [longitude, latitude]; "geo" stores
        [latitude, longitude] - both are normalized to (longitude, latitude).
        """
        if tw_obj.get("coordinates"):
            coords = tw_obj["coordinates"]["coordinates"]
            return Coordinates(coords[0], coords[1])
        if tw_obj.get("geo"):
            coords = tw_obj["geo"]["coordinates"]
            return Coordinates(coords[1], coords[0])
        return None
@dataclass
class Place(JSONTrait):
id: str
fullName: str
name: str
@ -34,7 +50,7 @@ class Place:
@dataclass
class TextLink:
class TextLink(JSONTrait):
url: str
text: str | None
tcourl: str | None
@ -51,22 +67,18 @@ class TextLink:
@dataclass
class UserRef(JSONTrait):
    """Minimal reference to a user (e.g. a mention inside a tweet)."""

    id: int
    username: str
    displayname: str

    @staticmethod
    def parse(obj: dict):
        # The interleaved diff left two return statements here (the second
        # unreachable) plus a duplicate class header; keep the "id_str" form,
        # consistent with Tweet.parse, since large numeric JSON ids can lose
        # precision.
        return UserRef(id=int(obj["id_str"]), username=obj["screen_name"], displayname=obj["name"])
@dataclass
class User:
class User(JSONTrait):
id: int
username: str
displayname: str
@ -115,7 +127,7 @@ class User:
@dataclass
class Tweet:
class Tweet(JSONTrait):
id: int
date: datetime
user: User
@ -136,10 +148,6 @@ class Tweet:
place: Optional[Place] = None
coordinates: Optional[Coordinates] = None
@property
def url(self):
return f"https://twitter.com/{self.user.username}/status/{self.id}"
# renderedContent: str
# source: str | None = None
# sourceUrl: str | None = None
@ -150,23 +158,19 @@ class Tweet:
# card: typing.Optional["Card"] = None
# vibe: typing.Optional["Vibe"] = None
@property
def url(self):
return f"https://twitter.com/{self.user.username}/status/{self.id}"
@staticmethod
def parse(obj: dict, res: dict):
rt_obj = get_or(res, f"globalObjects.tweets.{obj.get('retweeted_status_id_str')}")
qt_obj = get_or(res, f"globalObjects.tweets.{obj.get('quoted_status_id_str')}")
coordinates: Coordinates | None = None
if obj.get("coordinates"):
coords = obj["coordinates"]["coordinates"]
coordinates = Coordinates(coords[0], coords[1])
elif obj.get("geo"):
coords = obj["geo"]["coordinates"]
coordinates = Coordinates(coords[1], coords[0])
rt_obj = get_or(res, f"tweets.{obj.get('retweeted_status_id_str')}")
qt_obj = get_or(res, f"tweets.{obj.get('quoted_status_id_str')}")
return Tweet(
id=obj["id"],
id=int(obj["id_str"]),
date=email.utils.parsedate_to_datetime(obj["created_at"]),
user=User.parse(res["globalObjects"]["users"][obj["user_id_str"]]),
user=User.parse(res["users"][obj["user_id_str"]]),
lang=obj["lang"],
rawContent=obj["full_text"],
replyCount=obj["reply_count"],
@ -182,5 +186,5 @@ class Tweet:
retweetedTweet=Tweet.parse(rt_obj, res) if rt_obj else None,
quotedTweet=Tweet.parse(qt_obj, res) if qt_obj else None,
place=Place.parse(obj["place"]) if obj.get("place") else None,
coordinates=coordinates,
coordinates=Coordinates.parse(obj),
)

Просмотреть файл

@ -4,9 +4,9 @@ from typing import Awaitable, Callable
from httpx import AsyncClient, HTTPStatusError, Response
from loguru import logger
from .models import Tweet
from .models import Tweet, User
from .pool import AccountsPool
from .utils import find_item
from .utils import encode_params, find_item, to_old_obj, to_search_like
BASIC_SEARCH_PARAMS = """
include_profile_interstitial_type=1
@ -71,6 +71,7 @@ BASE_FEATURES = {
SEARCH_URL = "https://api.twitter.com/2/search/adaptive.json"
SEARCH_PARAMS = dict(x.split("=") for x in BASIC_SEARCH_PARAMS.splitlines() if x)
GRAPHQL_URL = "https://twitter.com/i/api/graphql/"
def filter_null(obj: dict):
@ -93,6 +94,27 @@ class Search:
def __init__(self, pool: AccountsPool):
    # Pool of authenticated accounts used to execute the rate-limited requests.
    self.pool = pool
# http helpers
def _limit_msg(self, rep: Response):
    """Format "remaining/limit" from the response's rate-limit headers (-1 when missing)."""
    remaining = rep.headers.get("x-rate-limit-remaining", -1)
    limit = rep.headers.get("x-rate-limit-limit", -1)
    return f"{remaining}/{limit}"
def _is_end(self, rep: Response, q: str, res: list, cur: str | None, cnt: int, lim: int):
    """Decide whether pagination is exhausted.

    Returns (new_total, end_before, end_after): end_before means stop without
    yielding this page (no results came back); end_after means stop after
    yielding it (no cursor, or the requested limit has been reached).
    """
    got = len(res)
    total = cnt + got
    has_results = got > 0
    has_cursor = cur is not None
    hit_limit = lim > 0 and total >= lim

    stats = f"{q} {total:,d} (+{got:,d})"
    flags = f"res={int(has_results)} cur={int(has_cursor)} lim={int(hit_limit)}"
    logger.debug(" ".join([stats, flags, self._limit_msg(rep)]))

    return total, not has_results, not has_cursor or hit_limit
async def _inf_req(self, queue: str, cb: Callable[[AsyncClient], Awaitable[Response]]):
while True:
account = await self.pool.get_account_or_wait(queue)
@ -114,23 +136,7 @@ class Search:
finally:
account.unlock(queue)
def _check_stop(self, rep: Response, txt: str, cnt: int, res: list, cur: str | None, lim: int):
    """Log pagination progress and decide when to stop.

    Returns (new_total, end_before, end_after). Note: the limit test uses the
    count *before* this page is added (original behavior, preserved).
    """
    got = len(res)
    has_res = got > 0
    has_cur = cur is not None
    hit_lim = lim > 0 and cnt >= lim

    head = f"{txt} {cnt:,d} (+{got:,d}) res={int(has_res)} cur={int(has_cur)} lim={int(hit_lim)}"
    tail = f"[{rep.headers['x-rate-limit-remaining']}/{rep.headers['x-rate-limit-limit']}]"
    logger.debug(" ".join([head, tail]))

    return cnt + got, not has_res, not has_cur or hit_lim
# search
def get_search_cursor(self, res: dict) -> str | None:
def _get_search_cursor(self, res: dict) -> str | None:
try:
for x in res["timeline"]["instructions"]:
entry = x.get("replaceEntry", None)
@ -144,8 +150,63 @@ class Search:
logger.debug(e)
return None
def get_ql_entries(self, obj: dict) -> list[dict]:
    """Return the timeline "entries" list found anywhere in a GraphQL response ([] if none)."""
    return find_item(obj, "entries") or []
def _get_ql_cursor(self, obj: dict) -> str | None:
    """Extract the bottom pagination cursor from a GraphQL response, if any."""
    try:
        # Lazy generator keeps the original first-match / early-exit semantics.
        matches = (
            e["content"]["value"]
            for e in self.get_ql_entries(obj)
            if e["entryId"].startswith("cursor-bottom-")
        )
        return next(matches, None)
    except Exception:
        # Malformed or unexpected payload: treat as "no cursor".
        return None
async def _ql_items(self, op: str, kv: dict, ft: dict | None = None, limit=-1):
    """Paginate a GraphQL timeline operation, yielding raw responses.

    op -- "queryId/OperationName"; kv -- GraphQL variables; ft -- extra feature
    flags merged over BASE_FEATURES (was silently ignored before, inconsistent
    with _ql_item); limit -- stop after this many items (-1 = no cap).
    """
    queue, cursor, count = op.split("/")[-1], None, 0
    features = {**BASE_FEATURES, **(ft or {})}  # None default avoids the mutable {} default

    async def _get(client: AsyncClient):
        # `cursor` is read from the enclosing scope, so each page request
        # picks up the latest value assigned in the loop below.
        params = {"variables": {**kv, "cursor": cursor}, "features": features}
        # GRAPHQL_URL already ends with "/" - the old f"{GRAPHQL_URL}/{op}"
        # produced a double slash in the path.
        return await client.get(f"{GRAPHQL_URL}{op}", params=encode_params(params))

    async for rep in self._inf_req(queue, _get):
        obj = rep.json()

        # cursor-top / cursor-bottom entries are always present; drop them
        entries = [x for x in self.get_ql_entries(obj) if not x["entryId"].startswith("cursor-")]
        cursor = self._get_ql_cursor(obj)

        count, end_before, end_after = self._is_end(rep, queue, entries, cursor, count, limit)
        if end_before:
            return

        yield rep

        if end_after:
            return
async def _ql_item(self, op: str, kv: dict, ft: dict | None = None):
    """Execute a single (non-paginated) GraphQL operation and return the response.

    op -- "queryId/OperationName"; kv -- GraphQL variables; ft -- extra feature
    flags merged over BASE_FEATURES. Raises when no response is obtained.
    """
    # None default instead of the mutable {} default argument.
    variables, features = {**kv}, {**BASE_FEATURES, **(ft or {})}
    params = {"variables": variables, "features": features}

    async def _get(client: AsyncClient):
        # GRAPHQL_URL already ends with "/" - the old f"{GRAPHQL_URL}/{op}"
        # produced a double slash in the path.
        return await client.get(f"{GRAPHQL_URL}{op}", params=encode_params(params))

    queue = op.split("/")[-1]
    async for rep in self._inf_req(queue, _get):
        logger.debug(f"{queue} {self._limit_msg(rep)}")
        return rep

    raise Exception("No response")  # todo: dedicated exception type
# search
async def search_raw(self, q: str, limit=-1):
queue, cursor, all_count = "search", None, 0
queue, cursor, count = "search", None, 0
async def _get(client: AsyncClient):
params = {**SEARCH_PARAMS, "q": q, "count": 20}
@ -155,11 +216,11 @@ class Search:
async for rep in self._inf_req(queue, _get):
data = rep.json()
cursor = self.get_search_cursor(data)
cursor = self._get_search_cursor(data)
tweets = data.get("globalObjects", {}).get("tweets", [])
check = self._check_stop(rep, q, all_count, tweets, cursor, limit)
all_count, end_before, end_after = check
check = self._is_end(rep, q, tweets, cursor, count, limit)
count, end_before, end_after = check
if end_before:
return
@ -171,85 +232,160 @@ class Search:
async def search(self, q: str, limit=-1):
    """Search tweets matching query *q*, yielding parsed Tweet objects.

    The interleaved diff left both the old and the new parsing paths in this
    body, which would have yielded every tweet twice; only the new path
    (globalObjects passed as the lookup context) is kept.
    """
    async for rep in self.search_raw(q, limit=limit):
        obj = rep.json().get("globalObjects", {})
        for x in list(obj.get("tweets", {}).values()):
            yield Tweet.parse(x, obj)
# graphql
# user_by_id
def get_ql_cursor(self, obj: dict) -> str | None:
    """Return the bottom pagination cursor from a GraphQL response, or None."""
    try:
        # Bug fix: get_ql_entries is a method on this class - the bare name
        # raised NameError, which the broad except swallowed, so this method
        # always returned None.
        for entry in self.get_ql_entries(obj):
            if entry["entryId"].startswith("cursor-bottom-"):
                return entry["content"]["value"]
        return None
    except Exception:
        return None
async def graphql_items(self, op: str, variables: dict, features: dict = {}, limit=-1):
    """Paginate a GraphQL timeline operation, yielding raw responses.

    NOTE(review): `features: dict = {}` is a mutable default argument; it is
    only read here, but a None default would be safer.
    """
    url = f"https://twitter.com/i/api/graphql/{op}"
    features = {**BASE_FEATURES, **features}
    queue, cursor, all_count = op.split("/")[-1], None, 0

    async def _get(client: AsyncClient):
        # `cursor` is read from the enclosing scope, so each page request
        # picks up the latest value assigned in the loop below.
        params = {"variables": {**variables, "cursor": cursor}, "features": features}
        # NOTE(review): `json_params` is not defined anywhere in the visible
        # code (utils defines `encode_params`) - confirm it exists at runtime.
        return await client.get(url, params=json_params(params))

    async for rep in self._inf_req(queue, _get):
        data = rep.json()
        # NOTE(review): bare `get_ql_entries` - elsewhere in this class it is
        # a method (self.get_ql_entries); a NameError here is NOT caught.
        entries, cursor = get_ql_entries(data), self.get_ql_cursor(data)
        # cursor-top / cursor-bottom always present
        items = [x for x in entries if not x["entryId"].startswith("cursor-")]
        check = self._check_stop(rep, queue, all_count, items, cursor, limit)
        all_count, end_before, end_after = check
        if end_before:
            return
        yield rep
        if end_after:
            return
async def graphql_item(self, op: str, variables: dict, features: dict = {}):
    """Execute a single (non-paginated) GraphQL operation; return the first response.

    NOTE(review): returns None implicitly when the request loop yields nothing;
    `features: dict = {}` is a mutable default (read-only here).
    """
    url = f"https://twitter.com/i/api/graphql/{op}"
    features = {**BASE_FEATURES, **features}

    async def _get(client: AsyncClient):
        # NOTE(review): `json_params` is not defined anywhere in the visible
        # code (utils defines `encode_params`) - confirm it exists at runtime.
        params = {"variables": {**variables}, "features": features}
        return await client.get(url, params=json_params(params))

    queue = op.split("/")[-1]
    async for rep in self._inf_req(queue, _get):
        msg = [
            f"{queue}",
            f"[{rep.headers['x-rate-limit-remaining']}/{rep.headers['x-rate-limit-limit']}]",
        ]
        logger.debug(" ".join(msg))
        return rep
async def user_by_login(self, login: str):
    """Fetch a user profile by screen name (raw GraphQL response)."""
    kv = {"screen_name": login, "withSafetyModeUserFields": True}
    return await self.graphql_item("sLVLhk0bGj3MVFEKTdax1w/UserByScreenName", kv)
async def user_by_id_raw(self, uid: int):
    """Fetch a user by numeric id; returns the raw GraphQL response.

    NOTE(review): this span interleaved a body-less old `user_by_id` header
    (a syntax error as flattened) with the new `_raw` variant; only the
    `_raw` variant is kept - the parsed wrapper `user_by_id` exists below.
    """
    op = "GazOglcBvgLigl3ywt6b3Q/UserByRestId"
    kv = {"userId": str(uid), "withSafetyModeUserFields": True}
    return await self._ql_item(op, kv)
async def user_by_id(self, uid: int):
    """Fetch a user by numeric id and parse it into a User model.

    NOTE(review): a dangling, body-less `async def retweeters(...)` header
    (diff residue, a syntax error as flattened) preceded this method and was
    removed - retweeters is defined further below.
    """
    rep = await self.user_by_id_raw(uid)
    res = rep.json()
    # GraphQL nests the profile under data.user.result; convert to legacy shape
    return User.parse(to_old_obj(res["data"]["user"]["result"]))
# user_by_login

async def user_by_login_raw(self, login: str):
    """Fetch a user profile by screen name; returns the raw GraphQL response."""
    kv = {"screen_name": login, "withSafetyModeUserFields": True}
    return await self._ql_item("sLVLhk0bGj3MVFEKTdax1w/UserByScreenName", kv)
async def user_by_login(self, login: str):
    """Fetch a user profile by screen name, parsed into a User model."""
    rep = await self.user_by_login_raw(login)
    data = rep.json()
    return User.parse(to_old_obj(data["data"]["user"]["result"]))
# tweet_details

async def tweet_details_raw(self, twid: int):
    """Fetch a tweet's detail timeline; returns the raw GraphQL response."""
    variables = {
        "focalTweetId": str(twid),
        "referrer": "tweet",  # tweet, profile
        "with_rux_injections": False,
        "includePromotedContent": True,
        "withCommunity": True,
        "withQuickPromoteEligibilityTweetFields": True,
        "withBirdwatchNotes": True,
        "withVoice": True,
        "withV2Timeline": True,
        "withDownvotePerspective": False,
        "withReactionsMetadata": False,
        "withReactionsPerspective": False,
        "withSuperFollowsTweetFields": False,
        "withSuperFollowsUserFields": False,
    }
    features = {
        "responsive_web_twitter_blue_verified_badge_is_enabled": True,
        "longform_notetweets_richtext_consumption_enabled": True,
    }
    return await self._ql_item("zXaXQgfyR4GxE21uwYQSyA/TweetDetail", variables, features)
async def tweet_details(self, twid: int):
    """Fetch and parse a single tweet by id."""
    rep = await self.tweet_details_raw(twid)
    data = to_search_like(rep.json())
    return Tweet.parse(data["tweets"][str(twid)], data)
# followers

async def followers_raw(self, uid: int, limit=-1):
    """Yield raw responses for a user's followers timeline."""
    kv = {"userId": str(uid), "count": 20, "includePromotedContent": False}
    async for rep in self._ql_items("djdTXDIk2qhd4OStqlUFeQ/Followers", kv, limit=limit):
        yield rep
async def followers(self, uid: int, limit=-1):
    """Yield parsed User models for accounts following *uid*."""
    async for rep in self.followers_raw(uid, limit=limit):
        data = to_search_like(rep.json())
        for user in data["users"].values():
            yield User.parse(user)
# following

async def following_raw(self, uid: int, limit=-1):
    """Yield raw responses for a user's following timeline."""
    kv = {"userId": str(uid), "count": 20, "includePromotedContent": False}
    async for rep in self._ql_items("IWP6Zt14sARO29lJT35bBw/Following", kv, limit=limit):
        yield rep
async def following(self, uid: int, limit=-1):
    """Yield parsed User models for accounts that *uid* follows."""
    async for rep in self.following_raw(uid, limit=limit):
        data = to_search_like(rep.json())
        for user in data["users"].values():
            yield User.parse(user)
# retweeters

async def retweeters_raw(self, twid: int, limit=-1):
    """Yield raw responses for a tweet's retweeters timeline.

    NOTE(review): this span contained both the old `graphql_items` loop and
    the new `_ql_items` loop from the interleaved diff; only the `_ql_items`
    path is kept.
    """
    op = "U5f_jm0CiLmSfI1d4rGleQ/Retweeters"
    kv = {"tweetId": str(twid), "count": 20, "includePromotedContent": True}
    async for x in self._ql_items(op, kv, limit=limit):
        yield x
async def retweeters(self, twid: int, limit=-1):
    """Yield parsed User models for accounts that retweeted *twid*."""
    async for rep in self.retweeters_raw(twid, limit=limit):
        data = to_search_like(rep.json())
        for user in data["users"].values():
            yield User.parse(user)
# favoriters

async def favoriters_raw(self, twid: int, limit=-1):
    """Yield raw responses for a tweet's favoriters (likes) timeline."""
    kv = {"tweetId": str(twid), "count": 20, "includePromotedContent": True}
    async for rep in self._ql_items("vcTrPlh9ovFDQejz22q9vg/Favoriters", kv, limit=limit):
        yield rep
async def favoriters(self, twid: int, limit=-1):
    """Yield parsed User models for accounts that liked tweet *twid*.

    NOTE(review): dead duplicated lines from the old `graphql_items` path
    (diff interleave) were removed; only the `favoriters_raw` path is kept.
    """
    async for rep in self.favoriters_raw(twid, limit=limit):
        obj = to_search_like(rep.json())
        for _, v in obj["users"].items():
            yield User.parse(v)
# user_tweets

async def user_tweets_raw(self, uid: int, limit=-1):
    """Yield raw responses for a user's tweets timeline (40 items per page)."""
    variables = {
        "userId": str(uid),
        "count": 40,
        "includePromotedContent": True,
        "withQuickPromoteEligibilityTweetFields": True,
        "withVoice": True,
        "withV2Timeline": True,
    }
    async for rep in self._ql_items("CdG2Vuc1v6F5JyEngGpxVw/UserTweets", variables, limit=limit):
        yield rep
async def user_tweets(self, uid: int, limit=-1):
    """Yield parsed Tweet models from a user's tweets timeline."""
    async for rep in self.user_tweets_raw(uid, limit=limit):
        data = to_search_like(rep.json())
        for tweet in data["tweets"].values():
            yield Tweet.parse(tweet, data)
# user_tweets_and_replies

async def user_tweets_and_replies_raw(self, uid: int, limit=-1):
    """Yield raw responses for a user's tweets-and-replies timeline (40 per page)."""
    variables = {
        "userId": str(uid),
        "count": 40,
        "includePromotedContent": True,
        "withCommunity": True,
        "withVoice": True,
        "withV2Timeline": True,
    }
    async for rep in self._ql_items("zQxfEr5IFxQ2QZ-XMJlKew/UserTweetsAndReplies", variables, limit=limit):
        yield rep
async def user_tweets_and_replies(self, uid: int, limit=-1):
    """Yield parsed Tweet models from a user's tweets-and-replies timeline."""
    async for rep in self.user_tweets_and_replies_raw(uid, limit=limit):
        data = to_search_like(rep.json())
        for tweet in data["tweets"].values():
            yield Tweet.parse(tweet, data)

Просмотреть файл

@ -1,8 +1,49 @@
import json
from collections import defaultdict
from typing import Any, TypeVar
from httpx import HTTPStatusError, Response
from loguru import logger
T = TypeVar("T")
def raise_for_status(rep: Response, label: str):
    """Log a labelled debug message and re-raise when *rep* has an HTTP error status."""
    try:
        rep.raise_for_status()
    except HTTPStatusError:
        logger.debug(f"{label} - {rep.status_code} - {rep.text}")
        # Bare `raise` re-raises the active exception and preserves the original
        # traceback without adding an extra frame (idiomatic vs `raise e`).
        raise
def encode_params(obj: dict):
    """Stringify request params; dict values become compact JSON with None entries dropped."""
    encoded = {}
    for key, val in obj.items():
        if isinstance(val, dict):
            cleaned = {k: v for k, v in val.items() if v is not None}
            val = json.dumps(cleaned, separators=(",", ":"))
        encoded[key] = str(val)
    return encoded
def get_or(obj: dict, key: str, default_value: T = None) -> Any | T:
    """Walk a dotted *key* path through nested dicts; return *default_value* on a miss.

    Robustness fix: also returns the default when an intermediate value is not
    a dict - previously e.g. get_or({"a": 1}, "a.b") raised TypeError from the
    `in` test on a non-container.
    """
    for part in key.split("."):
        if not isinstance(obj, dict) or part not in obj:
            return default_value
        obj = obj[part]
    return obj
def int_or_none(obj: dict, key: str):
    """Look up dotted *key* and coerce the value to int; None when missing or not coercible."""
    try:
        value = get_or(obj, key)
        if value is None:
            return None
        return int(value)
    except Exception:
        return None
# https://stackoverflow.com/a/43184871
def find_item(obj: dict, key: str, default=None):
stack = [iter(obj.items())]
@ -21,17 +62,33 @@ def find_item(obj: dict, key: str, default=None):
return default
def get_or(obj: dict, key: str, default_value: T = None) -> Any | T:
    """Walk a dotted *key* path through nested dicts; return *default_value* on a miss.

    Robustness fix: also returns the default when an intermediate value is not
    a dict - previously e.g. get_or({"a": 1}, "a.b") raised TypeError from the
    `in` test on a non-container.
    """
    for part in key.split("."):
        if not isinstance(obj, dict) or part not in obj:
            return default_value
        obj = obj[part]
    return obj
def get_typed_object(obj: dict, res: defaultdict[str, list]):
    """Recursively collect every sub-dict carrying "__typename" into *res*, grouped by type name.

    Traverses nested dicts and dicts inside lists; returns *res* for chaining.
    """
    typename = obj.get("__typename", None)
    if typename is not None:
        res[typename].append(obj)

    for value in obj.values():
        if isinstance(value, dict):
            get_typed_object(value, res)
        elif isinstance(value, list):
            for item in value:
                if isinstance(item, dict):
                    get_typed_object(item, res)

    return res
def int_or_none(obj: dict, key: str):
    """Return int(value) for dotted *key*, or None if absent or unparseable."""
    try:
        val = get_or(obj, key)
        return None if val is None else int(val)
    except Exception:
        return None
def to_old_obj(obj: dict):
    """Flatten a GraphQL user/tweet object into the legacy v1.1 shape.

    Merges the "legacy" sub-dict into the top level and adds "id"/"id_str"
    derived from "rest_id".
    """
    rest_id = obj["rest_id"]
    return {**obj, **obj["legacy"], "id_str": str(rest_id), "id": int(rest_id)}
def to_search_like(obj: dict):
    """Convert a GraphQL response into the search-endpoint shape.

    Returns {"tweets": {...}, "users": {...}}, each keyed by stringified
    rest_id and converted to the legacy shape; objects without a "legacy"
    payload (and users without "id") are skipped.
    """
    typed = get_typed_object(obj, defaultdict(list))

    tweets = {str(x["rest_id"]): to_old_obj(x) for x in typed.get("Tweet", []) if "legacy" in x}
    users = {str(x["rest_id"]): to_old_obj(x) for x in typed.get("User", []) if "legacy" in x and "id" in x}

    return {"tweets": tweets, "users": users}