add more graphql methods; add parse to model for graphql responses

This commit is contained in:
Vlad Pronsky 2023-04-29 16:19:33 +03:00
parent 0b94f6feaa
commit d4d867aaab
4 changed files with 337 additions and 151 deletions

View file

@@ -6,19 +6,15 @@ import time
 from datetime import datetime, timezone
 
 from fake_useragent import UserAgent
-from httpx import AsyncClient, Client, HTTPStatusError, Response
+from httpx import AsyncClient, Client, Response
 from loguru import logger
 
+from .utils import raise_for_status
+
 TOKEN = "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA"
 TASK_URL = "https://api.twitter.com/1.1/onboarding/task.json"
 
 
-class RateLimitExceeded(Exception):
-    def __init__(self, reset: int, cursor: str | None = None):
-        self.reset = reset
-        self.cursor = cursor
-
-
 def search_email_with_confirmation_code(imap: imaplib.IMAP4_SSL, msg_count: int) -> str | None:
     for i in range(msg_count, 0, -1):
         _, rep = imap.fetch(str(i), "(RFC822)")
@@ -52,13 +48,6 @@ def get_verification_code(email: str, password: str, imap_domain: None | str = N
         time.sleep(1)
 
 
-def raise_for_status(rep: Response, label: str):
-    try:
-        rep.raise_for_status()
-    except HTTPStatusError:
-        raise Exception(f"{label} - {rep.status_code} - {rep.text}")
-
-
 def login_get_guest_token(client: Client) -> str:
     rep = client.post("https://api.twitter.com/1.1/guest/activate.json")
     raise_for_status(rep, "guest_token")
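Note on the relocated helper: `raise_for_status` now lives in `.utils` and re-raises the original `HTTPStatusError` after logging, instead of wrapping it in a generic `Exception`, so callers can still catch the httpx error type. A minimal sketch of the new behaviour (illustrative only; `login_get_guest_token` comes from the module shown above, whose filename is not visible in this view, and `guest_token_or_none` is a made-up name):

from httpx import Client, HTTPStatusError

def guest_token_or_none(client: Client) -> str | None:
    try:
        return login_get_guest_token(client)  # calls raise_for_status(rep, "guest_token") internally
    except HTTPStatusError:
        # the shared helper already logged "guest_token - <status> - <body>" before re-raising
        return None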

View file

@@ -1,5 +1,5 @@
 import email.utils
-from dataclasses import dataclass
+from dataclasses import asdict, dataclass
 from datetime import datetime
 from typing import Optional
@@ -7,13 +7,29 @@ from .utils import get_or, int_or_none
 
 
 @dataclass
-class Coordinates:
+class JSONTrait:
+    def json(self):
+        return asdict(self)
+
+
+@dataclass
+class Coordinates(JSONTrait):
     longitude: float
     latitude: float
+
+    @staticmethod
+    def parse(tw_obj: dict):
+        if tw_obj.get("coordinates"):
+            coords = tw_obj["coordinates"]["coordinates"]
+            return Coordinates(coords[0], coords[1])
+        if tw_obj.get("geo"):
+            coords = tw_obj["geo"]["coordinates"]
+            return Coordinates(coords[1], coords[0])
+        return None
 
 
 @dataclass
-class Place:
+class Place(JSONTrait):
     id: str
     fullName: str
     name: str
@@ -34,7 +50,7 @@ class Place:
 
 
 @dataclass
-class TextLink:
+class TextLink(JSONTrait):
     url: str
     text: str | None
     tcourl: str | None
@@ -51,22 +67,18 @@ class TextLink:
 
 
 @dataclass
-class UserRef:
+class UserRef(JSONTrait):
     id: int
     username: str
     displayname: str
 
     @staticmethod
     def parse(obj: dict):
-        return UserRef(
-            id=obj["id"],
-            username=obj["screen_name"],
-            displayname=obj["name"],
-        )
+        return UserRef(id=int(obj["id_str"]), username=obj["screen_name"], displayname=obj["name"])
 
 
 @dataclass
-class User:
+class User(JSONTrait):
     id: int
     username: str
     displayname: str
@@ -115,7 +127,7 @@ class User:
 
 
 @dataclass
-class Tweet:
+class Tweet(JSONTrait):
     id: int
     date: datetime
     user: User
@@ -136,10 +148,6 @@ class Tweet:
     place: Optional[Place] = None
     coordinates: Optional[Coordinates] = None
 
-    @property
-    def url(self):
-        return f"https://twitter.com/{self.user.username}/status/{self.id}"
-
     # renderedContent: str
     # source: str | None = None
     # sourceUrl: str | None = None
@@ -150,23 +158,19 @@ class Tweet:
     # card: typing.Optional["Card"] = None
     # vibe: typing.Optional["Vibe"] = None
 
+    @property
+    def url(self):
+        return f"https://twitter.com/{self.user.username}/status/{self.id}"
+
     @staticmethod
     def parse(obj: dict, res: dict):
-        rt_obj = get_or(res, f"globalObjects.tweets.{obj.get('retweeted_status_id_str')}")
-        qt_obj = get_or(res, f"globalObjects.tweets.{obj.get('quoted_status_id_str')}")
-
-        coordinates: Coordinates | None = None
-        if obj.get("coordinates"):
-            coords = obj["coordinates"]["coordinates"]
-            coordinates = Coordinates(coords[0], coords[1])
-        elif obj.get("geo"):
-            coords = obj["geo"]["coordinates"]
-            coordinates = Coordinates(coords[1], coords[0])
+        rt_obj = get_or(res, f"tweets.{obj.get('retweeted_status_id_str')}")
+        qt_obj = get_or(res, f"tweets.{obj.get('quoted_status_id_str')}")
 
         return Tweet(
-            id=obj["id"],
+            id=int(obj["id_str"]),
             date=email.utils.parsedate_to_datetime(obj["created_at"]),
-            user=User.parse(res["globalObjects"]["users"][obj["user_id_str"]]),
+            user=User.parse(res["users"][obj["user_id_str"]]),
             lang=obj["lang"],
             rawContent=obj["full_text"],
             replyCount=obj["reply_count"],
@@ -182,5 +186,5 @@ class Tweet:
             retweetedTweet=Tweet.parse(rt_obj, res) if rt_obj else None,
             quotedTweet=Tweet.parse(qt_obj, res) if qt_obj else None,
             place=Place.parse(obj["place"]) if obj.get("place") else None,
-            coordinates=coordinates,
+            coordinates=Coordinates.parse(obj),
         )
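The new `Coordinates.parse` accepts both shapes Twitter uses for geodata ("coordinates" is lon/lat, "geo" is lat/lon), and `JSONTrait.json()` turns any of these dataclasses into a plain dict via `asdict`. A small illustration with hand-made payloads (assuming `Coordinates` is imported from the models module above; values are made up):

point = Coordinates.parse({"coordinates": {"coordinates": [30.52, 50.45]}})
assert point == Coordinates(longitude=30.52, latitude=50.45)

point = Coordinates.parse({"geo": {"coordinates": [50.45, 30.52]}})  # "geo" carries lat/lon order
assert point == Coordinates(longitude=30.52, latitude=50.45)

assert Coordinates.parse({}) is None
print(point.json())  # {'longitude': 30.52, 'latitude': 50.45}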

View file

@@ -4,9 +4,9 @@ from typing import Awaitable, Callable
 from httpx import AsyncClient, HTTPStatusError, Response
 from loguru import logger
 
-from .models import Tweet
+from .models import Tweet, User
 from .pool import AccountsPool
-from .utils import find_item
+from .utils import encode_params, find_item, to_old_obj, to_search_like
 
 BASIC_SEARCH_PARAMS = """
 include_profile_interstitial_type=1
@@ -71,6 +71,7 @@ BASE_FEATURES = {
 SEARCH_URL = "https://api.twitter.com/2/search/adaptive.json"
 SEARCH_PARAMS = dict(x.split("=") for x in BASIC_SEARCH_PARAMS.splitlines() if x)
+GRAPHQL_URL = "https://twitter.com/i/api/graphql/"
 
 
 def filter_null(obj: dict):
@@ -93,6 +94,27 @@ class Search:
     def __init__(self, pool: AccountsPool):
         self.pool = pool
 
+    # http helpers
+
+    def _limit_msg(self, rep: Response):
+        lr = rep.headers.get("x-rate-limit-remaining", -1)
+        ll = rep.headers.get("x-rate-limit-limit", -1)
+        return f"{lr}/{ll}"
+
+    def _is_end(self, rep: Response, q: str, res: list, cur: str | None, cnt: int, lim: int):
+        new_count = len(res)
+        new_total = cnt + new_count
+
+        is_res = new_count > 0
+        is_cur = cur is not None
+        is_lim = lim > 0 and new_total >= lim
+
+        stats = f"{q} {new_total:,d} (+{new_count:,d})"
+        flags = f"res={int(is_res)} cur={int(is_cur)} lim={int(is_lim)}"
+        logger.debug(" ".join([stats, flags, self._limit_msg(rep)]))
+
+        return new_total, not is_res, not is_cur or is_lim
+
     async def _inf_req(self, queue: str, cb: Callable[[AsyncClient], Awaitable[Response]]):
         while True:
             account = await self.pool.get_account_or_wait(queue)
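The new `_is_end` helper replaces `_check_stop` (removed in the next hunk) and returns a `(new_total, end_before, end_after)` tuple that drives every pagination loop below. A rough illustration of that contract (fake response stand-in and made-up numbers; `pool` is an AccountsPool set up elsewhere):

class FakeRep:
    headers: dict = {}  # _limit_msg only needs .headers.get(...)

api = Search(pool)
total, end_before, end_after = api._is_end(FakeRep(), "q", res=[1, 2, 3], cur="cursor-bottom-0", cnt=10, lim=20)
# total == 13; end_before is False (the page had items);
# end_after is False (a bottom cursor exists and the limit of 20 is not reached yet)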
@@ -114,23 +136,7 @@ class Search:
         finally:
             account.unlock(queue)
 
-    def _check_stop(self, rep: Response, txt: str, cnt: int, res: list, cur: str | None, lim: int):
-        els = len(res)
-        is_res, is_cur, is_lim = els > 0, cur is not None, lim > 0 and cnt >= lim
-
-        msg = [
-            f"{txt} {cnt:,d} (+{els:,d}) res={int(is_res)} cur={int(is_cur)} lim={int(is_lim)}",
-            f"[{rep.headers['x-rate-limit-remaining']}/{rep.headers['x-rate-limit-limit']}]",
-        ]
-        logger.debug(" ".join(msg))
-
-        end_before = not is_res
-        end_after = not is_cur or is_lim
-        return cnt + els, end_before, end_after
-
-    # search
-
-    def get_search_cursor(self, res: dict) -> str | None:
+    def _get_search_cursor(self, res: dict) -> str | None:
         try:
             for x in res["timeline"]["instructions"]:
                 entry = x.get("replaceEntry", None)
@@ -144,8 +150,63 @@ class Search:
             logger.debug(e)
             return None
 
+    def get_ql_entries(self, obj: dict) -> list[dict]:
+        entries = find_item(obj, "entries")
+        return entries or []
+
+    def _get_ql_cursor(self, obj: dict) -> str | None:
+        try:
+            for entry in self.get_ql_entries(obj):
+                if entry["entryId"].startswith("cursor-bottom-"):
+                    return entry["content"]["value"]
+            return None
+        except Exception:
+            return None
+
+    async def _ql_items(self, op: str, kv: dict, ft: dict = {}, limit=-1):
+        queue, cursor, count = op.split("/")[-1], None, 0
+
+        async def _get(client: AsyncClient):
+            params = {"variables": {**kv, "cursor": cursor}, "features": BASE_FEATURES}
+            return await client.get(f"{GRAPHQL_URL}/{op}", params=encode_params(params))
+
+        async for rep in self._inf_req(queue, _get):
+            obj = rep.json()
+
+            # cursor-top / cursor-bottom always present
+            entries = self.get_ql_entries(obj)
+            entries = [x for x in entries if not x["entryId"].startswith("cursor-")]
+            cursor = self._get_ql_cursor(obj)
+
+            check = self._is_end(rep, queue, entries, cursor, count, limit)
+            count, end_before, end_after = check
+
+            if end_before:
+                return
+
+            yield rep
+
+            if end_after:
+                return
+
+    async def _ql_item(self, op: str, kv: dict, ft: dict = {}):
+        variables, features = {**kv}, {**BASE_FEATURES, **ft}
+        params = {"variables": variables, "features": features}
+
+        async def _get(client: AsyncClient):
+            return await client.get(f"{GRAPHQL_URL}/{op}", params=encode_params(params))
+
+        queue = op.split("/")[-1]
+        async for rep in self._inf_req(queue, _get):
+            logger.debug(f"{queue} {self._limit_msg(rep)}")
+            return rep
+
+        raise Exception("No response")  # todo
+
+    # search
+
     async def search_raw(self, q: str, limit=-1):
-        queue, cursor, all_count = "search", None, 0
+        queue, cursor, count = "search", None, 0
 
         async def _get(client: AsyncClient):
             params = {**SEARCH_PARAMS, "q": q, "count": 20}
@@ -155,11 +216,11 @@ class Search:
         async for rep in self._inf_req(queue, _get):
             data = rep.json()
-            cursor = self.get_search_cursor(data)
+            cursor = self._get_search_cursor(data)
 
             tweets = data.get("globalObjects", {}).get("tweets", [])
-            check = self._check_stop(rep, q, all_count, tweets, cursor, limit)
-            all_count, end_before, end_after = check
+            check = self._is_end(rep, q, tweets, cursor, count, limit)
+            count, end_before, end_after = check
 
             if end_before:
                 return
@@ -171,85 +232,160 @@ class Search:
 
     async def search(self, q: str, limit=-1):
         async for rep in self.search_raw(q, limit=limit):
-            data = rep.json()
-            items = list(data.get("globalObjects", {}).get("tweets", {}).values())
-            for x in items:
-                yield Tweet.parse(x, data)
+            res = rep.json()
+            obj = res.get("globalObjects", {})
+            for x in list(obj.get("tweets", {}).values()):
+                yield Tweet.parse(x, obj)
 
-    # graphql
+    # user_by_id
 
-    def get_ql_cursor(self, obj: dict) -> str | None:
-        try:
-            for entry in get_ql_entries(obj):
-                if entry["entryId"].startswith("cursor-bottom-"):
-                    return entry["content"]["value"]
-            return None
-        except Exception:
-            return None
-
-    async def graphql_items(self, op: str, variables: dict, features: dict = {}, limit=-1):
-        url = f"https://twitter.com/i/api/graphql/{op}"
-        features = {**BASE_FEATURES, **features}
-        queue, cursor, all_count = op.split("/")[-1], None, 0
-
-        async def _get(client: AsyncClient):
-            params = {"variables": {**variables, "cursor": cursor}, "features": features}
-            return await client.get(url, params=json_params(params))
-
-        async for rep in self._inf_req(queue, _get):
-            data = rep.json()
-            entries, cursor = get_ql_entries(data), self.get_ql_cursor(data)
-
-            # cursor-top / cursor-bottom always present
-            items = [x for x in entries if not x["entryId"].startswith("cursor-")]
-            check = self._check_stop(rep, queue, all_count, items, cursor, limit)
-            all_count, end_before, end_after = check
-
-            if end_before:
-                return
-
-            yield rep
-
-            if end_after:
-                return
-
-    async def graphql_item(self, op: str, variables: dict, features: dict = {}):
-        url = f"https://twitter.com/i/api/graphql/{op}"
-        features = {**BASE_FEATURES, **features}
-
-        async def _get(client: AsyncClient):
-            params = {"variables": {**variables}, "features": features}
-            return await client.get(url, params=json_params(params))
-
-        queue = op.split("/")[-1]
-        async for rep in self._inf_req(queue, _get):
-            msg = [
-                f"{queue}",
-                f"[{rep.headers['x-rate-limit-remaining']}/{rep.headers['x-rate-limit-limit']}]",
-            ]
-            logger.debug(" ".join(msg))
-            return rep
-
-    async def user_by_login(self, login: str):
-        op = "sLVLhk0bGj3MVFEKTdax1w/UserByScreenName"
-        kv = {"screen_name": login, "withSafetyModeUserFields": True}
-        return await self.graphql_item(op, kv)
-
-    async def user_by_id(self, uid: int):
+    async def user_by_id_raw(self, uid: int):
         op = "GazOglcBvgLigl3ywt6b3Q/UserByRestId"
         kv = {"userId": str(uid), "withSafetyModeUserFields": True}
-        return await self.graphql_item(op, kv)
+        return await self._ql_item(op, kv)
 
-    async def retweeters(self, twid: int, limit=-1):
+    async def user_by_id(self, uid: int):
+        rep = await self.user_by_id_raw(uid)
+        res = rep.json()
+        return User.parse(to_old_obj(res["data"]["user"]["result"]))
+
+    # user_by_login
+
+    async def user_by_login_raw(self, login: str):
+        op = "sLVLhk0bGj3MVFEKTdax1w/UserByScreenName"
+        kv = {"screen_name": login, "withSafetyModeUserFields": True}
+        return await self._ql_item(op, kv)
+
+    async def user_by_login(self, login: str):
+        rep = await self.user_by_login_raw(login)
+        res = rep.json()
+        return User.parse(to_old_obj(res["data"]["user"]["result"]))
+
+    # tweet_details
+
+    async def tweet_details_raw(self, twid: int):
+        op = "zXaXQgfyR4GxE21uwYQSyA/TweetDetail"
+        kv = {
+            "focalTweetId": str(twid),
+            "referrer": "tweet",  # tweet, profile
+            "with_rux_injections": False,
+            "includePromotedContent": True,
+            "withCommunity": True,
+            "withQuickPromoteEligibilityTweetFields": True,
+            "withBirdwatchNotes": True,
+            "withVoice": True,
+            "withV2Timeline": True,
+            "withDownvotePerspective": False,
+            "withReactionsMetadata": False,
+            "withReactionsPerspective": False,
+            "withSuperFollowsTweetFields": False,
+            "withSuperFollowsUserFields": False,
+        }
+        ft = {
+            "responsive_web_twitter_blue_verified_badge_is_enabled": True,
+            "longform_notetweets_richtext_consumption_enabled": True,
+        }
+        return await self._ql_item(op, kv, ft)
+
+    async def tweet_details(self, twid: int):
+        rep = await self.tweet_details_raw(twid)
+        obj = to_search_like(rep.json())
+        return Tweet.parse(obj["tweets"][str(twid)], obj)
+
+    # followers
+
+    async def followers_raw(self, uid: int, limit=-1):
+        op = "djdTXDIk2qhd4OStqlUFeQ/Followers"
+        kv = {"userId": str(uid), "count": 20, "includePromotedContent": False}
+        async for x in self._ql_items(op, kv, limit=limit):
+            yield x
+
+    async def followers(self, uid: int, limit=-1):
+        async for rep in self.followers_raw(uid, limit=limit):
+            obj = to_search_like(rep.json())
+            for _, v in obj["users"].items():
+                yield User.parse(v)
+
+    # following
+
+    async def following_raw(self, uid: int, limit=-1):
+        op = "IWP6Zt14sARO29lJT35bBw/Following"
+        kv = {"userId": str(uid), "count": 20, "includePromotedContent": False}
+        async for x in self._ql_items(op, kv, limit=limit):
+            yield x
+
+    async def following(self, uid: int, limit=-1):
+        async for rep in self.following_raw(uid, limit=limit):
+            obj = to_search_like(rep.json())
+            for _, v in obj["users"].items():
+                yield User.parse(v)
+
+    # retweeters
+
+    async def retweeters_raw(self, twid: int, limit=-1):
         op = "U5f_jm0CiLmSfI1d4rGleQ/Retweeters"
         kv = {"tweetId": str(twid), "count": 20, "includePromotedContent": True}
-        async for x in self.graphql_items(op, kv, limit=limit):
+        async for x in self._ql_items(op, kv, limit=limit):
+            yield x
+
+    async def retweeters(self, twid: int, limit=-1):
+        async for rep in self.retweeters_raw(twid, limit=limit):
+            obj = to_search_like(rep.json())
+            for _, v in obj["users"].items():
+                yield User.parse(v)
+
+    # favoriters
+
+    async def favoriters_raw(self, twid: int, limit=-1):
+        op = "vcTrPlh9ovFDQejz22q9vg/Favoriters"
+        kv = {"tweetId": str(twid), "count": 20, "includePromotedContent": True}
+        async for x in self._ql_items(op, kv, limit=limit):
            yield x
 
     async def favoriters(self, twid: int, limit=-1):
-        op = "vcTrPlh9ovFDQejz22q9vg/Favoriters"
-        kv = {"tweetId": str(twid), "count": 20, "includePromotedContent": True}
-        async for x in self.graphql_items(op, kv, limit=limit):
+        async for rep in self.favoriters_raw(twid, limit=limit):
+            obj = to_search_like(rep.json())
+            for _, v in obj["users"].items():
+                yield User.parse(v)
+
+    # user_tweets
+
+    async def user_tweets_raw(self, uid: int, limit=-1):
+        op = "CdG2Vuc1v6F5JyEngGpxVw/UserTweets"
+        kv = {
+            "userId": str(uid),
+            "count": 40,
+            "includePromotedContent": True,
+            "withQuickPromoteEligibilityTweetFields": True,
+            "withVoice": True,
+            "withV2Timeline": True,
+        }
+        async for x in self._ql_items(op, kv, limit=limit):
             yield x
+
+    async def user_tweets(self, uid: int, limit=-1):
+        async for rep in self.user_tweets_raw(uid, limit=limit):
+            obj = to_search_like(rep.json())
+            for _, v in obj["tweets"].items():
+                yield Tweet.parse(v, obj)
+
+    # user_tweets_and_replies
+
+    async def user_tweets_and_replies_raw(self, uid: int, limit=-1):
+        op = "zQxfEr5IFxQ2QZ-XMJlKew/UserTweetsAndReplies"
+        kv = {
+            "userId": str(uid),
+            "count": 40,
+            "includePromotedContent": True,
+            "withCommunity": True,
+            "withVoice": True,
+            "withV2Timeline": True,
+        }
+        async for x in self._ql_items(op, kv, limit=limit):
+            yield x
+
+    async def user_tweets_and_replies(self, uid: int, limit=-1):
+        async for rep in self.user_tweets_and_replies_raw(uid, limit=limit):
+            obj = to_search_like(rep.json())
+            for _, v in obj["tweets"].items():
+                yield Tweet.parse(v, obj)
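Taken together, each new GraphQL endpoint comes in two flavours: a `*_raw` method that yields or returns the httpx responses, and a typed wrapper that converts them to `User`/`Tweet` models via `to_search_like`/`to_old_obj`. A minimal usage sketch (assuming `Search` and `AccountsPool` are importable from this package, the pool already holds logged-in accounts, and the IDs below are illustrative):

import asyncio

async def main():
    pool = AccountsPool()  # account setup/login is outside this diff
    api = Search(pool)

    user = await api.user_by_login("twitter")   # -> models.User
    tweet = await api.tweet_details(20)         # -> models.Tweet

    async for follower in api.followers(user.id, limit=40):
        print(follower.username)

    async for tw in api.user_tweets(user.id, limit=100):
        print(tw.url, tw.rawContent[:50])

asyncio.run(main())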

View file

@@ -1,8 +1,49 @@
+import json
+from collections import defaultdict
 from typing import Any, TypeVar
 
+from httpx import HTTPStatusError, Response
+from loguru import logger
+
 T = TypeVar("T")
 
 
+def raise_for_status(rep: Response, label: str):
+    try:
+        rep.raise_for_status()
+    except HTTPStatusError as e:
+        logger.debug(f"{label} - {rep.status_code} - {rep.text}")
+        raise e
+
+
+def encode_params(obj: dict):
+    res = {}
+    for k, v in obj.items():
+        if isinstance(v, dict):
+            v = {a: b for a, b in v.items() if b is not None}
+            v = json.dumps(v, separators=(",", ":"))
+
+        res[k] = str(v)
+
+    return res
+
+
+def get_or(obj: dict, key: str, default_value: T = None) -> Any | T:
+    for part in key.split("."):
+        if part not in obj:
+            return default_value
+        obj = obj[part]
+    return obj
+
+
+def int_or_none(obj: dict, key: str):
+    try:
+        val = get_or(obj, key)
+        return int(val) if val is not None else None
+    except Exception:
+        return None
+
+
 # https://stackoverflow.com/a/43184871
 def find_item(obj: dict, key: str, default=None):
     stack = [iter(obj.items())]
@@ -21,17 +62,33 @@ def find_item(obj: dict, key: str, default=None):
     return default
 
 
-def get_or(obj: dict, key: str, default_value: T = None) -> Any | T:
-    for part in key.split("."):
-        if part not in obj:
-            return default_value
-        obj = obj[part]
-    return obj
+def get_typed_object(obj: dict, res: defaultdict[str, list]):
+    obj_type = obj.get("__typename", None)
+    if obj_type is not None:
+        res[obj_type].append(obj)
+
+    for k, v in obj.items():
+        if isinstance(v, dict):
+            get_typed_object(v, res)
+        elif isinstance(v, list):
+            for x in v:
+                if isinstance(x, dict):
+                    get_typed_object(x, res)
+
+    return res
 
 
-def int_or_none(obj: dict, key: str):
-    try:
-        val = get_or(obj, key)
-        return int(val) if val is not None else None
-    except Exception:
-        return None
+def to_old_obj(obj: dict):
+    return {**obj, **obj["legacy"], "id_str": str(obj["rest_id"]), "id": int(obj["rest_id"])}
+
+
+def to_search_like(obj: dict):
+    tmp = get_typed_object(obj, defaultdict(list))
+
+    tweets = [x for x in tmp.get("Tweet", []) if "legacy" in x]
+    tweets = {str(x["rest_id"]): to_old_obj(x) for x in tweets}
+
+    users = [x for x in tmp.get("User", []) if "legacy" in x and "id" in x]
+    users = {str(x["rest_id"]): to_old_obj(x) for x in users}
+
+    return {"tweets": tweets, "users": users}