add more graphql methods; add parse to model for graphql responses

Этот коммит содержится в:
Vlad Pronsky 2023-04-29 16:19:33 +03:00
родитель 0b94f6feaa
Коммит d4d867aaab
4 изменённых файлов: 337 добавлений и 151 удалений

Просмотреть файл

@ -6,19 +6,15 @@ import time
from datetime import datetime, timezone
from fake_useragent import UserAgent
from httpx import AsyncClient, Client, HTTPStatusError, Response
from httpx import AsyncClient, Client, Response
from loguru import logger
from .utils import raise_for_status
TOKEN = "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA"
TASK_URL = "https://api.twitter.com/1.1/onboarding/task.json"
class RateLimitExceeded(Exception):
def __init__(self, reset: int, cursor: str | None = None):
self.reset = reset
self.cursor = cursor
def search_email_with_confirmation_code(imap: imaplib.IMAP4_SSL, msg_count: int) -> str | None:
for i in range(msg_count, 0, -1):
_, rep = imap.fetch(str(i), "(RFC822)")
@ -52,13 +48,6 @@ def get_verification_code(email: str, password: str, imap_domain: None | str = N
time.sleep(1)
def raise_for_status(rep: Response, label: str):
    """Raise a labelled Exception when *rep* carries an HTTP error status.

    label is prepended so the caller can tell which request failed.
    """
    try:
        rep.raise_for_status()
    except HTTPStatusError as e:
        # Chain the original httpx error so the traceback keeps the real cause
        # (previously the HTTPStatusError context was discarded implicitly).
        raise Exception(f"{label} - {rep.status_code} - {rep.text}") from e
def login_get_guest_token(client: Client) -> str:
rep = client.post("https://api.twitter.com/1.1/guest/activate.json")
raise_for_status(rep, "guest_token")

Просмотреть файл

@ -1,5 +1,5 @@
import email.utils
from dataclasses import dataclass
from dataclasses import asdict, dataclass
from datetime import datetime
from typing import Optional
@ -7,13 +7,29 @@ from .utils import get_or, int_or_none
@dataclass
class Coordinates:
    """A longitude/latitude pair attached to a tweet."""
    longitude: float
    latitude: float
class JSONTrait:
    """Mixin for dataclass models: serialize every field to a plain dict."""

    def json(self) -> dict:
        """Return this dataclass instance as a JSON-serializable dict."""
        return asdict(self)
@dataclass
class Coordinates(JSONTrait):
    """A longitude/latitude point attached to a tweet.

    NOTE(review): this span also contained a stray, body-less `class Place:`
    header left over from the interleaved diff; it was removed (Place is
    defined separately below).
    """

    longitude: float
    latitude: float

    @staticmethod
    def parse(tw_obj: dict):
        """Extract coordinates from a raw tweet object, or None when absent.

        "coordinates" stores [longitude, latitude]; "geo" stores
        [latitude, longitude] - both are normalized to (longitude, latitude).
        """
        if tw_obj.get("coordinates"):
            coords = tw_obj["coordinates"]["coordinates"]
            return Coordinates(coords[0], coords[1])
        if tw_obj.get("geo"):
            coords = tw_obj["geo"]["coordinates"]
            return Coordinates(coords[1], coords[0])
        return None
@dataclass
class Place(JSONTrait):
id: str
fullName: str
name: str
@ -34,7 +50,7 @@ class Place:
@dataclass
class TextLink:
class TextLink(JSONTrait):
url: str
text: str | None
tcourl: str | None
@ -51,22 +67,18 @@ class TextLink:
@dataclass
class UserRef(JSONTrait):
    """Minimal reference to a user (e.g. a mention inside a tweet)."""

    id: int
    username: str
    displayname: str

    @staticmethod
    def parse(obj: dict):
        # The interleaved diff left two return statements here (the second
        # unreachable) plus a duplicate class header; keep the "id_str" form,
        # consistent with Tweet.parse, since large numeric JSON ids can lose
        # precision.
        return UserRef(id=int(obj["id_str"]), username=obj["screen_name"], displayname=obj["name"])
@dataclass
class User:
class User(JSONTrait):
id: int
username: str
displayname: str
@ -115,7 +127,7 @@ class User:
@dataclass
class Tweet:
class Tweet(JSONTrait):
id: int
date: datetime
user: User
@ -136,10 +148,6 @@ class Tweet:
place: Optional[Place] = None
coordinates: Optional[Coordinates] = None
@property
def url(self):
return f"https://twitter.com/{self.user.username}/status/{self.id}"
# renderedContent: str
# source: str | None = None
# sourceUrl: str | None = None
@ -150,23 +158,19 @@ class Tweet:
# card: typing.Optional["Card"] = None
# vibe: typing.Optional["Vibe"] = None
@property
def url(self):
return f"https://twitter.com/{self.user.username}/status/{self.id}"
@staticmethod
def parse(obj: dict, res: dict):
rt_obj = get_or(res, f"globalObjects.tweets.{obj.get('retweeted_status_id_str')}")
qt_obj = get_or(res, f"globalObjects.tweets.{obj.get('quoted_status_id_str')}")
coordinates: Coordinates | None = None
if obj.get("coordinates"):
coords = obj["coordinates"]["coordinates"]
coordinates = Coordinates(coords[0], coords[1])
elif obj.get("geo"):
coords = obj["geo"]["coordinates"]
coordinates = Coordinates(coords[1], coords[0])
rt_obj = get_or(res, f"tweets.{obj.get('retweeted_status_id_str')}")
qt_obj = get_or(res, f"tweets.{obj.get('quoted_status_id_str')}")
return Tweet(
id=obj["id"],
id=int(obj["id_str"]),
date=email.utils.parsedate_to_datetime(obj["created_at"]),
user=User.parse(res["globalObjects"]["users"][obj["user_id_str"]]),
user=User.parse(res["users"][obj["user_id_str"]]),
lang=obj["lang"],
rawContent=obj["full_text"],
replyCount=obj["reply_count"],
@ -182,5 +186,5 @@ class Tweet:
retweetedTweet=Tweet.parse(rt_obj, res) if rt_obj else None,
quotedTweet=Tweet.parse(qt_obj, res) if qt_obj else None,
place=Place.parse(obj["place"]) if obj.get("place") else None,
coordinates=coordinates,
coordinates=Coordinates.parse(obj),
)

Просмотреть файл

@ -4,9 +4,9 @@ from typing import Awaitable, Callable
from httpx import AsyncClient, HTTPStatusError, Response
from loguru import logger
from .models import Tweet
from .models import Tweet, User
from .pool import AccountsPool
from .utils import find_item
from .utils import encode_params, find_item, to_old_obj, to_search_like
BASIC_SEARCH_PARAMS = """
include_profile_interstitial_type=1
@ -71,6 +71,7 @@ BASE_FEATURES = {
SEARCH_URL = "https://api.twitter.com/2/search/adaptive.json"
SEARCH_PARAMS = dict(x.split("=") for x in BASIC_SEARCH_PARAMS.splitlines() if x)
GRAPHQL_URL = "https://twitter.com/i/api/graphql/"
def filter_null(obj: dict):
@ -93,6 +94,27 @@ class Search:
def __init__(self, pool: AccountsPool):
    # Pool of authenticated accounts used to execute the rate-limited requests.
    self.pool = pool
# http helpers
def _limit_msg(self, rep: Response):
    """Format "remaining/limit" from the response's rate-limit headers (-1 when missing)."""
    remaining = rep.headers.get("x-rate-limit-remaining", -1)
    limit = rep.headers.get("x-rate-limit-limit", -1)
    return f"{remaining}/{limit}"
def _is_end(self, rep: Response, q: str, res: list, cur: str | None, cnt: int, lim: int):
    """Decide whether pagination is exhausted.

    Returns (new_total, end_before, end_after): end_before means stop without
    yielding this page (no results came back); end_after means stop after
    yielding it (no cursor, or the requested limit has been reached).
    """
    got = len(res)
    total = cnt + got
    has_results = got > 0
    has_cursor = cur is not None
    hit_limit = lim > 0 and total >= lim

    stats = f"{q} {total:,d} (+{got:,d})"
    flags = f"res={int(has_results)} cur={int(has_cursor)} lim={int(hit_limit)}"
    logger.debug(" ".join([stats, flags, self._limit_msg(rep)]))

    return total, not has_results, not has_cursor or hit_limit
async def _inf_req(self, queue: str, cb: Callable[[AsyncClient], Awaitable[Response]]):
while True:
account = await self.pool.get_account_or_wait(queue)
@ -114,23 +136,7 @@ class Search:
finally:
account.unlock(queue)
def _check_stop(self, rep: Response, txt: str, cnt: int, res: list, cur: str | None, lim: int):
    """Log pagination progress and decide when to stop.

    Returns (new_total, end_before, end_after). Note: the limit test uses the
    count *before* this page is added (original behavior, preserved).
    """
    got = len(res)
    has_res = got > 0
    has_cur = cur is not None
    hit_lim = lim > 0 and cnt >= lim

    head = f"{txt} {cnt:,d} (+{got:,d}) res={int(has_res)} cur={int(has_cur)} lim={int(hit_lim)}"
    tail = f"[{rep.headers['x-rate-limit-remaining']}/{rep.headers['x-rate-limit-limit']}]"
    logger.debug(" ".join([head, tail]))

    return cnt + got, not has_res, not has_cur or hit_lim
# search
def get_search_cursor(self, res: dict) -> str | None:
def _get_search_cursor(self, res: dict) -> str | None:
try:
for x in res["timeline"]["instructions"]:
entry = x.get("replaceEntry", None)
@ -144,8 +150,63 @@ class Search:
logger.debug(e)
return None
def get_ql_entries(self, obj: dict) -> list[dict]:
    """Return the timeline "entries" list found anywhere in a GraphQL response ([] if none)."""
    return find_item(obj, "entries") or []
def _get_ql_cursor(self, obj: dict) -> str | None:
    """Extract the bottom pagination cursor from a GraphQL response, if any."""
    try:
        # Lazy generator keeps the original first-match / early-exit semantics.
        matches = (
            e["content"]["value"]
            for e in self.get_ql_entries(obj)
            if e["entryId"].startswith("cursor-bottom-")
        )
        return next(matches, None)
    except Exception:
        # Malformed or unexpected payload: treat as "no cursor".
        return None
async def _ql_items(self, op: str, kv: dict, ft: dict | None = None, limit=-1):
    """Paginate a GraphQL timeline operation, yielding raw responses.

    op -- "queryId/OperationName"; kv -- GraphQL variables; ft -- extra feature
    flags merged over BASE_FEATURES (was silently ignored before, inconsistent
    with _ql_item); limit -- stop after this many items (-1 = no cap).
    """
    queue, cursor, count = op.split("/")[-1], None, 0
    features = {**BASE_FEATURES, **(ft or {})}  # None default avoids the mutable {} default

    async def _get(client: AsyncClient):
        # `cursor` is read from the enclosing scope, so each page request
        # picks up the latest value assigned in the loop below.
        params = {"variables": {**kv, "cursor": cursor}, "features": features}
        # GRAPHQL_URL already ends with "/" - the old f"{GRAPHQL_URL}/{op}"
        # produced a double slash in the path.
        return await client.get(f"{GRAPHQL_URL}{op}", params=encode_params(params))

    async for rep in self._inf_req(queue, _get):
        obj = rep.json()

        # cursor-top / cursor-bottom entries are always present; drop them
        entries = [x for x in self.get_ql_entries(obj) if not x["entryId"].startswith("cursor-")]
        cursor = self._get_ql_cursor(obj)

        count, end_before, end_after = self._is_end(rep, queue, entries, cursor, count, limit)
        if end_before:
            return

        yield rep

        if end_after:
            return
async def _ql_item(self, op: str, kv: dict, ft: dict | None = None):
    """Execute a single (non-paginated) GraphQL operation and return the response.

    op -- "queryId/OperationName"; kv -- GraphQL variables; ft -- extra feature
    flags merged over BASE_FEATURES. Raises when no response is obtained.
    """
    # None default instead of the mutable {} default argument.
    variables, features = {**kv}, {**BASE_FEATURES, **(ft or {})}
    params = {"variables": variables, "features": features}

    async def _get(client: AsyncClient):
        # GRAPHQL_URL already ends with "/" - the old f"{GRAPHQL_URL}/{op}"
        # produced a double slash in the path.
        return await client.get(f"{GRAPHQL_URL}{op}", params=encode_params(params))

    queue = op.split("/")[-1]
    async for rep in self._inf_req(queue, _get):
        logger.debug(f"{queue} {self._limit_msg(rep)}")
        return rep

    raise Exception("No response")  # todo: dedicated exception type
# search
async def search_raw(self, q: str, limit=-1):
queue, cursor, all_count = "search", None, 0
queue, cursor, count = "search", None, 0
async def _get(client: AsyncClient):
params = {**SEARCH_PARAMS, "q": q, "count": 20}
@ -155,11 +216,11 @@ class Search:
async for rep in self._inf_req(queue, _get):
data = rep.json()
cursor = self.get_search_cursor(data)
cursor = self._get_search_cursor(data)
tweets = data.get("globalObjects", {}).get("tweets", [])
check = self._check_stop(rep, q, all_count, tweets, cursor, limit)
all_count, end_before, end_after = check
check = self._is_end(rep, q, tweets, cursor, count, limit)
count, end_before, end_after = check
if end_before:
return
@ -171,85 +232,160 @@ class Search:
async def search(self, q: str, limit=-1):
    """Search tweets matching query *q*, yielding parsed Tweet objects.

    The interleaved diff left both the old and the new parsing paths in this
    body, which would have yielded every tweet twice; only the new path
    (globalObjects passed as the lookup context) is kept.
    """
    async for rep in self.search_raw(q, limit=limit):
        obj = rep.json().get("globalObjects", {})
        for x in list(obj.get("tweets", {}).values()):
            yield Tweet.parse(x, obj)
# graphql
# user_by_id
def get_ql_cursor(self, obj: dict) -> str | None:
    """Return the bottom pagination cursor from a GraphQL response, or None."""
    try:
        # Bug fix: get_ql_entries is a method on this class - the bare name
        # raised NameError, which the broad except swallowed, so this method
        # always returned None.
        for entry in self.get_ql_entries(obj):
            if entry["entryId"].startswith("cursor-bottom-"):
                return entry["content"]["value"]
        return None
    except Exception:
        return None
async def graphql_items(self, op: str, variables: dict, features: dict = {}, limit=-1):
    """Paginate a GraphQL timeline operation, yielding raw responses.

    NOTE(review): `features: dict = {}` is a mutable default argument; it is
    only read here, but a None default would be safer.
    """
    url = f"https://twitter.com/i/api/graphql/{op}"
    features = {**BASE_FEATURES, **features}
    queue, cursor, all_count = op.split("/")[-1], None, 0

    async def _get(client: AsyncClient):
        # `cursor` is read from the enclosing scope, so each page request
        # picks up the latest value assigned in the loop below.
        params = {"variables": {**variables, "cursor": cursor}, "features": features}
        # NOTE(review): `json_params` is not defined anywhere in the visible
        # code (utils defines `encode_params`) - confirm it exists at runtime.
        return await client.get(url, params=json_params(params))

    async for rep in self._inf_req(queue, _get):
        data = rep.json()
        # NOTE(review): bare `get_ql_entries` - elsewhere in this class it is
        # a method (self.get_ql_entries); a NameError here is NOT caught.
        entries, cursor = get_ql_entries(data), self.get_ql_cursor(data)
        # cursor-top / cursor-bottom always present
        items = [x for x in entries if not x["entryId"].startswith("cursor-")]
        check = self._check_stop(rep, queue, all_count, items, cursor, limit)
        all_count, end_before, end_after = check
        if end_before:
            return
        yield rep
        if end_after:
            return
async def graphql_item(self, op: str, variables: dict, features: dict = {}):
    """Execute a single (non-paginated) GraphQL operation; return the first response.

    NOTE(review): returns None implicitly when the request loop yields nothing;
    `features: dict = {}` is a mutable default (read-only here).
    """
    url = f"https://twitter.com/i/api/graphql/{op}"
    features = {**BASE_FEATURES, **features}

    async def _get(client: AsyncClient):
        # NOTE(review): `json_params` is not defined anywhere in the visible
        # code (utils defines `encode_params`) - confirm it exists at runtime.
        params = {"variables": {**variables}, "features": features}
        return await client.get(url, params=json_params(params))

    queue = op.split("/")[-1]
    async for rep in self._inf_req(queue, _get):
        msg = [
            f"{queue}",
            f"[{rep.headers['x-rate-limit-remaining']}/{rep.headers['x-rate-limit-limit']}]",
        ]
        logger.debug(" ".join(msg))
        return rep
async def user_by_login(self, login: str):
    """Fetch a user profile by screen name (raw GraphQL response)."""
    kv = {"screen_name": login, "withSafetyModeUserFields": True}
    return await self.graphql_item("sLVLhk0bGj3MVFEKTdax1w/UserByScreenName", kv)
async def user_by_id_raw(self, uid: int):
    """Fetch a user by numeric id; returns the raw GraphQL response.

    NOTE(review): this span interleaved a body-less old `user_by_id` header
    (a syntax error as flattened) with the new `_raw` variant; only the
    `_raw` variant is kept - the parsed wrapper `user_by_id` exists below.
    """
    op = "GazOglcBvgLigl3ywt6b3Q/UserByRestId"
    kv = {"userId": str(uid), "withSafetyModeUserFields": True}
    return await self._ql_item(op, kv)
async def user_by_id(self, uid: int):
    """Fetch a user by numeric id and parse it into a User model.

    NOTE(review): a dangling, body-less `async def retweeters(...)` header
    (diff residue, a syntax error as flattened) preceded this method and was
    removed - retweeters is defined further below.
    """
    rep = await self.user_by_id_raw(uid)
    res = rep.json()
    # GraphQL nests the profile under data.user.result; convert to legacy shape
    return User.parse(to_old_obj(res["data"]["user"]["result"]))
# user_by_login

async def user_by_login_raw(self, login: str):
    """Fetch a user profile by screen name; returns the raw GraphQL response."""
    kv = {"screen_name": login, "withSafetyModeUserFields": True}
    return await self._ql_item("sLVLhk0bGj3MVFEKTdax1w/UserByScreenName", kv)
async def user_by_login(self, login: str):
    """Fetch a user profile by screen name, parsed into a User model."""
    rep = await self.user_by_login_raw(login)
    data = rep.json()
    return User.parse(to_old_obj(data["data"]["user"]["result"]))
# tweet_details

async def tweet_details_raw(self, twid: int):
    """Fetch a tweet's detail timeline; returns the raw GraphQL response."""
    variables = {
        "focalTweetId": str(twid),
        "referrer": "tweet",  # tweet, profile
        "with_rux_injections": False,
        "includePromotedContent": True,
        "withCommunity": True,
        "withQuickPromoteEligibilityTweetFields": True,
        "withBirdwatchNotes": True,
        "withVoice": True,
        "withV2Timeline": True,
        "withDownvotePerspective": False,
        "withReactionsMetadata": False,
        "withReactionsPerspective": False,
        "withSuperFollowsTweetFields": False,
        "withSuperFollowsUserFields": False,
    }
    features = {
        "responsive_web_twitter_blue_verified_badge_is_enabled": True,
        "longform_notetweets_richtext_consumption_enabled": True,
    }
    return await self._ql_item("zXaXQgfyR4GxE21uwYQSyA/TweetDetail", variables, features)
async def tweet_details(self, twid: int):
    """Fetch and parse a single tweet by id."""
    rep = await self.tweet_details_raw(twid)
    data = to_search_like(rep.json())
    return Tweet.parse(data["tweets"][str(twid)], data)
# followers

async def followers_raw(self, uid: int, limit=-1):
    """Yield raw responses for a user's followers timeline."""
    kv = {"userId": str(uid), "count": 20, "includePromotedContent": False}
    async for rep in self._ql_items("djdTXDIk2qhd4OStqlUFeQ/Followers", kv, limit=limit):
        yield rep
async def followers(self, uid: int, limit=-1):
    """Yield parsed User models for accounts following *uid*."""
    async for rep in self.followers_raw(uid, limit=limit):
        data = to_search_like(rep.json())
        for user in data["users"].values():
            yield User.parse(user)
# following

async def following_raw(self, uid: int, limit=-1):
    """Yield raw responses for a user's following timeline."""
    kv = {"userId": str(uid), "count": 20, "includePromotedContent": False}
    async for rep in self._ql_items("IWP6Zt14sARO29lJT35bBw/Following", kv, limit=limit):
        yield rep
async def following(self, uid: int, limit=-1):
    """Yield parsed User models for accounts that *uid* follows."""
    async for rep in self.following_raw(uid, limit=limit):
        data = to_search_like(rep.json())
        for user in data["users"].values():
            yield User.parse(user)
# retweeters

async def retweeters_raw(self, twid: int, limit=-1):
    """Yield raw responses for a tweet's retweeters timeline.

    NOTE(review): this span contained both the old `graphql_items` loop and
    the new `_ql_items` loop from the interleaved diff; only the `_ql_items`
    path is kept.
    """
    op = "U5f_jm0CiLmSfI1d4rGleQ/Retweeters"
    kv = {"tweetId": str(twid), "count": 20, "includePromotedContent": True}
    async for x in self._ql_items(op, kv, limit=limit):
        yield x
async def retweeters(self, twid: int, limit=-1):
    """Yield parsed User models for accounts that retweeted *twid*."""
    async for rep in self.retweeters_raw(twid, limit=limit):
        data = to_search_like(rep.json())
        for user in data["users"].values():
            yield User.parse(user)
# favoriters

async def favoriters_raw(self, twid: int, limit=-1):
    """Yield raw responses for a tweet's favoriters (likes) timeline."""
    kv = {"tweetId": str(twid), "count": 20, "includePromotedContent": True}
    async for rep in self._ql_items("vcTrPlh9ovFDQejz22q9vg/Favoriters", kv, limit=limit):
        yield rep
async def favoriters(self, twid: int, limit=-1):
    """Yield parsed User models for accounts that liked tweet *twid*.

    NOTE(review): dead duplicated lines from the old `graphql_items` path
    (diff interleave) were removed; only the `favoriters_raw` path is kept.
    """
    async for rep in self.favoriters_raw(twid, limit=limit):
        obj = to_search_like(rep.json())
        for _, v in obj["users"].items():
            yield User.parse(v)
# user_tweets

async def user_tweets_raw(self, uid: int, limit=-1):
    """Yield raw responses for a user's tweets timeline (40 items per page)."""
    variables = {
        "userId": str(uid),
        "count": 40,
        "includePromotedContent": True,
        "withQuickPromoteEligibilityTweetFields": True,
        "withVoice": True,
        "withV2Timeline": True,
    }
    async for rep in self._ql_items("CdG2Vuc1v6F5JyEngGpxVw/UserTweets", variables, limit=limit):
        yield rep
async def user_tweets(self, uid: int, limit=-1):
    """Yield parsed Tweet models from a user's tweets timeline."""
    async for rep in self.user_tweets_raw(uid, limit=limit):
        data = to_search_like(rep.json())
        for tweet in data["tweets"].values():
            yield Tweet.parse(tweet, data)
# user_tweets_and_replies

async def user_tweets_and_replies_raw(self, uid: int, limit=-1):
    """Yield raw responses for a user's tweets-and-replies timeline (40 per page)."""
    variables = {
        "userId": str(uid),
        "count": 40,
        "includePromotedContent": True,
        "withCommunity": True,
        "withVoice": True,
        "withV2Timeline": True,
    }
    async for rep in self._ql_items("zQxfEr5IFxQ2QZ-XMJlKew/UserTweetsAndReplies", variables, limit=limit):
        yield rep
async def user_tweets_and_replies(self, uid: int, limit=-1):
    """Yield parsed Tweet models from a user's tweets-and-replies timeline."""
    async for rep in self.user_tweets_and_replies_raw(uid, limit=limit):
        data = to_search_like(rep.json())
        for tweet in data["tweets"].values():
            yield Tweet.parse(tweet, data)

Просмотреть файл

@ -1,8 +1,49 @@
import json
from collections import defaultdict
from typing import Any, TypeVar
from httpx import HTTPStatusError, Response
from loguru import logger
T = TypeVar("T")
def raise_for_status(rep: Response, label: str):
    """Log a labelled debug message and re-raise when *rep* has an HTTP error status."""
    try:
        rep.raise_for_status()
    except HTTPStatusError:
        logger.debug(f"{label} - {rep.status_code} - {rep.text}")
        # Bare `raise` re-raises the active exception and preserves the original
        # traceback without adding an extra frame (idiomatic vs `raise e`).
        raise
def encode_params(obj: dict):
    """Stringify request params; dict values become compact JSON with None entries dropped."""
    encoded = {}
    for key, val in obj.items():
        if isinstance(val, dict):
            cleaned = {k: v for k, v in val.items() if v is not None}
            val = json.dumps(cleaned, separators=(",", ":"))
        encoded[key] = str(val)
    return encoded
def get_or(obj: dict, key: str, default_value: T = None) -> Any | T:
    """Walk a dotted *key* path through nested dicts; return *default_value* on a miss.

    Robustness fix: also returns the default when an intermediate value is not
    a dict - previously e.g. get_or({"a": 1}, "a.b") raised TypeError from the
    `in` test on a non-container.
    """
    for part in key.split("."):
        if not isinstance(obj, dict) or part not in obj:
            return default_value
        obj = obj[part]
    return obj
def int_or_none(obj: dict, key: str):
    """Look up dotted *key* and coerce the value to int; None when missing or not coercible."""
    try:
        value = get_or(obj, key)
        if value is None:
            return None
        return int(value)
    except Exception:
        return None
# https://stackoverflow.com/a/43184871
def find_item(obj: dict, key: str, default=None):
stack = [iter(obj.items())]
@ -21,17 +62,33 @@ def find_item(obj: dict, key: str, default=None):
return default
def get_or(obj: dict, key: str, default_value: T = None) -> Any | T:
    """Walk a dotted *key* path through nested dicts; return *default_value* on a miss.

    Robustness fix: also returns the default when an intermediate value is not
    a dict - previously e.g. get_or({"a": 1}, "a.b") raised TypeError from the
    `in` test on a non-container.
    """
    for part in key.split("."):
        if not isinstance(obj, dict) or part not in obj:
            return default_value
        obj = obj[part]
    return obj
def get_typed_object(obj: dict, res: defaultdict[str, list]):
    """Recursively collect every sub-dict carrying "__typename" into *res*, grouped by type name.

    Traverses nested dicts and dicts inside lists; returns *res* for chaining.
    """
    typename = obj.get("__typename", None)
    if typename is not None:
        res[typename].append(obj)

    for value in obj.values():
        if isinstance(value, dict):
            get_typed_object(value, res)
        elif isinstance(value, list):
            for item in value:
                if isinstance(item, dict):
                    get_typed_object(item, res)

    return res
def int_or_none(obj: dict, key: str):
    """Return int(value) for dotted *key*, or None if absent or unparseable."""
    try:
        val = get_or(obj, key)
        return None if val is None else int(val)
    except Exception:
        return None
def to_old_obj(obj: dict):
    """Flatten a GraphQL user/tweet object into the legacy v1.1 shape.

    Merges the "legacy" sub-dict into the top level and adds "id"/"id_str"
    derived from "rest_id".
    """
    rest_id = obj["rest_id"]
    return {**obj, **obj["legacy"], "id_str": str(rest_id), "id": int(rest_id)}
def to_search_like(obj: dict):
    """Convert a GraphQL response into the search-endpoint shape.

    Returns {"tweets": {...}, "users": {...}}, each keyed by stringified
    rest_id and converted to the legacy shape; objects without a "legacy"
    payload (and users without "id") are skipped.
    """
    typed = get_typed_object(obj, defaultdict(list))

    tweets = {str(x["rest_id"]): to_old_obj(x) for x in typed.get("Tweet", []) if "legacy" in x}
    users = {str(x["rest_id"]): to_old_obj(x) for x in typed.get("User", []) if "legacy" in x and "id" in x}

    return {"tweets": tweets, "users": users}