feat: migrate search API to the GraphQL (gql) SearchTimeline endpoint

2025-10-30 21:46:13 +02:00 · 2023-07-04 15:08:33 +03:00 · 2023-07-04 15:08:33 +03:00 · aa4fbc8cad
--- a/tests/mocked-data/search_raw.json
+++ b/tests/mocked-data/search_raw.json
--- a/twscrape/api.py
+++ b/twscrape/api.py
@ -1,7 +1,7 @@
 from httpx import Response
 from .accounts_pool import AccountsPool
-from .constants import GQL_FEATURES, GQL_URL, SEARCH_PARAMS, SEARCH_URL
+from .constants import GQL_FEATURES, GQL_URL
 from .logger import logger
 from .models import Tweet, User
 from .queue_client import QueueClient, req_id
@ -36,12 +36,15 @@ class API:
    # gql helpers
-    async def _gql_items(self, op: str, kv: dict, limit=-1):
+    async def _gql_items(self, op: str, kv: dict, ft: dict | None = None, limit=-1):
        queue, cursor, count, active = op.split("/")[-1], None, 0, True
        kv, ft = {**kv}, {**GQL_FEATURES, **(ft or {})}
        async with QueueClient(self.pool, queue, self.debug) as client:
            while active:
-                params = {"variables": {**kv, "cursor": cursor}, "features": GQL_FEATURES}
+                params = {"variables": {**kv, "cursor": cursor}, "features": ft}
                if op.endswith("/SearchTimeline"):
                    params["fieldToggles"] = {"withArticleRichContentState": False}
                rep = await client.get(f"{GQL_URL}/{op}", params=encode_params(params))
                obj = rep.json()
@ -65,35 +68,35 @@ class API:
    # search
-    async def search_raw(self, q: str, limit=-1):
+    async def search_raw(self, q: str, limit=-1, kv=None):
-        queue, cursor, count, active = "search", None, 0, True
+        op = "nK1dw4oV3k4w5TdtcAdSww/SearchTimeline"
        kv = {
            "rawQuery": q,
            "count": 20,
            "product": "Latest",
            "querySource": "typed_query",
            **(kv or {}),
        }
        ft = {
            "rweb_lists_timeline_redesign_enabled": True,
            "creator_subscriptions_tweet_preview_api_enabled": True,
            "responsive_web_twitter_article_tweet_consumption_enabled": False,
            "tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": True,
            "responsive_web_media_download_video_enabled": False,
            "longform_notetweets_inline_media_enabled": True,
        }
        async for x in self._gql_items(op, kv, ft, limit=limit):
            yield x
-        async with QueueClient(self.pool, queue, self.debug) as client:
+    async def search(self, q: str, limit=-1, kv=None):
            while active:
                params = {**SEARCH_PARAMS, "q": q, "count": 20}
                params["cursor" if cursor else "requestContext"] = cursor if cursor else "launch"
                rep = await client.get(SEARCH_URL, params=params)
                obj = rep.json()
                tweets = obj.get("globalObjects", {}).get("tweets", [])
                cursor = self._get_cursor(obj)
                rep, count, active = self._is_end(rep, q, tweets, cursor, count, limit)
                if rep is None:
                    return
                yield rep
    async def search(self, q: str, limit=-1):
        twids = set()
-        async for rep in self.search_raw(q, limit=limit):
+        async for rep in self.search_raw(q, limit=limit, kv=kv):
-            res = rep.json()
+            obj = to_old_rep(rep.json())
-            obj = res.get("globalObjects", {})
+            for x in obj["tweets"].values():
-            for x in list(obj.get("tweets", {}).values()):
+                tmp = Tweet.parse(x, obj)
-                if x["id_str"] not in twids:
+                if tmp.id not in twids:
-                    twids.add(x["id_str"])
+                    twids.add(tmp.id)
-                    yield Tweet.parse(x, obj)
+                    yield tmp
    # user_by_id
--- a/twscrape/constants.py
+++ b/twscrape/constants.py
@ -2,7 +2,6 @@ TOKEN = "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Z
 GQL_URL = "https://twitter.com/i/api/graphql"
 LOGIN_URL = "https://api.twitter.com/1.1/onboarding/task.json"
 SEARCH_URL = "https://api.twitter.com/2/search/adaptive.json"
 GQL_FEATURES = {
    "blue_business_profile_image_shape_enabled": True,
@ -25,42 +24,3 @@ GQL_FEATURES = {
    "longform_notetweets_rich_text_read_enabled": True,
    "responsive_web_enhance_cards_enabled": False,
 }
 SEARCH_PARAMS = {
    "include_profile_interstitial_type": "1",
    "include_blocking": "1",
    "include_blocked_by": "1",
    "include_followed_by": "1",
    "include_want_retweets": "1",
    "include_mute_edge": "1",
    "include_can_dm": "1",
    "include_can_media_tag": "1",
    "include_ext_has_nft_avatar": "1",
    "include_ext_is_blue_verified": "1",
    "include_ext_verified_type": "1",
    "include_ext_profile_image_shape": "1",
    "skip_status": "1",
    "cards_platform": "Web-12",
    "include_cards": "1",
    "include_ext_alt_text": "true",
    "include_ext_limited_action_results": "false",
    "include_quote_count": "true",
    "include_reply_count": "1",
    "tweet_mode": "extended",
    "include_ext_views": "true",
    "include_entities": "true",
    "include_user_entities": "true",
    "include_ext_media_color": "true",
    "include_ext_media_availability": "true",
    "include_ext_sensitive_media_warning": "true",
    "include_ext_trusted_friends_metadata": "true",
    "send_error_codes": "true",
    "simple_quoted_tweet": "true",
    "tweet_search_mode": "live",
    "query_source": "typed_query",
    "count": "20",
    "pc": "1",
    "spelling_corrections": "1",
    "include_ext_edit_control": "true",
    "ext": "mediaStats,highlightedLabel,hasNftAvatar,voiceInfo,birdwatchPivot,enrichments,superFollowMetadata,unmentionInfo,editControl,vibe",  # noqa: E501
 }