зеркало из
https://github.com/viginum-datalab/twscrape.git
synced 2025-10-29 13:06:13 +02:00
feat: update search api to gql
Этот коммит содержится в:
родитель
d0479e2ece
Коммит
aa4fbc8cad
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
@ -1,7 +1,7 @@
|
||||
from httpx import Response
|
||||
|
||||
from .accounts_pool import AccountsPool
|
||||
from .constants import GQL_FEATURES, GQL_URL, SEARCH_PARAMS, SEARCH_URL
|
||||
from .constants import GQL_FEATURES, GQL_URL
|
||||
from .logger import logger
|
||||
from .models import Tweet, User
|
||||
from .queue_client import QueueClient, req_id
|
||||
@ -36,12 +36,15 @@ class API:
|
||||
|
||||
# gql helpers
|
||||
|
||||
async def _gql_items(self, op: str, kv: dict, limit=-1):
|
||||
async def _gql_items(self, op: str, kv: dict, ft: dict | None = None, limit=-1):
|
||||
queue, cursor, count, active = op.split("/")[-1], None, 0, True
|
||||
kv, ft = {**kv}, {**GQL_FEATURES, **(ft or {})}
|
||||
|
||||
async with QueueClient(self.pool, queue, self.debug) as client:
|
||||
while active:
|
||||
params = {"variables": {**kv, "cursor": cursor}, "features": GQL_FEATURES}
|
||||
params = {"variables": {**kv, "cursor": cursor}, "features": ft}
|
||||
if op.endswith("/SearchTimeline"):
|
||||
params["fieldToggles"] = {"withArticleRichContentState": False}
|
||||
|
||||
rep = await client.get(f"{GQL_URL}/{op}", params=encode_params(params))
|
||||
obj = rep.json()
|
||||
@ -65,35 +68,35 @@ class API:
|
||||
|
||||
# search
|
||||
|
||||
async def search_raw(self, q: str, limit=-1):
|
||||
queue, cursor, count, active = "search", None, 0, True
|
||||
async def search_raw(self, q: str, limit=-1, kv=None):
|
||||
op = "nK1dw4oV3k4w5TdtcAdSww/SearchTimeline"
|
||||
kv = {
|
||||
"rawQuery": q,
|
||||
"count": 20,
|
||||
"product": "Latest",
|
||||
"querySource": "typed_query",
|
||||
**(kv or {}),
|
||||
}
|
||||
ft = {
|
||||
"rweb_lists_timeline_redesign_enabled": True,
|
||||
"creator_subscriptions_tweet_preview_api_enabled": True,
|
||||
"responsive_web_twitter_article_tweet_consumption_enabled": False,
|
||||
"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": True,
|
||||
"responsive_web_media_download_video_enabled": False,
|
||||
"longform_notetweets_inline_media_enabled": True,
|
||||
}
|
||||
async for x in self._gql_items(op, kv, ft, limit=limit):
|
||||
yield x
|
||||
|
||||
async with QueueClient(self.pool, queue, self.debug) as client:
|
||||
while active:
|
||||
params = {**SEARCH_PARAMS, "q": q, "count": 20}
|
||||
params["cursor" if cursor else "requestContext"] = cursor if cursor else "launch"
|
||||
|
||||
rep = await client.get(SEARCH_URL, params=params)
|
||||
obj = rep.json()
|
||||
|
||||
tweets = obj.get("globalObjects", {}).get("tweets", [])
|
||||
cursor = self._get_cursor(obj)
|
||||
|
||||
rep, count, active = self._is_end(rep, q, tweets, cursor, count, limit)
|
||||
if rep is None:
|
||||
return
|
||||
|
||||
yield rep
|
||||
|
||||
async def search(self, q: str, limit=-1):
|
||||
async def search(self, q: str, limit=-1, kv=None):
|
||||
twids = set()
|
||||
async for rep in self.search_raw(q, limit=limit):
|
||||
res = rep.json()
|
||||
obj = res.get("globalObjects", {})
|
||||
for x in list(obj.get("tweets", {}).values()):
|
||||
if x["id_str"] not in twids:
|
||||
twids.add(x["id_str"])
|
||||
yield Tweet.parse(x, obj)
|
||||
async for rep in self.search_raw(q, limit=limit, kv=kv):
|
||||
obj = to_old_rep(rep.json())
|
||||
for x in obj["tweets"].values():
|
||||
tmp = Tweet.parse(x, obj)
|
||||
if tmp.id not in twids:
|
||||
twids.add(tmp.id)
|
||||
yield tmp
|
||||
|
||||
# user_by_id
|
||||
|
||||
|
||||
@ -2,7 +2,6 @@ TOKEN = "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Z
|
||||
|
||||
GQL_URL = "https://twitter.com/i/api/graphql"
|
||||
LOGIN_URL = "https://api.twitter.com/1.1/onboarding/task.json"
|
||||
SEARCH_URL = "https://api.twitter.com/2/search/adaptive.json"
|
||||
|
||||
GQL_FEATURES = {
|
||||
"blue_business_profile_image_shape_enabled": True,
|
||||
@ -25,42 +24,3 @@ GQL_FEATURES = {
|
||||
"longform_notetweets_rich_text_read_enabled": True,
|
||||
"responsive_web_enhance_cards_enabled": False,
|
||||
}
|
||||
|
||||
SEARCH_PARAMS = {
|
||||
"include_profile_interstitial_type": "1",
|
||||
"include_blocking": "1",
|
||||
"include_blocked_by": "1",
|
||||
"include_followed_by": "1",
|
||||
"include_want_retweets": "1",
|
||||
"include_mute_edge": "1",
|
||||
"include_can_dm": "1",
|
||||
"include_can_media_tag": "1",
|
||||
"include_ext_has_nft_avatar": "1",
|
||||
"include_ext_is_blue_verified": "1",
|
||||
"include_ext_verified_type": "1",
|
||||
"include_ext_profile_image_shape": "1",
|
||||
"skip_status": "1",
|
||||
"cards_platform": "Web-12",
|
||||
"include_cards": "1",
|
||||
"include_ext_alt_text": "true",
|
||||
"include_ext_limited_action_results": "false",
|
||||
"include_quote_count": "true",
|
||||
"include_reply_count": "1",
|
||||
"tweet_mode": "extended",
|
||||
"include_ext_views": "true",
|
||||
"include_entities": "true",
|
||||
"include_user_entities": "true",
|
||||
"include_ext_media_color": "true",
|
||||
"include_ext_media_availability": "true",
|
||||
"include_ext_sensitive_media_warning": "true",
|
||||
"include_ext_trusted_friends_metadata": "true",
|
||||
"send_error_codes": "true",
|
||||
"simple_quoted_tweet": "true",
|
||||
"tweet_search_mode": "live",
|
||||
"query_source": "typed_query",
|
||||
"count": "20",
|
||||
"pc": "1",
|
||||
"spelling_corrections": "1",
|
||||
"include_ext_edit_control": "true",
|
||||
"ext": "mediaStats,highlightedLabel,hasNftAvatar,voiceInfo,birdwatchPivot,enrichments,superFollowMetadata,unmentionInfo,editControl,vibe", # noqa: E501
|
||||
}
|
||||
|
||||
Загрузка…
x
Ссылка в новой задаче
Block a user