зеркало из
				https://github.com/viginum-datalab/twscrape.git
				synced 2025-10-30 21:46:13 +02:00 
			
		
		
		
	feat: update search api to gql
Этот коммит содержится в:
		
							родитель
							
								
									d0479e2ece
								
							
						
					
					
						Коммит
						aa4fbc8cad
					
				
										
											
												Разница между файлами не показана из-за своего большого размера
												Загрузить разницу
											
										
									
								
							| @ -1,7 +1,7 @@ | |||||||
| from httpx import Response | from httpx import Response | ||||||
| 
 | 
 | ||||||
| from .accounts_pool import AccountsPool | from .accounts_pool import AccountsPool | ||||||
| from .constants import GQL_FEATURES, GQL_URL, SEARCH_PARAMS, SEARCH_URL | from .constants import GQL_FEATURES, GQL_URL | ||||||
| from .logger import logger | from .logger import logger | ||||||
| from .models import Tweet, User | from .models import Tweet, User | ||||||
| from .queue_client import QueueClient, req_id | from .queue_client import QueueClient, req_id | ||||||
| @ -36,12 +36,15 @@ class API: | |||||||
| 
 | 
 | ||||||
|     # gql helpers |     # gql helpers | ||||||
| 
 | 
 | ||||||
|     async def _gql_items(self, op: str, kv: dict, limit=-1): |     async def _gql_items(self, op: str, kv: dict, ft: dict | None = None, limit=-1): | ||||||
|         queue, cursor, count, active = op.split("/")[-1], None, 0, True |         queue, cursor, count, active = op.split("/")[-1], None, 0, True | ||||||
|  |         kv, ft = {**kv}, {**GQL_FEATURES, **(ft or {})} | ||||||
| 
 | 
 | ||||||
|         async with QueueClient(self.pool, queue, self.debug) as client: |         async with QueueClient(self.pool, queue, self.debug) as client: | ||||||
|             while active: |             while active: | ||||||
|                 params = {"variables": {**kv, "cursor": cursor}, "features": GQL_FEATURES} |                 params = {"variables": {**kv, "cursor": cursor}, "features": ft} | ||||||
|  |                 if op.endswith("/SearchTimeline"): | ||||||
|  |                     params["fieldToggles"] = {"withArticleRichContentState": False} | ||||||
| 
 | 
 | ||||||
|                 rep = await client.get(f"{GQL_URL}/{op}", params=encode_params(params)) |                 rep = await client.get(f"{GQL_URL}/{op}", params=encode_params(params)) | ||||||
|                 obj = rep.json() |                 obj = rep.json() | ||||||
| @ -65,35 +68,35 @@ class API: | |||||||
| 
 | 
 | ||||||
|     # search |     # search | ||||||
| 
 | 
 | ||||||
|     async def search_raw(self, q: str, limit=-1): |     async def search_raw(self, q: str, limit=-1, kv=None): | ||||||
|         queue, cursor, count, active = "search", None, 0, True |         op = "nK1dw4oV3k4w5TdtcAdSww/SearchTimeline" | ||||||
|  |         kv = { | ||||||
|  |             "rawQuery": q, | ||||||
|  |             "count": 20, | ||||||
|  |             "product": "Latest", | ||||||
|  |             "querySource": "typed_query", | ||||||
|  |             **(kv or {}), | ||||||
|  |         } | ||||||
|  |         ft = { | ||||||
|  |             "rweb_lists_timeline_redesign_enabled": True, | ||||||
|  |             "creator_subscriptions_tweet_preview_api_enabled": True, | ||||||
|  |             "responsive_web_twitter_article_tweet_consumption_enabled": False, | ||||||
|  |             "tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": True, | ||||||
|  |             "responsive_web_media_download_video_enabled": False, | ||||||
|  |             "longform_notetweets_inline_media_enabled": True, | ||||||
|  |         } | ||||||
|  |         async for x in self._gql_items(op, kv, ft, limit=limit): | ||||||
|  |             yield x | ||||||
| 
 | 
 | ||||||
|         async with QueueClient(self.pool, queue, self.debug) as client: |     async def search(self, q: str, limit=-1, kv=None): | ||||||
|             while active: |  | ||||||
|                 params = {**SEARCH_PARAMS, "q": q, "count": 20} |  | ||||||
|                 params["cursor" if cursor else "requestContext"] = cursor if cursor else "launch" |  | ||||||
| 
 |  | ||||||
|                 rep = await client.get(SEARCH_URL, params=params) |  | ||||||
|                 obj = rep.json() |  | ||||||
| 
 |  | ||||||
|                 tweets = obj.get("globalObjects", {}).get("tweets", []) |  | ||||||
|                 cursor = self._get_cursor(obj) |  | ||||||
| 
 |  | ||||||
|                 rep, count, active = self._is_end(rep, q, tweets, cursor, count, limit) |  | ||||||
|                 if rep is None: |  | ||||||
|                     return |  | ||||||
| 
 |  | ||||||
|                 yield rep |  | ||||||
| 
 |  | ||||||
|     async def search(self, q: str, limit=-1): |  | ||||||
|         twids = set() |         twids = set() | ||||||
|         async for rep in self.search_raw(q, limit=limit): |         async for rep in self.search_raw(q, limit=limit, kv=kv): | ||||||
|             res = rep.json() |             obj = to_old_rep(rep.json()) | ||||||
|             obj = res.get("globalObjects", {}) |             for x in obj["tweets"].values(): | ||||||
|             for x in list(obj.get("tweets", {}).values()): |                 tmp = Tweet.parse(x, obj) | ||||||
|                 if x["id_str"] not in twids: |                 if tmp.id not in twids: | ||||||
|                     twids.add(x["id_str"]) |                     twids.add(tmp.id) | ||||||
|                     yield Tweet.parse(x, obj) |                     yield tmp | ||||||
| 
 | 
 | ||||||
|     # user_by_id |     # user_by_id | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -2,7 +2,6 @@ TOKEN = "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Z | |||||||
| 
 | 
 | ||||||
| GQL_URL = "https://twitter.com/i/api/graphql" | GQL_URL = "https://twitter.com/i/api/graphql" | ||||||
| LOGIN_URL = "https://api.twitter.com/1.1/onboarding/task.json" | LOGIN_URL = "https://api.twitter.com/1.1/onboarding/task.json" | ||||||
| SEARCH_URL = "https://api.twitter.com/2/search/adaptive.json" |  | ||||||
| 
 | 
 | ||||||
| GQL_FEATURES = { | GQL_FEATURES = { | ||||||
|     "blue_business_profile_image_shape_enabled": True, |     "blue_business_profile_image_shape_enabled": True, | ||||||
| @ -25,42 +24,3 @@ GQL_FEATURES = { | |||||||
|     "longform_notetweets_rich_text_read_enabled": True, |     "longform_notetweets_rich_text_read_enabled": True, | ||||||
|     "responsive_web_enhance_cards_enabled": False, |     "responsive_web_enhance_cards_enabled": False, | ||||||
| } | } | ||||||
| 
 |  | ||||||
| SEARCH_PARAMS = { |  | ||||||
|     "include_profile_interstitial_type": "1", |  | ||||||
|     "include_blocking": "1", |  | ||||||
|     "include_blocked_by": "1", |  | ||||||
|     "include_followed_by": "1", |  | ||||||
|     "include_want_retweets": "1", |  | ||||||
|     "include_mute_edge": "1", |  | ||||||
|     "include_can_dm": "1", |  | ||||||
|     "include_can_media_tag": "1", |  | ||||||
|     "include_ext_has_nft_avatar": "1", |  | ||||||
|     "include_ext_is_blue_verified": "1", |  | ||||||
|     "include_ext_verified_type": "1", |  | ||||||
|     "include_ext_profile_image_shape": "1", |  | ||||||
|     "skip_status": "1", |  | ||||||
|     "cards_platform": "Web-12", |  | ||||||
|     "include_cards": "1", |  | ||||||
|     "include_ext_alt_text": "true", |  | ||||||
|     "include_ext_limited_action_results": "false", |  | ||||||
|     "include_quote_count": "true", |  | ||||||
|     "include_reply_count": "1", |  | ||||||
|     "tweet_mode": "extended", |  | ||||||
|     "include_ext_views": "true", |  | ||||||
|     "include_entities": "true", |  | ||||||
|     "include_user_entities": "true", |  | ||||||
|     "include_ext_media_color": "true", |  | ||||||
|     "include_ext_media_availability": "true", |  | ||||||
|     "include_ext_sensitive_media_warning": "true", |  | ||||||
|     "include_ext_trusted_friends_metadata": "true", |  | ||||||
|     "send_error_codes": "true", |  | ||||||
|     "simple_quoted_tweet": "true", |  | ||||||
|     "tweet_search_mode": "live", |  | ||||||
|     "query_source": "typed_query", |  | ||||||
|     "count": "20", |  | ||||||
|     "pc": "1", |  | ||||||
|     "spelling_corrections": "1", |  | ||||||
|     "include_ext_edit_control": "true", |  | ||||||
|     "ext": "mediaStats,highlightedLabel,hasNftAvatar,voiceInfo,birdwatchPivot,enrichments,superFollowMetadata,unmentionInfo,editControl,vibe",  # noqa: E501 |  | ||||||
| } |  | ||||||
|  | |||||||
		Загрузка…
	
	
			
			x
			
			
		
	
		Ссылка в новой задаче
	
	Block a user
	 Vlad Pronsky
						Vlad Pronsky