зеркало из
				https://github.com/viginum-datalab/twscrape.git
				synced 2025-10-30 21:46:13 +02:00 
			
		
		
		
	add graphql api support
Этот коммит содержится в:
		
							родитель
							
								
									2ed247af0f
								
							
						
					
					
						Коммит
						9744b80a67
					
				| @ -20,6 +20,17 @@ class AccountsPool: | |||||||
|                 return x |                 return x | ||||||
|         return None |         return None | ||||||
| 
 | 
 | ||||||
|  |     async def get_account_or_wait(self, queue: str) -> UserClient: | ||||||
|  |         while True: | ||||||
|  |             account = self.get_account(queue) | ||||||
|  |             if account: | ||||||
|  |                 logger.debug(f"Using account {account.username} for queue '{queue}'") | ||||||
|  |                 account.lock(queue) | ||||||
|  |                 return account | ||||||
|  |             else: | ||||||
|  |                 logger.debug(f"No accounts available for queue '{queue}' (sleeping for 5 sec)") | ||||||
|  |                 await asyncio.sleep(5) | ||||||
|  | 
 | ||||||
|     async def execute( |     async def execute( | ||||||
|         self, |         self, | ||||||
|         queue: str, |         queue: str, | ||||||
| @ -29,14 +40,7 @@ class AccountsPool: | |||||||
|         cursor: str | None = None, |         cursor: str | None = None, | ||||||
|     ): |     ): | ||||||
|         while True: |         while True: | ||||||
|             account = self.get_account(queue) |             account = await self.get_account_or_wait(queue) | ||||||
|             if not account: |  | ||||||
|                 logger.debug(f"No accounts available for queue {queue}, sleeping 5 seconds") |  | ||||||
|                 await asyncio.sleep(5) |  | ||||||
|                 continue |  | ||||||
|             else: |  | ||||||
|                 account.lock(queue) |  | ||||||
|                 logger.debug(f"Using account {account.username} for queue {queue}") |  | ||||||
| 
 | 
 | ||||||
|             try: |             try: | ||||||
|                 client = account.make_client() |                 client = account.make_client() | ||||||
| @ -47,7 +51,7 @@ class AccountsPool: | |||||||
|             except HTTPStatusError as e: |             except HTTPStatusError as e: | ||||||
|                 if e.response.status_code == 429: |                 if e.response.status_code == 429: | ||||||
|                     account.update_limit(queue, e.response) |                     account.update_limit(queue, e.response) | ||||||
|                     logger.debug(f"Account {account.username} is frozen") |                     logger.debug(f"Rate limit reached for account {account.username}") | ||||||
|                     continue |                     continue | ||||||
|                 else: |                 else: | ||||||
|                     raise e |                     raise e | ||||||
|  | |||||||
							
								
								
									
										110
									
								
								twapi/search.py
									
									
									
									
									
								
							
							
						
						
									
										110
									
								
								twapi/search.py
									
									
									
									
									
								
							| @ -1,3 +1,5 @@ | |||||||
|  | import json | ||||||
|  | 
 | ||||||
| from httpx import AsyncClient, Response | from httpx import AsyncClient, Response | ||||||
| from loguru import logger | from loguru import logger | ||||||
| 
 | 
 | ||||||
| @ -41,12 +43,54 @@ include_ext_edit_control=true | |||||||
| ext=mediaStats%2ChighlightedLabel%2ChasNftAvatar%2CvoiceInfo%2CbirdwatchPivot%2Cenrichments%2CsuperFollowMetadata%2CunmentionInfo%2CeditControl%2Cvibe | ext=mediaStats%2ChighlightedLabel%2ChasNftAvatar%2CvoiceInfo%2CbirdwatchPivot%2Cenrichments%2CsuperFollowMetadata%2CunmentionInfo%2CeditControl%2Cvibe | ||||||
| """ | """ | ||||||
| 
 | 
 | ||||||
|  | BASE_FEATURES = { | ||||||
|  |     "blue_business_profile_image_shape_enabled": True, | ||||||
|  |     "responsive_web_graphql_exclude_directive_enabled": True, | ||||||
|  |     "verified_phone_label_enabled": False, | ||||||
|  |     "responsive_web_graphql_skip_user_profile_image_extensions_enabled": False, | ||||||
|  |     "responsive_web_graphql_timeline_navigation_enabled": True, | ||||||
|  |     # | ||||||
|  |     "tweetypie_unmention_optimization_enabled": True, | ||||||
|  |     "vibe_api_enabled": True, | ||||||
|  |     "responsive_web_edit_tweet_api_enabled": True, | ||||||
|  |     "graphql_is_translatable_rweb_tweet_is_translatable_enabled": True, | ||||||
|  |     "view_counts_everywhere_api_enabled": True, | ||||||
|  |     "longform_notetweets_consumption_enabled": True, | ||||||
|  |     "tweet_awards_web_tipping_enabled": False, | ||||||
|  |     "freedom_of_speech_not_reach_fetch_enabled": True, | ||||||
|  |     "standardized_nudges_misinfo": True, | ||||||
|  |     "tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": False, | ||||||
|  |     "interactive_text_enabled": True, | ||||||
|  |     "responsive_web_text_conversations_enabled": False, | ||||||
|  |     "longform_notetweets_rich_text_read_enabled": True, | ||||||
|  |     "responsive_web_enhance_cards_enabled": False, | ||||||
|  | } | ||||||
|  | 
 | ||||||
| SEARCH_URL = "https://api.twitter.com/2/search/adaptive.json" | SEARCH_URL = "https://api.twitter.com/2/search/adaptive.json" | ||||||
| SEARCH_PARAMS = dict(x.split("=") for x in BASIC_SEARCH_PARAMS.splitlines() if x) | SEARCH_PARAMS = dict(x.split("=") for x in BASIC_SEARCH_PARAMS.splitlines() if x) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | def json_params(params: dict): | ||||||
|  |     return {k: json.dumps(v, separators=(",", ":")) for k, v in params.items()} | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def get_ql_entries(obj: dict) -> list[dict]: | ||||||
|  |     try: | ||||||
|  |         key = list(obj["data"].keys())[0] | ||||||
|  |         return obj["data"][key]["timeline"]["instructions"][0]["entries"] | ||||||
|  |     except Exception: | ||||||
|  |         return [] | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def get_ql_cursor(obj: dict) -> str | None: | ||||||
|  |     for entry in get_ql_entries(obj): | ||||||
|  |         if entry["entryId"].startswith("cursor-bottom-"): | ||||||
|  |             return entry["content"]["value"] | ||||||
|  |     return None | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| def rep_info(rep: Response) -> str: | def rep_info(rep: Response) -> str: | ||||||
|     return f"[{rep.headers['x-rate-limit-remaining']}/{rep.headers['x-rate-limit-limit']}]" |     return f"[{rep.status_code} ~ {rep.headers['x-rate-limit-remaining']}/{rep.headers['x-rate-limit-limit']}]" | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class Search: | class Search: | ||||||
| @ -67,7 +111,7 @@ class Search: | |||||||
|             logger.debug(e) |             logger.debug(e) | ||||||
|             return None |             return None | ||||||
| 
 | 
 | ||||||
|     async def get(self, client: AsyncClient, q: str, cursor: str | None): |     async def get_items(self, client: AsyncClient, q: str, cursor: str | None): | ||||||
|         while True: |         while True: | ||||||
|             params = {**SEARCH_PARAMS, "q": q, "count": 20} |             params = {**SEARCH_PARAMS, "q": q, "count": 20} | ||||||
|             params["cursor" if cursor else "requestContext"] = cursor if cursor else "launch" |             params["cursor" if cursor else "requestContext"] = cursor if cursor else "launch" | ||||||
| @ -79,16 +123,16 @@ class Search: | |||||||
|             cursor = self.get_next_cursor(data) |             cursor = self.get_next_cursor(data) | ||||||
|             tweets = data.get("globalObjects", {}).get("tweets", []) |             tweets = data.get("globalObjects", {}).get("tweets", []) | ||||||
|             if not tweets or not cursor: |             if not tweets or not cursor: | ||||||
|                 is_tweets = len(tweets) > 0 |                 is_result = len(tweets) > 0 | ||||||
|                 is_cursor = cursor is not None |                 is_cursor = cursor is not None | ||||||
|                 logger.debug(f"{q} - no more results [res: {is_tweets}, cur: {is_cursor}]") |                 logger.debug(f"{q} - no more items [res={is_result} cur={is_cursor}]") | ||||||
|                 return |                 return | ||||||
| 
 | 
 | ||||||
|             yield rep, data, cursor |             yield rep, data, cursor | ||||||
| 
 | 
 | ||||||
|     async def query(self, q: str): |     async def search(self, q: str): | ||||||
|         total_count = 0 |         total_count = 0 | ||||||
|         async for x in self.pool.execute("search", lambda c, cur: self.get(c, q, cur)): |         async for x in self.pool.execute("search", lambda c, cur: self.get_items(c, q, cur)): | ||||||
|             rep, data, cursor = x |             rep, data, cursor = x | ||||||
| 
 | 
 | ||||||
|             tweets = data.get("globalObjects", {}).get("tweets", []) |             tweets = data.get("globalObjects", {}).get("tweets", []) | ||||||
| @ -96,3 +140,57 @@ class Search: | |||||||
|             logger.debug(f"{q} - {total_count:,d} (+{len(tweets):,d}) {rep_info(rep)}") |             logger.debug(f"{q} - {total_count:,d} (+{len(tweets):,d}) {rep_info(rep)}") | ||||||
| 
 | 
 | ||||||
|             yield rep |             yield rep | ||||||
|  | 
 | ||||||
|  |     async def graphql_items(self, op: str, variables: dict, features: dict = {}, limit=-1): | ||||||
|  |         url = f"https://twitter.com/i/api/graphql/{op}" | ||||||
|  |         features = {**BASE_FEATURES, **features} | ||||||
|  | 
 | ||||||
|  |         cursor, all_count, queue = None, 0, op.split("/")[-1] | ||||||
|  |         while True: | ||||||
|  |             account = await self.pool.get_account_or_wait(queue) | ||||||
|  |             client = account.make_client() | ||||||
|  | 
 | ||||||
|  |             try: | ||||||
|  |                 params = {"variables": {**variables, "cursor": cursor}, "features": features} | ||||||
|  |                 rep = await client.get(url, params=json_params(params)) | ||||||
|  |                 logger.debug(f"{url} {rep_info(rep)}") | ||||||
|  |                 rep.raise_for_status() | ||||||
|  | 
 | ||||||
|  |                 data = rep.json() | ||||||
|  |                 entries, cursor = get_ql_entries(data), get_ql_cursor(data) | ||||||
|  | 
 | ||||||
|  |                 # cursor-top / cursor-bottom always present | ||||||
|  |                 now_count = len([x for x in entries if not x["entryId"].startswith("cursor-")]) | ||||||
|  |                 all_count += now_count | ||||||
|  | 
 | ||||||
|  |                 yield rep | ||||||
|  | 
 | ||||||
|  |                 if not cursor or not now_count or (limit > 0 and all_count >= limit): | ||||||
|  |                     return | ||||||
|  |             finally: | ||||||
|  |                 account.unlock(queue) | ||||||
|  | 
 | ||||||
|  |     async def graphql_item(self, op: str, variables: dict, features: dict = {}): | ||||||
|  |         res: list[Response] = [] | ||||||
|  |         async for x in self.graphql_items(op, variables, features): | ||||||
|  |             res.append(x) | ||||||
|  |             break | ||||||
|  |         return res[0] | ||||||
|  | 
 | ||||||
|  |     async def user_by_login(self, login: str): | ||||||
|  |         v = {"screen_name": login, "withSafetyModeUserFields": True} | ||||||
|  |         return await self.graphql_item("sLVLhk0bGj3MVFEKTdax1w/UserByScreenName", v) | ||||||
|  | 
 | ||||||
|  |     async def user_by_id(self, uid: int): | ||||||
|  |         v = {"userId": str(uid), "withSafetyModeUserFields": True} | ||||||
|  |         return await self.graphql_item("GazOglcBvgLigl3ywt6b3Q/UserByRestId", v) | ||||||
|  | 
 | ||||||
|  |     async def retweeters(self, twid: int, limit=-1): | ||||||
|  |         v = {"tweetId": str(twid), "count": 20, "includePromotedContent": True} | ||||||
|  |         async for x in self.graphql_items("U5f_jm0CiLmSfI1d4rGleQ/Retweeters", v, limit=limit): | ||||||
|  |             yield x | ||||||
|  | 
 | ||||||
|  |     async def favoriters(self, twid: int, limit=-1): | ||||||
|  |         v = {"tweetId": str(twid), "count": 20, "includePromotedContent": True} | ||||||
|  |         async for x in self.graphql_items("vcTrPlh9ovFDQejz22q9vg/Favoriters", v, limit=limit): | ||||||
|  |             yield x | ||||||
|  | |||||||
		Загрузка…
	
	
			
			x
			
			
		
	
		Ссылка в новой задаче
	
	Block a user
	 Vlad Pronsky
						Vlad Pronsky