зеркало из
				https://github.com/viginum-datalab/twscrape.git
				synced 2025-10-30 21:46:13 +02:00 
			
		
		
		
	add snscrape-like models; remove quering from accounts pool; add parsed version of search method
Этот коммит содержится в:
		
							родитель
							
								
									9744b80a67
								
							
						
					
					
						Коммит
						0b94f6feaa
					
				
							
								
								
									
										1
									
								
								.gitignore
									
									
									
									
										поставляемый
									
									
								
							
							
						
						
									
										1
									
								
								.gitignore
									
									
									
									
										поставляемый
									
									
								
							| @ -1,4 +1,5 @@ | |||||||
| .DS_Store | .DS_Store | ||||||
|  | .ruff_cache/ | ||||||
| sessions/ | sessions/ | ||||||
| 
 | 
 | ||||||
| # Byte-compiled / optimized / DLL files | # Byte-compiled / optimized / DLL files | ||||||
|  | |||||||
							
								
								
									
										6
									
								
								.vscode/settings.json
									
									
									
									
										поставляемый
									
									
								
							
							
						
						
									
										6
									
								
								.vscode/settings.json
									
									
									
									
										поставляемый
									
									
								
							| @ -1,7 +1,11 @@ | |||||||
| { | { | ||||||
|  |   "files.exclude": { | ||||||
|  |     ".ruff_cache": true, | ||||||
|  |     ".pytest_cache": true | ||||||
|  |   }, | ||||||
|   "[python]": { |   "[python]": { | ||||||
|     "editor.formatOnSave": true, |     "editor.formatOnSave": true, | ||||||
|     "editor.codeActionsOnSave": ["source.organizeImports"] |     "editor.codeActionsOnSave": ["source.organizeImports"] | ||||||
|   }, |   }, | ||||||
|   "python.formatting.provider": "black", |   "python.formatting.provider": "black" | ||||||
| } | } | ||||||
|  | |||||||
							
								
								
									
										186
									
								
								twapi/models.py
									
									
									
									
									
										Обычный файл
									
								
							
							
						
						
									
										186
									
								
								twapi/models.py
									
									
									
									
									
										Обычный файл
									
								
							| @ -0,0 +1,186 @@ | |||||||
|  | import email.utils | ||||||
|  | from dataclasses import dataclass | ||||||
|  | from datetime import datetime | ||||||
|  | from typing import Optional | ||||||
|  | 
 | ||||||
|  | from .utils import get_or, int_or_none | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | @dataclass | ||||||
|  | class Coordinates: | ||||||
|  |     longitude: float | ||||||
|  |     latitude: float | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | @dataclass | ||||||
|  | class Place: | ||||||
|  |     id: str | ||||||
|  |     fullName: str | ||||||
|  |     name: str | ||||||
|  |     type: str | ||||||
|  |     country: str | ||||||
|  |     countryCode: str | ||||||
|  | 
 | ||||||
|  |     @staticmethod | ||||||
|  |     def parse(obj: dict): | ||||||
|  |         return Place( | ||||||
|  |             id=obj["id"], | ||||||
|  |             fullName=obj["full_name"], | ||||||
|  |             name=obj["name"], | ||||||
|  |             type=obj["place_type"], | ||||||
|  |             country=obj["country"], | ||||||
|  |             countryCode=obj["country_code"], | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | @dataclass | ||||||
|  | class TextLink: | ||||||
|  |     url: str | ||||||
|  |     text: str | None | ||||||
|  |     tcourl: str | None | ||||||
|  |     indices: tuple[int, int] | ||||||
|  | 
 | ||||||
|  |     @staticmethod | ||||||
|  |     def parse(obj: dict): | ||||||
|  |         return TextLink( | ||||||
|  |             url=obj["expanded_url"], | ||||||
|  |             text=obj["display_url"], | ||||||
|  |             tcourl=obj["url"], | ||||||
|  |             indices=tuple(obj["indices"]), | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | @dataclass | ||||||
|  | class UserRef: | ||||||
|  |     id: int | ||||||
|  |     username: str | ||||||
|  |     displayname: str | ||||||
|  | 
 | ||||||
|  |     @staticmethod | ||||||
|  |     def parse(obj: dict): | ||||||
|  |         return UserRef( | ||||||
|  |             id=obj["id"], | ||||||
|  |             username=obj["screen_name"], | ||||||
|  |             displayname=obj["name"], | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | @dataclass | ||||||
|  | class User: | ||||||
|  |     id: int | ||||||
|  |     username: str | ||||||
|  |     displayname: str | ||||||
|  |     rawDescription: str | ||||||
|  |     created: datetime | ||||||
|  |     followersCount: int | ||||||
|  |     friendsCount: int | ||||||
|  |     statusesCount: int | ||||||
|  |     favouritesCount: int | ||||||
|  |     listedCount: int | ||||||
|  |     mediaCount: int | ||||||
|  |     location: str | ||||||
|  |     profileImageUrl: str | ||||||
|  |     profileBannerUrl: str | None = None | ||||||
|  |     protected: bool | None = None | ||||||
|  |     verified: bool | None = None | ||||||
|  | 
 | ||||||
|  |     # descriptionLinks: typing.Optional[typing.List[TextLink]] = None | ||||||
|  |     # link: typing.Optional[TextLink] = None | ||||||
|  |     # label: typing.Optional["UserLabel"] = None | ||||||
|  | 
 | ||||||
|  |     @property | ||||||
|  |     def url(self) -> str: | ||||||
|  |         return f"https://twitter.com/{self.username}" | ||||||
|  | 
 | ||||||
|  |     @staticmethod | ||||||
|  |     def parse(obj: dict): | ||||||
|  |         return User( | ||||||
|  |             id=int(obj["id_str"]), | ||||||
|  |             username=obj["screen_name"], | ||||||
|  |             displayname=obj["name"], | ||||||
|  |             rawDescription=obj["description"], | ||||||
|  |             created=email.utils.parsedate_to_datetime(obj["created_at"]), | ||||||
|  |             followersCount=obj["followers_count"], | ||||||
|  |             friendsCount=obj["friends_count"], | ||||||
|  |             statusesCount=obj["statuses_count"], | ||||||
|  |             favouritesCount=obj["favourites_count"], | ||||||
|  |             listedCount=obj["listed_count"], | ||||||
|  |             mediaCount=obj["media_count"], | ||||||
|  |             location=obj["location"], | ||||||
|  |             profileImageUrl=obj["profile_image_url_https"], | ||||||
|  |             profileBannerUrl=obj.get("profile_banner_url"), | ||||||
|  |             verified=obj.get("verified"), | ||||||
|  |             protected=obj.get("protected"), | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | @dataclass | ||||||
|  | class Tweet: | ||||||
|  |     id: int | ||||||
|  |     date: datetime | ||||||
|  |     user: User | ||||||
|  |     lang: str | ||||||
|  |     rawContent: str | ||||||
|  |     replyCount: int | ||||||
|  |     retweetCount: int | ||||||
|  |     likeCount: int | ||||||
|  |     quoteCount: int | ||||||
|  |     conversationId: int | ||||||
|  |     hashtags: list[str] | ||||||
|  |     cashtags: list[str] | ||||||
|  |     mentionedUsers: list[UserRef] | ||||||
|  |     links: list[TextLink] | ||||||
|  |     viewCount: int | None = None | ||||||
|  |     retweetedTweet: Optional["Tweet"] = None | ||||||
|  |     quotedTweet: Optional["Tweet"] = None | ||||||
|  |     place: Optional[Place] = None | ||||||
|  |     coordinates: Optional[Coordinates] = None | ||||||
|  | 
 | ||||||
|  |     @property | ||||||
|  |     def url(self): | ||||||
|  |         return f"https://twitter.com/{self.user.username}/status/{self.id}" | ||||||
|  | 
 | ||||||
|  |     # renderedContent: str | ||||||
|  |     # source: str | None = None | ||||||
|  |     # sourceUrl: str | None = None | ||||||
|  |     # sourceLabel: str | None = None | ||||||
|  |     # media: typing.Optional[typing.List["Medium"]] = None | ||||||
|  |     # inReplyToTweetId: typing.Optional[int] = None | ||||||
|  |     # inReplyToUser: typing.Optional["User"] = None | ||||||
|  |     # card: typing.Optional["Card"] = None | ||||||
|  |     # vibe: typing.Optional["Vibe"] = None | ||||||
|  | 
 | ||||||
|  |     @staticmethod | ||||||
|  |     def parse(obj: dict, res: dict): | ||||||
|  |         rt_obj = get_or(res, f"globalObjects.tweets.{obj.get('retweeted_status_id_str')}") | ||||||
|  |         qt_obj = get_or(res, f"globalObjects.tweets.{obj.get('quoted_status_id_str')}") | ||||||
|  | 
 | ||||||
|  |         coordinates: Coordinates | None = None | ||||||
|  |         if obj.get("coordinates"): | ||||||
|  |             coords = obj["coordinates"]["coordinates"] | ||||||
|  |             coordinates = Coordinates(coords[0], coords[1]) | ||||||
|  |         elif obj.get("geo"): | ||||||
|  |             coords = obj["geo"]["coordinates"] | ||||||
|  |             coordinates = Coordinates(coords[1], coords[0]) | ||||||
|  | 
 | ||||||
|  |         return Tweet( | ||||||
|  |             id=obj["id"], | ||||||
|  |             date=email.utils.parsedate_to_datetime(obj["created_at"]), | ||||||
|  |             user=User.parse(res["globalObjects"]["users"][obj["user_id_str"]]), | ||||||
|  |             lang=obj["lang"], | ||||||
|  |             rawContent=obj["full_text"], | ||||||
|  |             replyCount=obj["reply_count"], | ||||||
|  |             retweetCount=obj["retweet_count"], | ||||||
|  |             likeCount=obj["favorite_count"], | ||||||
|  |             quoteCount=obj["quote_count"], | ||||||
|  |             conversationId=int(obj["conversation_id_str"]), | ||||||
|  |             hashtags=[x["text"] for x in get_or(obj, "entities.hashtags", [])], | ||||||
|  |             cashtags=[x["text"] for x in get_or(obj, "entities.symbols", [])], | ||||||
|  |             mentionedUsers=[UserRef.parse(x) for x in get_or(obj, "entities.user_mentions", [])], | ||||||
|  |             links=[TextLink.parse(x) for x in get_or(obj, "entities.urls", [])], | ||||||
|  |             viewCount=int_or_none(obj, "ext_views.count"), | ||||||
|  |             retweetedTweet=Tweet.parse(rt_obj, res) if rt_obj else None, | ||||||
|  |             quotedTweet=Tweet.parse(qt_obj, res) if qt_obj else None, | ||||||
|  |             place=Place.parse(obj["place"]) if obj.get("place") else None, | ||||||
|  |             coordinates=coordinates, | ||||||
|  |         ) | ||||||
| @ -1,7 +1,5 @@ | |||||||
| import asyncio | import asyncio | ||||||
| from typing import AsyncGenerator, Callable, Tuple |  | ||||||
| 
 | 
 | ||||||
| from httpx import AsyncClient, HTTPStatusError, Response |  | ||||||
| from loguru import logger | from loguru import logger | ||||||
| 
 | 
 | ||||||
| from .client import UserClient | from .client import UserClient | ||||||
| @ -30,30 +28,3 @@ class AccountsPool: | |||||||
|             else: |             else: | ||||||
|                 logger.debug(f"No accounts available for queue '{queue}' (sleeping for 5 sec)") |                 logger.debug(f"No accounts available for queue '{queue}' (sleeping for 5 sec)") | ||||||
|                 await asyncio.sleep(5) |                 await asyncio.sleep(5) | ||||||
| 
 |  | ||||||
|     async def execute( |  | ||||||
|         self, |  | ||||||
|         queue: str, |  | ||||||
|         cb: Callable[ |  | ||||||
|             [AsyncClient, str | None], AsyncGenerator[Tuple[Response, dict, str | None], None] |  | ||||||
|         ], |  | ||||||
|         cursor: str | None = None, |  | ||||||
|     ): |  | ||||||
|         while True: |  | ||||||
|             account = await self.get_account_or_wait(queue) |  | ||||||
| 
 |  | ||||||
|             try: |  | ||||||
|                 client = account.make_client() |  | ||||||
|                 async for x in cb(client, cursor): |  | ||||||
|                     rep, data, cursor = x |  | ||||||
|                     yield rep, data, cursor |  | ||||||
|                 return  # exit if no more results |  | ||||||
|             except HTTPStatusError as e: |  | ||||||
|                 if e.response.status_code == 429: |  | ||||||
|                     account.update_limit(queue, e.response) |  | ||||||
|                     logger.debug(f"Rate limit reached for account {account.username}") |  | ||||||
|                     continue |  | ||||||
|                 else: |  | ||||||
|                     raise e |  | ||||||
|             finally: |  | ||||||
|                 account.unlock(queue) |  | ||||||
|  | |||||||
							
								
								
									
										205
									
								
								twapi/search.py
									
									
									
									
									
								
							
							
						
						
									
										205
									
								
								twapi/search.py
									
									
									
									
									
								
							| @ -1,9 +1,12 @@ | |||||||
| import json | import json | ||||||
|  | from typing import Awaitable, Callable | ||||||
| 
 | 
 | ||||||
| from httpx import AsyncClient, Response | from httpx import AsyncClient, HTTPStatusError, Response | ||||||
| from loguru import logger | from loguru import logger | ||||||
| 
 | 
 | ||||||
|  | from .models import Tweet | ||||||
| from .pool import AccountsPool | from .pool import AccountsPool | ||||||
|  | from .utils import find_item | ||||||
| 
 | 
 | ||||||
| BASIC_SEARCH_PARAMS = """ | BASIC_SEARCH_PARAMS = """ | ||||||
| include_profile_interstitial_type=1 | include_profile_interstitial_type=1 | ||||||
| @ -70,34 +73,64 @@ SEARCH_URL = "https://api.twitter.com/2/search/adaptive.json" | |||||||
| SEARCH_PARAMS = dict(x.split("=") for x in BASIC_SEARCH_PARAMS.splitlines() if x) | SEARCH_PARAMS = dict(x.split("=") for x in BASIC_SEARCH_PARAMS.splitlines() if x) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def json_params(params: dict): | def filter_null(obj: dict): | ||||||
|     return {k: json.dumps(v, separators=(",", ":")) for k, v in params.items()} |     try: | ||||||
|  |         return {k: v for k, v in obj.items() if v is not None} | ||||||
|  |     except AttributeError: | ||||||
|  |         return obj | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def json_params(obj: dict): | ||||||
|  |     return {k: json.dumps(filter_null(v), separators=(",", ":")) for k, v in obj.items()} | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def get_ql_entries(obj: dict) -> list[dict]: | def get_ql_entries(obj: dict) -> list[dict]: | ||||||
|     try: |     entries = find_item(obj, "entries") | ||||||
|         key = list(obj["data"].keys())[0] |     return entries or [] | ||||||
|         return obj["data"][key]["timeline"]["instructions"][0]["entries"] |  | ||||||
|     except Exception: |  | ||||||
|         return [] |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| def get_ql_cursor(obj: dict) -> str | None: |  | ||||||
|     for entry in get_ql_entries(obj): |  | ||||||
|         if entry["entryId"].startswith("cursor-bottom-"): |  | ||||||
|             return entry["content"]["value"] |  | ||||||
|     return None |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| def rep_info(rep: Response) -> str: |  | ||||||
|     return f"[{rep.status_code} ~ {rep.headers['x-rate-limit-remaining']}/{rep.headers['x-rate-limit-limit']}]" |  | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class Search: | class Search: | ||||||
|     def __init__(self, pool: AccountsPool): |     def __init__(self, pool: AccountsPool): | ||||||
|         self.pool = pool |         self.pool = pool | ||||||
| 
 | 
 | ||||||
|     def get_next_cursor(self, res: dict) -> str | None: |     async def _inf_req(self, queue: str, cb: Callable[[AsyncClient], Awaitable[Response]]): | ||||||
|  |         while True: | ||||||
|  |             account = await self.pool.get_account_or_wait(queue) | ||||||
|  |             client = account.make_client() | ||||||
|  | 
 | ||||||
|  |             try: | ||||||
|  |                 while True: | ||||||
|  |                     rep = await cb(client) | ||||||
|  |                     rep.raise_for_status() | ||||||
|  |                     yield rep | ||||||
|  |             except HTTPStatusError as e: | ||||||
|  |                 if e.response.status_code == 429: | ||||||
|  |                     logger.debug(f"Rate limit for account={account.username} on queue={queue}") | ||||||
|  |                     account.update_limit(queue, e.response) | ||||||
|  |                     continue | ||||||
|  |                 else: | ||||||
|  |                     logger.error(f"[{e.response.status_code}] {e.request.url}\n{e.response.text}") | ||||||
|  |                     raise e | ||||||
|  |             finally: | ||||||
|  |                 account.unlock(queue) | ||||||
|  | 
 | ||||||
|  |     def _check_stop(self, rep: Response, txt: str, cnt: int, res: list, cur: str | None, lim: int): | ||||||
|  |         els = len(res) | ||||||
|  |         is_res, is_cur, is_lim = els > 0, cur is not None, lim > 0 and cnt >= lim | ||||||
|  | 
 | ||||||
|  |         msg = [ | ||||||
|  |             f"{txt} {cnt:,d} (+{els:,d}) res={int(is_res)} cur={int(is_cur)} lim={int(is_lim)}", | ||||||
|  |             f"[{rep.headers['x-rate-limit-remaining']}/{rep.headers['x-rate-limit-limit']}]", | ||||||
|  |         ] | ||||||
|  |         logger.debug(" ".join(msg)) | ||||||
|  | 
 | ||||||
|  |         end_before = not is_res | ||||||
|  |         end_after = not is_cur or is_lim | ||||||
|  |         return cnt + els, end_before, end_after | ||||||
|  | 
 | ||||||
|  |     # search | ||||||
|  | 
 | ||||||
|  |     def get_search_cursor(self, res: dict) -> str | None: | ||||||
|         try: |         try: | ||||||
|             for x in res["timeline"]["instructions"]: |             for x in res["timeline"]["instructions"]: | ||||||
|                 entry = x.get("replaceEntry", None) |                 entry = x.get("replaceEntry", None) | ||||||
| @ -111,86 +144,112 @@ class Search: | |||||||
|             logger.debug(e) |             logger.debug(e) | ||||||
|             return None |             return None | ||||||
| 
 | 
 | ||||||
|     async def get_items(self, client: AsyncClient, q: str, cursor: str | None): |     async def search_raw(self, q: str, limit=-1): | ||||||
|         while True: |         queue, cursor, all_count = "search", None, 0 | ||||||
|  | 
 | ||||||
|  |         async def _get(client: AsyncClient): | ||||||
|             params = {**SEARCH_PARAMS, "q": q, "count": 20} |             params = {**SEARCH_PARAMS, "q": q, "count": 20} | ||||||
|             params["cursor" if cursor else "requestContext"] = cursor if cursor else "launch" |             params["cursor" if cursor else "requestContext"] = cursor if cursor else "launch" | ||||||
|  |             return await client.get(SEARCH_URL, params=params) | ||||||
| 
 | 
 | ||||||
|             rep = await client.get(SEARCH_URL, params=params) |         async for rep in self._inf_req(queue, _get): | ||||||
|             rep.raise_for_status() |  | ||||||
| 
 |  | ||||||
|             data = rep.json() |             data = rep.json() | ||||||
|             cursor = self.get_next_cursor(data) | 
 | ||||||
|  |             cursor = self.get_search_cursor(data) | ||||||
|             tweets = data.get("globalObjects", {}).get("tweets", []) |             tweets = data.get("globalObjects", {}).get("tweets", []) | ||||||
|             if not tweets or not cursor: | 
 | ||||||
|                 is_result = len(tweets) > 0 |             check = self._check_stop(rep, q, all_count, tweets, cursor, limit) | ||||||
|                 is_cursor = cursor is not None |             all_count, end_before, end_after = check | ||||||
|                 logger.debug(f"{q} - no more items [res={is_result} cur={is_cursor}]") | 
 | ||||||
|  |             if end_before: | ||||||
|                 return |                 return | ||||||
| 
 | 
 | ||||||
|             yield rep, data, cursor |  | ||||||
| 
 |  | ||||||
|     async def search(self, q: str): |  | ||||||
|         total_count = 0 |  | ||||||
|         async for x in self.pool.execute("search", lambda c, cur: self.get_items(c, q, cur)): |  | ||||||
|             rep, data, cursor = x |  | ||||||
| 
 |  | ||||||
|             tweets = data.get("globalObjects", {}).get("tweets", []) |  | ||||||
|             total_count += len(tweets) |  | ||||||
|             logger.debug(f"{q} - {total_count:,d} (+{len(tweets):,d}) {rep_info(rep)}") |  | ||||||
| 
 |  | ||||||
|             yield rep |             yield rep | ||||||
| 
 | 
 | ||||||
|  |             if end_after: | ||||||
|  |                 return | ||||||
|  | 
 | ||||||
|  |     async def search(self, q: str, limit=-1): | ||||||
|  |         async for rep in self.search_raw(q, limit=limit): | ||||||
|  |             data = rep.json() | ||||||
|  |             items = list(data.get("globalObjects", {}).get("tweets", {}).values()) | ||||||
|  |             for x in items: | ||||||
|  |                 yield Tweet.parse(x, data) | ||||||
|  | 
 | ||||||
|  |     # graphql | ||||||
|  | 
 | ||||||
|  |     def get_ql_cursor(self, obj: dict) -> str | None: | ||||||
|  |         try: | ||||||
|  |             for entry in get_ql_entries(obj): | ||||||
|  |                 if entry["entryId"].startswith("cursor-bottom-"): | ||||||
|  |                     return entry["content"]["value"] | ||||||
|  |             return None | ||||||
|  |         except Exception: | ||||||
|  |             return None | ||||||
|  | 
 | ||||||
|     async def graphql_items(self, op: str, variables: dict, features: dict = {}, limit=-1): |     async def graphql_items(self, op: str, variables: dict, features: dict = {}, limit=-1): | ||||||
|         url = f"https://twitter.com/i/api/graphql/{op}" |         url = f"https://twitter.com/i/api/graphql/{op}" | ||||||
|         features = {**BASE_FEATURES, **features} |         features = {**BASE_FEATURES, **features} | ||||||
| 
 | 
 | ||||||
|         cursor, all_count, queue = None, 0, op.split("/")[-1] |         queue, cursor, all_count = op.split("/")[-1], None, 0 | ||||||
|         while True: |  | ||||||
|             account = await self.pool.get_account_or_wait(queue) |  | ||||||
|             client = account.make_client() |  | ||||||
| 
 | 
 | ||||||
|             try: |         async def _get(client: AsyncClient): | ||||||
|                 params = {"variables": {**variables, "cursor": cursor}, "features": features} |             params = {"variables": {**variables, "cursor": cursor}, "features": features} | ||||||
|                 rep = await client.get(url, params=json_params(params)) |             return await client.get(url, params=json_params(params)) | ||||||
|                 logger.debug(f"{url} {rep_info(rep)}") |  | ||||||
|                 rep.raise_for_status() |  | ||||||
| 
 | 
 | ||||||
|                 data = rep.json() |         async for rep in self._inf_req(queue, _get): | ||||||
|                 entries, cursor = get_ql_entries(data), get_ql_cursor(data) |             data = rep.json() | ||||||
|  |             entries, cursor = get_ql_entries(data), self.get_ql_cursor(data) | ||||||
| 
 | 
 | ||||||
|                 # cursor-top / cursor-bottom always present |             # cursor-top / cursor-bottom always present | ||||||
|                 now_count = len([x for x in entries if not x["entryId"].startswith("cursor-")]) |             items = [x for x in entries if not x["entryId"].startswith("cursor-")] | ||||||
|                 all_count += now_count |             check = self._check_stop(rep, queue, all_count, items, cursor, limit) | ||||||
|  |             all_count, end_before, end_after = check | ||||||
| 
 | 
 | ||||||
|                 yield rep |             if end_before: | ||||||
|  |                 return | ||||||
| 
 | 
 | ||||||
|                 if not cursor or not now_count or (limit > 0 and all_count >= limit): |             yield rep | ||||||
|                     return | 
 | ||||||
|             finally: |             if end_after: | ||||||
|                 account.unlock(queue) |                 return | ||||||
| 
 | 
 | ||||||
|     async def graphql_item(self, op: str, variables: dict, features: dict = {}): |     async def graphql_item(self, op: str, variables: dict, features: dict = {}): | ||||||
|         res: list[Response] = [] |         url = f"https://twitter.com/i/api/graphql/{op}" | ||||||
|         async for x in self.graphql_items(op, variables, features): |         features = {**BASE_FEATURES, **features} | ||||||
|             res.append(x) | 
 | ||||||
|             break |         async def _get(client: AsyncClient): | ||||||
|         return res[0] |             params = {"variables": {**variables}, "features": features} | ||||||
|  |             return await client.get(url, params=json_params(params)) | ||||||
|  | 
 | ||||||
|  |         queue = op.split("/")[-1] | ||||||
|  |         async for rep in self._inf_req(queue, _get): | ||||||
|  |             msg = [ | ||||||
|  |                 f"{queue}", | ||||||
|  |                 f"[{rep.headers['x-rate-limit-remaining']}/{rep.headers['x-rate-limit-limit']}]", | ||||||
|  |             ] | ||||||
|  |             logger.debug(" ".join(msg)) | ||||||
|  | 
 | ||||||
|  |             return rep | ||||||
| 
 | 
 | ||||||
|     async def user_by_login(self, login: str): |     async def user_by_login(self, login: str): | ||||||
|         v = {"screen_name": login, "withSafetyModeUserFields": True} |         op = "sLVLhk0bGj3MVFEKTdax1w/UserByScreenName" | ||||||
|         return await self.graphql_item("sLVLhk0bGj3MVFEKTdax1w/UserByScreenName", v) |         kv = {"screen_name": login, "withSafetyModeUserFields": True} | ||||||
|  |         return await self.graphql_item(op, kv) | ||||||
| 
 | 
 | ||||||
|     async def user_by_id(self, uid: int): |     async def user_by_id(self, uid: int): | ||||||
|         v = {"userId": str(uid), "withSafetyModeUserFields": True} |         op = "GazOglcBvgLigl3ywt6b3Q/UserByRestId" | ||||||
|         return await self.graphql_item("GazOglcBvgLigl3ywt6b3Q/UserByRestId", v) |         kv = {"userId": str(uid), "withSafetyModeUserFields": True} | ||||||
|  |         return await self.graphql_item(op, kv) | ||||||
| 
 | 
 | ||||||
|     async def retweeters(self, twid: int, limit=-1): |     async def retweeters(self, twid: int, limit=-1): | ||||||
|         v = {"tweetId": str(twid), "count": 20, "includePromotedContent": True} |         op = "U5f_jm0CiLmSfI1d4rGleQ/Retweeters" | ||||||
|         async for x in self.graphql_items("U5f_jm0CiLmSfI1d4rGleQ/Retweeters", v, limit=limit): |         kv = {"tweetId": str(twid), "count": 20, "includePromotedContent": True} | ||||||
|  |         async for x in self.graphql_items(op, kv, limit=limit): | ||||||
|             yield x |             yield x | ||||||
| 
 | 
 | ||||||
|     async def favoriters(self, twid: int, limit=-1): |     async def favoriters(self, twid: int, limit=-1): | ||||||
|         v = {"tweetId": str(twid), "count": 20, "includePromotedContent": True} |         op = "vcTrPlh9ovFDQejz22q9vg/Favoriters" | ||||||
|         async for x in self.graphql_items("vcTrPlh9ovFDQejz22q9vg/Favoriters", v, limit=limit): |         kv = {"tweetId": str(twid), "count": 20, "includePromotedContent": True} | ||||||
|  |         async for x in self.graphql_items(op, kv, limit=limit): | ||||||
|             yield x |             yield x | ||||||
|  | |||||||
							
								
								
									
										37
									
								
								twapi/utils.py
									
									
									
									
									
										Обычный файл
									
								
							
							
						
						
									
										37
									
								
								twapi/utils.py
									
									
									
									
									
										Обычный файл
									
								
							| @ -0,0 +1,37 @@ | |||||||
|  | from typing import Any, TypeVar | ||||||
|  | 
 | ||||||
|  | T = TypeVar("T") | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # https://stackoverflow.com/a/43184871 | ||||||
|  | def find_item(obj: dict, key: str, default=None): | ||||||
|  |     stack = [iter(obj.items())] | ||||||
|  |     while stack: | ||||||
|  |         for k, v in stack[-1]: | ||||||
|  |             if k == key: | ||||||
|  |                 return v | ||||||
|  |             elif isinstance(v, dict): | ||||||
|  |                 stack.append(iter(v.items())) | ||||||
|  |                 break | ||||||
|  |             elif isinstance(v, list): | ||||||
|  |                 stack.append(iter(enumerate(v))) | ||||||
|  |                 break | ||||||
|  |         else: | ||||||
|  |             stack.pop() | ||||||
|  |     return default | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def get_or(obj: dict, key: str, default_value: T = None) -> Any | T: | ||||||
|  |     for part in key.split("."): | ||||||
|  |         if part not in obj: | ||||||
|  |             return default_value | ||||||
|  |         obj = obj[part] | ||||||
|  |     return obj | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def int_or_none(obj: dict, key: str): | ||||||
|  |     try: | ||||||
|  |         val = get_or(obj, key) | ||||||
|  |         return int(val) if val is not None else None | ||||||
|  |     except Exception: | ||||||
|  |         return None | ||||||
		Загрузка…
	
	
			
			x
			
			
		
	
		Ссылка в новой задаче
	
	Block a user
	 Vlad Pronsky
						Vlad Pronsky