зеркало из https://github.com/viginum-datalab/twscrape.git (синхронизировано 2025-10-30 21:46:13 +02:00)
			
		
		
		
	update limit handling in non-_raw functions
Этот коммит содержится в:
		
							родитель a3bb5d2dc8
						Коммит f43bf3cd16
					
				| @ -3,7 +3,7 @@ from httpx import Response | |||||||
| from .accounts_pool import AccountsPool | from .accounts_pool import AccountsPool | ||||||
| from .constants import * | from .constants import * | ||||||
| from .logger import set_log_level | from .logger import set_log_level | ||||||
| from .models import Tweet, User | from .models import Tweet, User, get_tweets, get_users | ||||||
| from .queue_client import QueueClient | from .queue_client import QueueClient | ||||||
| from .utils import encode_params, find_obj, get_by_path, to_old_obj, to_old_rep | from .utils import encode_params, find_obj, get_by_path, to_old_obj, to_old_rep | ||||||
| 
 | 
 | ||||||
| @ -93,14 +93,9 @@ class API: | |||||||
|             yield x |             yield x | ||||||
| 
 | 
 | ||||||
|     async def search(self, q: str, limit=-1, kv=None): |     async def search(self, q: str, limit=-1, kv=None): | ||||||
|         twids = set() |  | ||||||
|         async for rep in self.search_raw(q, limit=limit, kv=kv): |         async for rep in self.search_raw(q, limit=limit, kv=kv): | ||||||
|             obj = to_old_rep(rep.json()) |             for x in get_tweets(rep.json(), limit): | ||||||
|             for x in obj["tweets"].values(): |                 yield x | ||||||
|                 tmp = Tweet.parse(x, obj) |  | ||||||
|                 if tmp.id not in twids: |  | ||||||
|                     twids.add(tmp.id) |  | ||||||
|                     yield tmp |  | ||||||
| 
 | 
 | ||||||
|     # user_by_id |     # user_by_id | ||||||
| 
 | 
 | ||||||
| @ -181,9 +176,8 @@ class API: | |||||||
| 
 | 
 | ||||||
|     async def followers(self, uid: int, limit=-1, kv=None): |     async def followers(self, uid: int, limit=-1, kv=None): | ||||||
|         async for rep in self.followers_raw(uid, limit=limit, kv=kv): |         async for rep in self.followers_raw(uid, limit=limit, kv=kv): | ||||||
|             obj = to_old_rep(rep.json()) |             for x in get_users(rep.json(), limit): | ||||||
|             for _, v in obj["users"].items(): |                 yield x | ||||||
|                 yield User.parse(v) |  | ||||||
| 
 | 
 | ||||||
|     # following |     # following | ||||||
| 
 | 
 | ||||||
| @ -195,9 +189,8 @@ class API: | |||||||
| 
 | 
 | ||||||
|     async def following(self, uid: int, limit=-1, kv=None): |     async def following(self, uid: int, limit=-1, kv=None): | ||||||
|         async for rep in self.following_raw(uid, limit=limit, kv=kv): |         async for rep in self.following_raw(uid, limit=limit, kv=kv): | ||||||
|             obj = to_old_rep(rep.json()) |             for x in get_users(rep.json(), limit): | ||||||
|             for _, v in obj["users"].items(): |                 yield x | ||||||
|                 yield User.parse(v) |  | ||||||
| 
 | 
 | ||||||
|     # retweeters |     # retweeters | ||||||
| 
 | 
 | ||||||
| @ -209,9 +202,8 @@ class API: | |||||||
| 
 | 
 | ||||||
|     async def retweeters(self, twid: int, limit=-1, kv=None): |     async def retweeters(self, twid: int, limit=-1, kv=None): | ||||||
|         async for rep in self.retweeters_raw(twid, limit=limit, kv=kv): |         async for rep in self.retweeters_raw(twid, limit=limit, kv=kv): | ||||||
|             obj = to_old_rep(rep.json()) |             for x in get_users(rep.json(), limit): | ||||||
|             for _, v in obj["users"].items(): |                 yield x | ||||||
|                 yield User.parse(v) |  | ||||||
| 
 | 
 | ||||||
|     # favoriters |     # favoriters | ||||||
| 
 | 
 | ||||||
| @ -223,9 +215,8 @@ class API: | |||||||
| 
 | 
 | ||||||
|     async def favoriters(self, twid: int, limit=-1, kv=None): |     async def favoriters(self, twid: int, limit=-1, kv=None): | ||||||
|         async for rep in self.favoriters_raw(twid, limit=limit, kv=kv): |         async for rep in self.favoriters_raw(twid, limit=limit, kv=kv): | ||||||
|             obj = to_old_rep(rep.json()) |             for x in get_users(rep.json(), limit): | ||||||
|             for _, v in obj["users"].items(): |                 yield x | ||||||
|                 yield User.parse(v) |  | ||||||
| 
 | 
 | ||||||
|     # user_tweets |     # user_tweets | ||||||
| 
 | 
 | ||||||
| @ -245,9 +236,8 @@ class API: | |||||||
| 
 | 
 | ||||||
|     async def user_tweets(self, uid: int, limit=-1, kv=None): |     async def user_tweets(self, uid: int, limit=-1, kv=None): | ||||||
|         async for rep in self.user_tweets_raw(uid, limit=limit, kv=kv): |         async for rep in self.user_tweets_raw(uid, limit=limit, kv=kv): | ||||||
|             obj = to_old_rep(rep.json()) |             for x in get_tweets(rep.json(), limit): | ||||||
|             for _, v in obj["tweets"].items(): |                 yield x | ||||||
|                 yield Tweet.parse(v, obj) |  | ||||||
| 
 | 
 | ||||||
|     # user_tweets_and_replies |     # user_tweets_and_replies | ||||||
| 
 | 
 | ||||||
| @ -267,9 +257,8 @@ class API: | |||||||
| 
 | 
 | ||||||
|     async def user_tweets_and_replies(self, uid: int, limit=-1, kv=None): |     async def user_tweets_and_replies(self, uid: int, limit=-1, kv=None): | ||||||
|         async for rep in self.user_tweets_and_replies_raw(uid, limit=limit, kv=kv): |         async for rep in self.user_tweets_and_replies_raw(uid, limit=limit, kv=kv): | ||||||
|             obj = to_old_rep(rep.json()) |             for x in get_tweets(rep.json(), limit): | ||||||
|             for _, v in obj["tweets"].items(): |                 yield x | ||||||
|                 yield Tweet.parse(v, obj) |  | ||||||
| 
 | 
 | ||||||
|     # list timeline |     # list timeline | ||||||
| 
 | 
 | ||||||
| @ -285,6 +274,5 @@ class API: | |||||||
| 
 | 
 | ||||||
|     async def list_timeline(self, list_id: int, limit=-1, kv=None): |     async def list_timeline(self, list_id: int, limit=-1, kv=None): | ||||||
|         async for rep in self.list_timeline_raw(list_id, limit=limit, kv=kv): |         async for rep in self.list_timeline_raw(list_id, limit=limit, kv=kv): | ||||||
|             obj = to_old_rep(rep.json()) |             for x in get_tweets(rep, limit): | ||||||
|             for x in obj["tweets"].values(): |                 yield x | ||||||
|                 yield Tweet.parse(x, obj) |  | ||||||
|  | |||||||
| @ -3,10 +3,12 @@ import json | |||||||
| import re | import re | ||||||
| from dataclasses import asdict, dataclass, field | from dataclasses import asdict, dataclass, field | ||||||
| from datetime import datetime | from datetime import datetime | ||||||
| from typing import Optional | from typing import Generator, Optional | ||||||
|  | 
 | ||||||
|  | import httpx | ||||||
| 
 | 
 | ||||||
| from .logger import logger | from .logger import logger | ||||||
| from .utils import find_item, get_or, int_or_none | from .utils import find_item, get_or, int_or_none, to_old_rep | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @dataclass | @dataclass | ||||||
| @ -115,7 +117,7 @@ class User(JSONTrait): | |||||||
|     # label: typing.Optional["UserLabel"] = None |     # label: typing.Optional["UserLabel"] = None | ||||||
| 
 | 
 | ||||||
|     @staticmethod |     @staticmethod | ||||||
|     def parse(obj: dict): |     def parse(obj: dict, res=None): | ||||||
|         return User( |         return User( | ||||||
|             id=int(obj["id_str"]), |             id=int(obj["id_str"]), | ||||||
|             id_str=obj["id_str"], |             id_str=obj["id_str"], | ||||||
| @ -373,3 +375,36 @@ def _get_views(obj: dict, rt_obj: dict): | |||||||
|             if k is not None: |             if k is not None: | ||||||
|                 return k |                 return k | ||||||
|     return None |     return None | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # reply parsing | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def get_items(rep: httpx.Response, kind: str, limit: int = -1): | ||||||
|  |     if kind == "user": | ||||||
|  |         Cls = User | ||||||
|  |         key = "users" | ||||||
|  |     elif kind == "tweet": | ||||||
|  |         Cls = Tweet | ||||||
|  |         key = "tweets" | ||||||
|  |     else: | ||||||
|  |         raise ValueError(f"Invalid kind: {kind}") | ||||||
|  | 
 | ||||||
|  |     ids = set() | ||||||
|  |     obj = to_old_rep(rep.json() if "json" in rep else rep)  # type: ignore | ||||||
|  |     for x in obj[key].values(): | ||||||
|  |         if limit != -1 and len(ids) >= limit: | ||||||
|  |             break | ||||||
|  | 
 | ||||||
|  |         tmp = Cls.parse(x, obj) | ||||||
|  |         if tmp.id not in ids: | ||||||
|  |             ids.add(tmp.id) | ||||||
|  |             yield tmp | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def get_tweets(rep: httpx.Response, limit: int = -1) -> Generator[Tweet, None, None]: | ||||||
|  |     return get_items(rep, "tweet", limit)  # type: ignore | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def get_users(rep: httpx.Response, limit: int = -1) -> Generator[User, None, None]: | ||||||
|  |     return get_items(rep, "user", limit)  # type: ignore | ||||||
|  | |||||||
| @ -155,6 +155,11 @@ class QueueClient: | |||||||
|             await self._close_ctx(-1, banned=True, msg=msg) |             await self._close_ctx(-1, banned=True, msg=msg) | ||||||
|             raise BannedError(msg) |             raise BannedError(msg) | ||||||
| 
 | 
 | ||||||
|  |         # possible banned by old api flow | ||||||
|  |         if rep.status_code in (401, 403): | ||||||
|  |             await self._close_ctx(utc_ts() + 60 * 60 * 12)  # lock for 12 hours | ||||||
|  |             raise RateLimitError(msg) | ||||||
|  | 
 | ||||||
|         # content not found |         # content not found | ||||||
|         if rep.status_code == 200 and "_Missing: No status found with that ID." in msg: |         if rep.status_code == 200 and "_Missing: No status found with that ID." in msg: | ||||||
|             return  # ignore this error |             return  # ignore this error | ||||||
|  | |||||||
		Загрузка…
	
	
			
			x
			
			
		
	
		Ссылка в новой задаче
	
	Block a user
	 Vlad Pronsky
						Vlad Pronsky