зеркало из
https://github.com/viginum-datalab/twscrape.git
synced 2025-10-30 05:26:20 +02:00
update limit in non _raw functions
Этот коммит содержится в:
родитель
a3bb5d2dc8
Коммит
f43bf3cd16
@ -3,7 +3,7 @@ from httpx import Response
|
||||
from .accounts_pool import AccountsPool
|
||||
from .constants import *
|
||||
from .logger import set_log_level
|
||||
from .models import Tweet, User
|
||||
from .models import Tweet, User, get_tweets, get_users
|
||||
from .queue_client import QueueClient
|
||||
from .utils import encode_params, find_obj, get_by_path, to_old_obj, to_old_rep
|
||||
|
||||
@ -93,14 +93,9 @@ class API:
|
||||
yield x
|
||||
|
||||
async def search(self, q: str, limit=-1, kv=None):
|
||||
twids = set()
|
||||
async for rep in self.search_raw(q, limit=limit, kv=kv):
|
||||
obj = to_old_rep(rep.json())
|
||||
for x in obj["tweets"].values():
|
||||
tmp = Tweet.parse(x, obj)
|
||||
if tmp.id not in twids:
|
||||
twids.add(tmp.id)
|
||||
yield tmp
|
||||
for x in get_tweets(rep.json(), limit):
|
||||
yield x
|
||||
|
||||
# user_by_id
|
||||
|
||||
@ -181,9 +176,8 @@ class API:
|
||||
|
||||
async def followers(self, uid: int, limit=-1, kv=None):
|
||||
async for rep in self.followers_raw(uid, limit=limit, kv=kv):
|
||||
obj = to_old_rep(rep.json())
|
||||
for _, v in obj["users"].items():
|
||||
yield User.parse(v)
|
||||
for x in get_users(rep.json(), limit):
|
||||
yield x
|
||||
|
||||
# following
|
||||
|
||||
@ -195,9 +189,8 @@ class API:
|
||||
|
||||
async def following(self, uid: int, limit=-1, kv=None):
|
||||
async for rep in self.following_raw(uid, limit=limit, kv=kv):
|
||||
obj = to_old_rep(rep.json())
|
||||
for _, v in obj["users"].items():
|
||||
yield User.parse(v)
|
||||
for x in get_users(rep.json(), limit):
|
||||
yield x
|
||||
|
||||
# retweeters
|
||||
|
||||
@ -209,9 +202,8 @@ class API:
|
||||
|
||||
async def retweeters(self, twid: int, limit=-1, kv=None):
|
||||
async for rep in self.retweeters_raw(twid, limit=limit, kv=kv):
|
||||
obj = to_old_rep(rep.json())
|
||||
for _, v in obj["users"].items():
|
||||
yield User.parse(v)
|
||||
for x in get_users(rep.json(), limit):
|
||||
yield x
|
||||
|
||||
# favoriters
|
||||
|
||||
@ -223,9 +215,8 @@ class API:
|
||||
|
||||
async def favoriters(self, twid: int, limit=-1, kv=None):
|
||||
async for rep in self.favoriters_raw(twid, limit=limit, kv=kv):
|
||||
obj = to_old_rep(rep.json())
|
||||
for _, v in obj["users"].items():
|
||||
yield User.parse(v)
|
||||
for x in get_users(rep.json(), limit):
|
||||
yield x
|
||||
|
||||
# user_tweets
|
||||
|
||||
@ -245,9 +236,8 @@ class API:
|
||||
|
||||
async def user_tweets(self, uid: int, limit=-1, kv=None):
|
||||
async for rep in self.user_tweets_raw(uid, limit=limit, kv=kv):
|
||||
obj = to_old_rep(rep.json())
|
||||
for _, v in obj["tweets"].items():
|
||||
yield Tweet.parse(v, obj)
|
||||
for x in get_tweets(rep.json(), limit):
|
||||
yield x
|
||||
|
||||
# user_tweets_and_replies
|
||||
|
||||
@ -267,9 +257,8 @@ class API:
|
||||
|
||||
async def user_tweets_and_replies(self, uid: int, limit=-1, kv=None):
|
||||
async for rep in self.user_tweets_and_replies_raw(uid, limit=limit, kv=kv):
|
||||
obj = to_old_rep(rep.json())
|
||||
for _, v in obj["tweets"].items():
|
||||
yield Tweet.parse(v, obj)
|
||||
for x in get_tweets(rep.json(), limit):
|
||||
yield x
|
||||
|
||||
# list timeline
|
||||
|
||||
@ -285,6 +274,5 @@ class API:
|
||||
|
||||
async def list_timeline(self, list_id: int, limit=-1, kv=None):
|
||||
async for rep in self.list_timeline_raw(list_id, limit=limit, kv=kv):
|
||||
obj = to_old_rep(rep.json())
|
||||
for x in obj["tweets"].values():
|
||||
yield Tweet.parse(x, obj)
|
||||
for x in get_tweets(rep, limit):
|
||||
yield x
|
||||
|
||||
@ -3,10 +3,12 @@ import json
|
||||
import re
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
from typing import Generator, Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from .logger import logger
|
||||
from .utils import find_item, get_or, int_or_none
|
||||
from .utils import find_item, get_or, int_or_none, to_old_rep
|
||||
|
||||
|
||||
@dataclass
|
||||
@ -115,7 +117,7 @@ class User(JSONTrait):
|
||||
# label: typing.Optional["UserLabel"] = None
|
||||
|
||||
@staticmethod
|
||||
def parse(obj: dict):
|
||||
def parse(obj: dict, res=None):
|
||||
return User(
|
||||
id=int(obj["id_str"]),
|
||||
id_str=obj["id_str"],
|
||||
@ -373,3 +375,36 @@ def _get_views(obj: dict, rt_obj: dict):
|
||||
if k is not None:
|
||||
return k
|
||||
return None
|
||||
|
||||
|
||||
# reply parsing
|
||||
|
||||
|
||||
def get_items(rep: "httpx.Response | dict", kind: str, limit: int = -1):
    """Yield unique parsed ``User`` or ``Tweet`` objects from one API reply.

    Args:
        rep: Either an already-decoded JSON ``dict`` or a response object
            exposing ``.json()``; anything that is not a ``dict`` is decoded
            by calling ``rep.json()``.
        kind: ``"user"`` or ``"tweet"``. Selects the parser class and the
            section of the converted reply to read (``"users"`` / ``"tweets"``).
        limit: Maximum number of items yielded from this single reply;
            ``-1`` disables the cap.

    Yields:
        Parsed objects; each distinct ``id`` is yielded at most once.

    Raises:
        ValueError: If ``kind`` is neither ``"user"`` nor ``"tweet"``. This is
            a generator, so the error surfaces on the first iteration step.
    """
    if kind == "user":
        cls, key = User, "users"
    elif kind == "tweet":
        cls, key = Tweet, "tweets"
    else:
        raise ValueError(f"Invalid kind: {kind}")

    # Decide by type, not by a membership test: `"json" in rep` raises
    # TypeError for a response object (it is not a container) and, for a
    # dict, only checks whether a key literally named "json" exists.
    payload = rep if isinstance(rep, dict) else rep.json()
    obj = to_old_rep(payload)

    seen = set()
    for raw in obj[key].values():
        # The cap counts unique items already yielded from this reply.
        if limit != -1 and len(seen) >= limit:
            break

        item = cls.parse(raw, obj)
        if item.id not in seen:
            seen.add(item.id)
            yield item
|
||||
|
||||
|
||||
def get_tweets(rep: httpx.Response, limit: int = -1) -> Generator[Tweet, None, None]:
    """Return a generator of the tweets parsed from one reply.

    Delegates to ``get_items`` with ``kind="tweet"``; ``limit`` is forwarded
    unchanged (``-1`` by default).
    """
    tweets = get_items(rep, kind="tweet", limit=limit)
    return tweets  # type: ignore
|
||||
|
||||
|
||||
def get_users(rep: httpx.Response, limit: int = -1) -> Generator[User, None, None]:
    """Return a generator of the users parsed from one reply.

    Delegates to ``get_items`` with ``kind="user"``; ``limit`` is forwarded
    unchanged (``-1`` by default).
    """
    users = get_items(rep, kind="user", limit=limit)
    return users  # type: ignore
|
||||
|
||||
@ -155,6 +155,11 @@ class QueueClient:
|
||||
await self._close_ctx(-1, banned=True, msg=msg)
|
||||
raise BannedError(msg)
|
||||
|
||||
# possible banned by old api flow
|
||||
if rep.status_code in (401, 403):
|
||||
await self._close_ctx(utc_ts() + 60 * 60 * 12) # lock for 12 hours
|
||||
raise RateLimitError(msg)
|
||||
|
||||
# content not found
|
||||
if rep.status_code == 200 and "_Missing: No status found with that ID." in msg:
|
||||
return # ignore this error
|
||||
|
||||
Загрузка…
x
Ссылка в новой задаче
Block a user