зеркало из
https://github.com/viginum-datalab/twscrape.git
synced 2025-10-29 21:16:25 +02:00
add more graphql methods; add parse to model for graphql responses
Этот коммит содержится в:
родитель
0b94f6feaa
Коммит
d4d867aaab
@ -6,19 +6,15 @@ import time
|
|||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
from fake_useragent import UserAgent
|
from fake_useragent import UserAgent
|
||||||
from httpx import AsyncClient, Client, HTTPStatusError, Response
|
from httpx import AsyncClient, Client, Response
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
|
from .utils import raise_for_status
|
||||||
|
|
||||||
TOKEN = "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA"
|
TOKEN = "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA"
|
||||||
TASK_URL = "https://api.twitter.com/1.1/onboarding/task.json"
|
TASK_URL = "https://api.twitter.com/1.1/onboarding/task.json"
|
||||||
|
|
||||||
|
|
||||||
class RateLimitExceeded(Exception):
|
|
||||||
def __init__(self, reset: int, cursor: str | None = None):
|
|
||||||
self.reset = reset
|
|
||||||
self.cursor = cursor
|
|
||||||
|
|
||||||
|
|
||||||
def search_email_with_confirmation_code(imap: imaplib.IMAP4_SSL, msg_count: int) -> str | None:
|
def search_email_with_confirmation_code(imap: imaplib.IMAP4_SSL, msg_count: int) -> str | None:
|
||||||
for i in range(msg_count, 0, -1):
|
for i in range(msg_count, 0, -1):
|
||||||
_, rep = imap.fetch(str(i), "(RFC822)")
|
_, rep = imap.fetch(str(i), "(RFC822)")
|
||||||
@ -52,13 +48,6 @@ def get_verification_code(email: str, password: str, imap_domain: None | str = N
|
|||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
|
|
||||||
|
|
||||||
def raise_for_status(rep: Response, label: str):
|
|
||||||
try:
|
|
||||||
rep.raise_for_status()
|
|
||||||
except HTTPStatusError:
|
|
||||||
raise Exception(f"{label} - {rep.status_code} - {rep.text}")
|
|
||||||
|
|
||||||
|
|
||||||
def login_get_guest_token(client: Client) -> str:
|
def login_get_guest_token(client: Client) -> str:
|
||||||
rep = client.post("https://api.twitter.com/1.1/guest/activate.json")
|
rep = client.post("https://api.twitter.com/1.1/guest/activate.json")
|
||||||
raise_for_status(rep, "guest_token")
|
raise_for_status(rep, "guest_token")
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
import email.utils
|
import email.utils
|
||||||
from dataclasses import dataclass
|
from dataclasses import asdict, dataclass
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
@ -7,13 +7,29 @@ from .utils import get_or, int_or_none
|
|||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Coordinates:
|
class JSONTrait:
|
||||||
longitude: float
|
def json(self):
|
||||||
latitude: float
|
return asdict(self)
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Place:
|
class Coordinates(JSONTrait):
|
||||||
|
longitude: float
|
||||||
|
latitude: float
|
||||||
|
|
||||||
|
@staticmethod
def parse(tw_obj: dict) -> "Coordinates | None":
    """Build ``Coordinates`` from a tweet object, or return ``None``.

    The ``coordinates`` entry is tried first and its pair is passed through
    in order; the ``geo`` entry is used as a fallback and its pair is
    swapped before being passed to the constructor.  ``None`` is returned
    when neither entry is present (or both are falsy).
    """
    if tw_obj.get("coordinates"):
        coords = tw_obj["coordinates"]["coordinates"]
        return Coordinates(coords[0], coords[1])

    if tw_obj.get("geo"):
        # ``geo`` stores the pair in the opposite order, hence the swap.
        coords = tw_obj["geo"]["coordinates"]
        return Coordinates(coords[1], coords[0])

    return None
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class Place(JSONTrait):
|
||||||
id: str
|
id: str
|
||||||
fullName: str
|
fullName: str
|
||||||
name: str
|
name: str
|
||||||
@ -34,7 +50,7 @@ class Place:
|
|||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class TextLink:
|
class TextLink(JSONTrait):
|
||||||
url: str
|
url: str
|
||||||
text: str | None
|
text: str | None
|
||||||
tcourl: str | None
|
tcourl: str | None
|
||||||
@ -51,22 +67,18 @@ class TextLink:
|
|||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class UserRef:
|
class UserRef(JSONTrait):
|
||||||
id: int
|
id: int
|
||||||
username: str
|
username: str
|
||||||
displayname: str
|
displayname: str
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def parse(obj: dict):
|
def parse(obj: dict):
|
||||||
return UserRef(
|
return UserRef(id=int(obj["id_str"]), username=obj["screen_name"], displayname=obj["name"])
|
||||||
id=obj["id"],
|
|
||||||
username=obj["screen_name"],
|
|
||||||
displayname=obj["name"],
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class User:
|
class User(JSONTrait):
|
||||||
id: int
|
id: int
|
||||||
username: str
|
username: str
|
||||||
displayname: str
|
displayname: str
|
||||||
@ -115,7 +127,7 @@ class User:
|
|||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Tweet:
|
class Tweet(JSONTrait):
|
||||||
id: int
|
id: int
|
||||||
date: datetime
|
date: datetime
|
||||||
user: User
|
user: User
|
||||||
@ -136,10 +148,6 @@ class Tweet:
|
|||||||
place: Optional[Place] = None
|
place: Optional[Place] = None
|
||||||
coordinates: Optional[Coordinates] = None
|
coordinates: Optional[Coordinates] = None
|
||||||
|
|
||||||
@property
|
|
||||||
def url(self):
|
|
||||||
return f"https://twitter.com/{self.user.username}/status/{self.id}"
|
|
||||||
|
|
||||||
# renderedContent: str
|
# renderedContent: str
|
||||||
# source: str | None = None
|
# source: str | None = None
|
||||||
# sourceUrl: str | None = None
|
# sourceUrl: str | None = None
|
||||||
@ -150,23 +158,19 @@ class Tweet:
|
|||||||
# card: typing.Optional["Card"] = None
|
# card: typing.Optional["Card"] = None
|
||||||
# vibe: typing.Optional["Vibe"] = None
|
# vibe: typing.Optional["Vibe"] = None
|
||||||
|
|
||||||
|
@property
def url(self) -> str:
    """Return the twitter.com status URL built from the author's username and this tweet's id."""
    return f"https://twitter.com/{self.user.username}/status/{self.id}"
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def parse(obj: dict, res: dict):
|
def parse(obj: dict, res: dict):
|
||||||
rt_obj = get_or(res, f"globalObjects.tweets.{obj.get('retweeted_status_id_str')}")
|
rt_obj = get_or(res, f"tweets.{obj.get('retweeted_status_id_str')}")
|
||||||
qt_obj = get_or(res, f"globalObjects.tweets.{obj.get('quoted_status_id_str')}")
|
qt_obj = get_or(res, f"tweets.{obj.get('quoted_status_id_str')}")
|
||||||
|
|
||||||
coordinates: Coordinates | None = None
|
|
||||||
if obj.get("coordinates"):
|
|
||||||
coords = obj["coordinates"]["coordinates"]
|
|
||||||
coordinates = Coordinates(coords[0], coords[1])
|
|
||||||
elif obj.get("geo"):
|
|
||||||
coords = obj["geo"]["coordinates"]
|
|
||||||
coordinates = Coordinates(coords[1], coords[0])
|
|
||||||
|
|
||||||
return Tweet(
|
return Tweet(
|
||||||
id=obj["id"],
|
id=int(obj["id_str"]),
|
||||||
date=email.utils.parsedate_to_datetime(obj["created_at"]),
|
date=email.utils.parsedate_to_datetime(obj["created_at"]),
|
||||||
user=User.parse(res["globalObjects"]["users"][obj["user_id_str"]]),
|
user=User.parse(res["users"][obj["user_id_str"]]),
|
||||||
lang=obj["lang"],
|
lang=obj["lang"],
|
||||||
rawContent=obj["full_text"],
|
rawContent=obj["full_text"],
|
||||||
replyCount=obj["reply_count"],
|
replyCount=obj["reply_count"],
|
||||||
@ -182,5 +186,5 @@ class Tweet:
|
|||||||
retweetedTweet=Tweet.parse(rt_obj, res) if rt_obj else None,
|
retweetedTweet=Tweet.parse(rt_obj, res) if rt_obj else None,
|
||||||
quotedTweet=Tweet.parse(qt_obj, res) if qt_obj else None,
|
quotedTweet=Tweet.parse(qt_obj, res) if qt_obj else None,
|
||||||
place=Place.parse(obj["place"]) if obj.get("place") else None,
|
place=Place.parse(obj["place"]) if obj.get("place") else None,
|
||||||
coordinates=coordinates,
|
coordinates=Coordinates.parse(obj),
|
||||||
)
|
)
|
||||||
|
|||||||
324
twapi/search.py
324
twapi/search.py
@ -4,9 +4,9 @@ from typing import Awaitable, Callable
|
|||||||
from httpx import AsyncClient, HTTPStatusError, Response
|
from httpx import AsyncClient, HTTPStatusError, Response
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from .models import Tweet
|
from .models import Tweet, User
|
||||||
from .pool import AccountsPool
|
from .pool import AccountsPool
|
||||||
from .utils import find_item
|
from .utils import encode_params, find_item, to_old_obj, to_search_like
|
||||||
|
|
||||||
BASIC_SEARCH_PARAMS = """
|
BASIC_SEARCH_PARAMS = """
|
||||||
include_profile_interstitial_type=1
|
include_profile_interstitial_type=1
|
||||||
@ -71,6 +71,7 @@ BASE_FEATURES = {
|
|||||||
|
|
||||||
SEARCH_URL = "https://api.twitter.com/2/search/adaptive.json"
|
SEARCH_URL = "https://api.twitter.com/2/search/adaptive.json"
|
||||||
SEARCH_PARAMS = dict(x.split("=") for x in BASIC_SEARCH_PARAMS.splitlines() if x)
|
SEARCH_PARAMS = dict(x.split("=") for x in BASIC_SEARCH_PARAMS.splitlines() if x)
|
||||||
|
GRAPHQL_URL = "https://twitter.com/i/api/graphql/"
|
||||||
|
|
||||||
|
|
||||||
def filter_null(obj: dict):
|
def filter_null(obj: dict):
|
||||||
@ -93,6 +94,27 @@ class Search:
|
|||||||
def __init__(self, pool: AccountsPool):
|
def __init__(self, pool: AccountsPool):
|
||||||
self.pool = pool
|
self.pool = pool
|
||||||
|
|
||||||
|
# http helpers
|
||||||
|
|
||||||
|
def _limit_msg(self, rep: Response):
|
||||||
|
lr = rep.headers.get("x-rate-limit-remaining", -1)
|
||||||
|
ll = rep.headers.get("x-rate-limit-limit", -1)
|
||||||
|
return f"{lr}/{ll}"
|
||||||
|
|
||||||
|
def _is_end(
    self, rep: Response, q: str, res: list, cur: str | None, cnt: int, lim: int
) -> tuple[int, bool, bool]:
    """Update the running item count and decide whether paging should stop.

    Args:
        rep: Response whose rate-limit headers are included in the log line.
        q: Label written at the start of the debug log line.
        res: Items found on the current page (only its length is used).
        cur: Cursor for the next page, or ``None`` when there is none.
        cnt: Number of items counted before this page.
        lim: Item limit; values ``<= 0`` disable the limit check.

    Returns:
        ``(new_total, end_before, end_after)`` where ``end_before`` is true
        when the page is empty and ``end_after`` is true when there is no
        next cursor or the limit has been reached.
    """
    new_count = len(res)
    new_total = cnt + new_count

    is_res = new_count > 0
    is_cur = cur is not None
    is_lim = lim > 0 and new_total >= lim

    stats = f"{q} {new_total:,d} (+{new_count:,d})"
    flags = f"res={int(is_res)} cur={int(is_cur)} lim={int(is_lim)}"
    logger.debug(" ".join([stats, flags, self._limit_msg(rep)]))

    return new_total, not is_res, not is_cur or is_lim
|
||||||
|
|
||||||
async def _inf_req(self, queue: str, cb: Callable[[AsyncClient], Awaitable[Response]]):
|
async def _inf_req(self, queue: str, cb: Callable[[AsyncClient], Awaitable[Response]]):
|
||||||
while True:
|
while True:
|
||||||
account = await self.pool.get_account_or_wait(queue)
|
account = await self.pool.get_account_or_wait(queue)
|
||||||
@ -114,23 +136,7 @@ class Search:
|
|||||||
finally:
|
finally:
|
||||||
account.unlock(queue)
|
account.unlock(queue)
|
||||||
|
|
||||||
def _check_stop(self, rep: Response, txt: str, cnt: int, res: list, cur: str | None, lim: int):
|
def _get_search_cursor(self, res: dict) -> str | None:
|
||||||
els = len(res)
|
|
||||||
is_res, is_cur, is_lim = els > 0, cur is not None, lim > 0 and cnt >= lim
|
|
||||||
|
|
||||||
msg = [
|
|
||||||
f"{txt} {cnt:,d} (+{els:,d}) res={int(is_res)} cur={int(is_cur)} lim={int(is_lim)}",
|
|
||||||
f"[{rep.headers['x-rate-limit-remaining']}/{rep.headers['x-rate-limit-limit']}]",
|
|
||||||
]
|
|
||||||
logger.debug(" ".join(msg))
|
|
||||||
|
|
||||||
end_before = not is_res
|
|
||||||
end_after = not is_cur or is_lim
|
|
||||||
return cnt + els, end_before, end_after
|
|
||||||
|
|
||||||
# search
|
|
||||||
|
|
||||||
def get_search_cursor(self, res: dict) -> str | None:
|
|
||||||
try:
|
try:
|
||||||
for x in res["timeline"]["instructions"]:
|
for x in res["timeline"]["instructions"]:
|
||||||
entry = x.get("replaceEntry", None)
|
entry = x.get("replaceEntry", None)
|
||||||
@ -144,8 +150,63 @@ class Search:
|
|||||||
logger.debug(e)
|
logger.debug(e)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def get_ql_entries(self, obj: dict) -> list[dict]:
    """Return the value ``find_item`` finds under ``"entries"`` in ``obj``, or ``[]`` when it is missing or falsy."""
    entries = find_item(obj, "entries")
    return entries or []
|
||||||
|
|
||||||
|
def _get_ql_cursor(self, obj: dict) -> str | None:
|
||||||
|
try:
|
||||||
|
for entry in self.get_ql_entries(obj):
|
||||||
|
if entry["entryId"].startswith("cursor-bottom-"):
|
||||||
|
return entry["content"]["value"]
|
||||||
|
return None
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
|
||||||
|
async def _ql_items(self, op: str, kv: dict, ft: dict | None = None, limit=-1):
    """Yield successive response pages of a cursor-paginated GraphQL operation.

    Args:
        op: Operation path; the part after the last ``/`` is used as the
            queue name and log label.
        kv: GraphQL variables; the current ``cursor`` is added per request.
        ft: Extra feature flags merged over ``BASE_FEATURES``.
        limit: Stop once this many entries were seen; ``<= 0`` means no limit.

    Yields:
        Each response that contains at least one non-cursor entry.
    """
    queue, cursor, count = op.split("/")[-1], None, 0
    # Merge per-call flags over the defaults; ``None`` default avoids sharing
    # one mutable dict between calls.
    features = {**BASE_FEATURES, **(ft or {})}
    # GRAPHQL_URL ends with "/", so strip it to avoid "graphql//<op>".
    url = f"{GRAPHQL_URL.rstrip('/')}/{op}"

    async def _get(client: AsyncClient):
        # ``cursor`` is rebound by the loop below, so every call sees the
        # cursor of the most recently processed page.
        params = {"variables": {**kv, "cursor": cursor}, "features": features}
        return await client.get(url, params=encode_params(params))

    async for rep in self._inf_req(queue, _get):
        obj = rep.json()

        # cursor-top / cursor-bottom always present
        entries = self.get_ql_entries(obj)
        entries = [x for x in entries if not x["entryId"].startswith("cursor-")]
        cursor = self._get_ql_cursor(obj)

        check = self._is_end(rep, queue, entries, cursor, count, limit)
        count, end_before, end_after = check

        if end_before:
            return

        yield rep

        if end_after:
            return
|
||||||
|
|
||||||
|
async def _ql_item(self, op: str, kv: dict, ft: dict | None = None):
    """Perform a single (non-paginated) GraphQL request and return the response.

    Args:
        op: Operation path; the part after the last ``/`` is used as the
            queue name and log label.
        kv: GraphQL variables.
        ft: Extra feature flags merged over ``BASE_FEATURES``.

    Raises:
        Exception: If the request loop finishes without producing a response.
    """
    # ``None`` default avoids sharing one mutable dict between calls.
    variables, features = {**kv}, {**BASE_FEATURES, **(ft or {})}
    params = {"variables": variables, "features": features}
    # GRAPHQL_URL ends with "/", so strip it to avoid "graphql//<op>".
    url = f"{GRAPHQL_URL.rstrip('/')}/{op}"

    async def _get(client: AsyncClient):
        return await client.get(url, params=encode_params(params))

    queue = op.split("/")[-1]
    async for rep in self._inf_req(queue, _get):
        logger.debug(f"{queue} {self._limit_msg(rep)}")
        return rep

    raise Exception("No response")  # todo
|
||||||
|
|
||||||
|
# search
|
||||||
|
|
||||||
async def search_raw(self, q: str, limit=-1):
|
async def search_raw(self, q: str, limit=-1):
|
||||||
queue, cursor, all_count = "search", None, 0
|
queue, cursor, count = "search", None, 0
|
||||||
|
|
||||||
async def _get(client: AsyncClient):
|
async def _get(client: AsyncClient):
|
||||||
params = {**SEARCH_PARAMS, "q": q, "count": 20}
|
params = {**SEARCH_PARAMS, "q": q, "count": 20}
|
||||||
@ -155,11 +216,11 @@ class Search:
|
|||||||
async for rep in self._inf_req(queue, _get):
|
async for rep in self._inf_req(queue, _get):
|
||||||
data = rep.json()
|
data = rep.json()
|
||||||
|
|
||||||
cursor = self.get_search_cursor(data)
|
cursor = self._get_search_cursor(data)
|
||||||
tweets = data.get("globalObjects", {}).get("tweets", [])
|
tweets = data.get("globalObjects", {}).get("tweets", [])
|
||||||
|
|
||||||
check = self._check_stop(rep, q, all_count, tweets, cursor, limit)
|
check = self._is_end(rep, q, tweets, cursor, count, limit)
|
||||||
all_count, end_before, end_after = check
|
count, end_before, end_after = check
|
||||||
|
|
||||||
if end_before:
|
if end_before:
|
||||||
return
|
return
|
||||||
@ -171,85 +232,160 @@ class Search:
|
|||||||
|
|
||||||
async def search(self, q: str, limit=-1):
|
async def search(self, q: str, limit=-1):
|
||||||
async for rep in self.search_raw(q, limit=limit):
|
async for rep in self.search_raw(q, limit=limit):
|
||||||
data = rep.json()
|
res = rep.json()
|
||||||
items = list(data.get("globalObjects", {}).get("tweets", {}).values())
|
obj = res.get("globalObjects", {})
|
||||||
for x in items:
|
for x in list(obj.get("tweets", {}).values()):
|
||||||
yield Tweet.parse(x, data)
|
yield Tweet.parse(x, obj)
|
||||||
|
|
||||||
# graphql
|
# user_by_id
|
||||||
|
|
||||||
def get_ql_cursor(self, obj: dict) -> str | None:
|
async def user_by_id_raw(self, uid: int):
|
||||||
try:
|
|
||||||
for entry in get_ql_entries(obj):
|
|
||||||
if entry["entryId"].startswith("cursor-bottom-"):
|
|
||||||
return entry["content"]["value"]
|
|
||||||
return None
|
|
||||||
except Exception:
|
|
||||||
return None
|
|
||||||
|
|
||||||
async def graphql_items(self, op: str, variables: dict, features: dict = {}, limit=-1):
|
|
||||||
url = f"https://twitter.com/i/api/graphql/{op}"
|
|
||||||
features = {**BASE_FEATURES, **features}
|
|
||||||
|
|
||||||
queue, cursor, all_count = op.split("/")[-1], None, 0
|
|
||||||
|
|
||||||
async def _get(client: AsyncClient):
|
|
||||||
params = {"variables": {**variables, "cursor": cursor}, "features": features}
|
|
||||||
return await client.get(url, params=json_params(params))
|
|
||||||
|
|
||||||
async for rep in self._inf_req(queue, _get):
|
|
||||||
data = rep.json()
|
|
||||||
entries, cursor = get_ql_entries(data), self.get_ql_cursor(data)
|
|
||||||
|
|
||||||
# cursor-top / cursor-bottom always present
|
|
||||||
items = [x for x in entries if not x["entryId"].startswith("cursor-")]
|
|
||||||
check = self._check_stop(rep, queue, all_count, items, cursor, limit)
|
|
||||||
all_count, end_before, end_after = check
|
|
||||||
|
|
||||||
if end_before:
|
|
||||||
return
|
|
||||||
|
|
||||||
yield rep
|
|
||||||
|
|
||||||
if end_after:
|
|
||||||
return
|
|
||||||
|
|
||||||
async def graphql_item(self, op: str, variables: dict, features: dict = {}):
|
|
||||||
url = f"https://twitter.com/i/api/graphql/{op}"
|
|
||||||
features = {**BASE_FEATURES, **features}
|
|
||||||
|
|
||||||
async def _get(client: AsyncClient):
|
|
||||||
params = {"variables": {**variables}, "features": features}
|
|
||||||
return await client.get(url, params=json_params(params))
|
|
||||||
|
|
||||||
queue = op.split("/")[-1]
|
|
||||||
async for rep in self._inf_req(queue, _get):
|
|
||||||
msg = [
|
|
||||||
f"{queue}",
|
|
||||||
f"[{rep.headers['x-rate-limit-remaining']}/{rep.headers['x-rate-limit-limit']}]",
|
|
||||||
]
|
|
||||||
logger.debug(" ".join(msg))
|
|
||||||
|
|
||||||
return rep
|
|
||||||
|
|
||||||
async def user_by_login(self, login: str):
|
|
||||||
op = "sLVLhk0bGj3MVFEKTdax1w/UserByScreenName"
|
|
||||||
kv = {"screen_name": login, "withSafetyModeUserFields": True}
|
|
||||||
return await self.graphql_item(op, kv)
|
|
||||||
|
|
||||||
async def user_by_id(self, uid: int):
|
|
||||||
op = "GazOglcBvgLigl3ywt6b3Q/UserByRestId"
|
op = "GazOglcBvgLigl3ywt6b3Q/UserByRestId"
|
||||||
kv = {"userId": str(uid), "withSafetyModeUserFields": True}
|
kv = {"userId": str(uid), "withSafetyModeUserFields": True}
|
||||||
return await self.graphql_item(op, kv)
|
return await self._ql_item(op, kv)
|
||||||
|
|
||||||
async def retweeters(self, twid: int, limit=-1):
|
async def user_by_id(self, uid: int):
    """Fetch the user with id ``uid`` and return it parsed by ``User.parse``."""
    rep = await self.user_by_id_raw(uid)
    res = rep.json()
    return User.parse(to_old_obj(res["data"]["user"]["result"]))
|
||||||
|
|
||||||
|
# user_by_login
|
||||||
|
|
||||||
|
async def user_by_login_raw(self, login: str):
    """Request the ``UserByScreenName`` operation for ``login`` and return the raw response."""
    op = "sLVLhk0bGj3MVFEKTdax1w/UserByScreenName"
    kv = {"screen_name": login, "withSafetyModeUserFields": True}
    return await self._ql_item(op, kv)
|
||||||
|
|
||||||
|
async def user_by_login(self, login: str):
    """Fetch the user with screen name ``login`` and return it parsed by ``User.parse``."""
    rep = await self.user_by_login_raw(login)
    res = rep.json()
    return User.parse(to_old_obj(res["data"]["user"]["result"]))
|
||||||
|
|
||||||
|
# tweet_details
|
||||||
|
|
||||||
|
async def tweet_details_raw(self, twid: int):
    """Request the ``TweetDetail`` operation for tweet ``twid`` and return the raw response."""
    op = "zXaXQgfyR4GxE21uwYQSyA/TweetDetail"
    kv = {
        "focalTweetId": str(twid),
        "referrer": "tweet",  # tweet, profile
        "with_rux_injections": False,
        "includePromotedContent": True,
        "withCommunity": True,
        "withQuickPromoteEligibilityTweetFields": True,
        "withBirdwatchNotes": True,
        "withVoice": True,
        "withV2Timeline": True,
        "withDownvotePerspective": False,
        "withReactionsMetadata": False,
        "withReactionsPerspective": False,
        "withSuperFollowsTweetFields": False,
        "withSuperFollowsUserFields": False,
    }
    # Feature flags sent in addition to the defaults for this operation.
    ft = {
        "responsive_web_twitter_blue_verified_badge_is_enabled": True,
        "longform_notetweets_richtext_consumption_enabled": True,
    }
    return await self._ql_item(op, kv, ft)
|
||||||
|
|
||||||
|
async def tweet_details(self, twid: int):
    """Fetch tweet ``twid`` and return it parsed by ``Tweet.parse``.

    The response is first converted with ``to_search_like``; the tweet is
    then looked up under ``str(twid)`` in its ``"tweets"`` mapping.
    """
    rep = await self.tweet_details_raw(twid)
    obj = to_search_like(rep.json())
    return Tweet.parse(obj["tweets"][str(twid)], obj)
|
||||||
|
|
||||||
|
# followers
|
||||||
|
|
||||||
|
async def followers_raw(self, uid: int, limit=-1):
    """Yield raw response pages of the ``Followers`` operation for user ``uid``."""
    op = "djdTXDIk2qhd4OStqlUFeQ/Followers"
    kv = {"userId": str(uid), "count": 20, "includePromotedContent": False}
    async for x in self._ql_items(op, kv, limit=limit):
        yield x
|
||||||
|
|
||||||
|
async def followers(self, uid: int, limit=-1):
    """Yield a parsed ``User`` for every user found in the ``Followers`` pages of ``uid``."""
    async for rep in self.followers_raw(uid, limit=limit):
        obj = to_search_like(rep.json())
        # Only the user objects are needed; the mapping keys are unused.
        for v in obj["users"].values():
            yield User.parse(v)
|
||||||
|
|
||||||
|
# following
|
||||||
|
|
||||||
|
async def following_raw(self, uid: int, limit=-1):
    """Yield raw response pages of the ``Following`` operation for user ``uid``."""
    op = "IWP6Zt14sARO29lJT35bBw/Following"
    kv = {"userId": str(uid), "count": 20, "includePromotedContent": False}
    async for x in self._ql_items(op, kv, limit=limit):
        yield x
|
||||||
|
|
||||||
|
async def following(self, uid: int, limit=-1):
    """Yield a parsed ``User`` for every user found in the ``Following`` pages of ``uid``."""
    async for rep in self.following_raw(uid, limit=limit):
        obj = to_search_like(rep.json())
        # Only the user objects are needed; the mapping keys are unused.
        for v in obj["users"].values():
            yield User.parse(v)
|
||||||
|
|
||||||
|
# retweeters
|
||||||
|
|
||||||
|
async def retweeters_raw(self, twid: int, limit=-1):
|
||||||
op = "U5f_jm0CiLmSfI1d4rGleQ/Retweeters"
|
op = "U5f_jm0CiLmSfI1d4rGleQ/Retweeters"
|
||||||
kv = {"tweetId": str(twid), "count": 20, "includePromotedContent": True}
|
kv = {"tweetId": str(twid), "count": 20, "includePromotedContent": True}
|
||||||
async for x in self.graphql_items(op, kv, limit=limit):
|
async for x in self._ql_items(op, kv, limit=limit):
|
||||||
|
yield x
|
||||||
|
|
||||||
|
async def retweeters(self, twid: int, limit=-1):
    """Yield a parsed ``User`` for every user found in the ``Retweeters`` pages of tweet ``twid``."""
    async for rep in self.retweeters_raw(twid, limit=limit):
        obj = to_search_like(rep.json())
        # Only the user objects are needed; the mapping keys are unused.
        for v in obj["users"].values():
            yield User.parse(v)
|
||||||
|
|
||||||
|
# favoriters
|
||||||
|
|
||||||
|
async def favoriters_raw(self, twid: int, limit=-1):
|
||||||
|
op = "vcTrPlh9ovFDQejz22q9vg/Favoriters"
|
||||||
|
kv = {"tweetId": str(twid), "count": 20, "includePromotedContent": True}
|
||||||
|
async for x in self._ql_items(op, kv, limit=limit):
|
||||||
yield x
|
yield x
|
||||||
|
|
||||||
async def favoriters(self, twid: int, limit=-1):
|
async def favoriters(self, twid: int, limit=-1):
|
||||||
op = "vcTrPlh9ovFDQejz22q9vg/Favoriters"
|
async for rep in self.favoriters_raw(twid, limit=limit):
|
||||||
kv = {"tweetId": str(twid), "count": 20, "includePromotedContent": True}
|
obj = to_search_like(rep.json())
|
||||||
async for x in self.graphql_items(op, kv, limit=limit):
|
for _, v in obj["users"].items():
|
||||||
|
yield User.parse(v)
|
||||||
|
|
||||||
|
# user_tweets
|
||||||
|
|
||||||
|
async def user_tweets_raw(self, uid: int, limit=-1):
|
||||||
|
op = "CdG2Vuc1v6F5JyEngGpxVw/UserTweets"
|
||||||
|
kv = {
|
||||||
|
"userId": str(uid),
|
||||||
|
"count": 40,
|
||||||
|
"includePromotedContent": True,
|
||||||
|
"withQuickPromoteEligibilityTweetFields": True,
|
||||||
|
"withVoice": True,
|
||||||
|
"withV2Timeline": True,
|
||||||
|
}
|
||||||
|
async for x in self._ql_items(op, kv, limit=limit):
|
||||||
yield x
|
yield x
|
||||||
|
|
||||||
|
async def user_tweets(self, uid: int, limit=-1):
    """Yield a parsed ``Tweet`` for every tweet found in the ``UserTweets`` pages of ``uid``."""
    async for rep in self.user_tweets_raw(uid, limit=limit):
        obj = to_search_like(rep.json())
        # The whole converted page is passed along so ``Tweet.parse`` can
        # look up related objects in it.
        for v in obj["tweets"].values():
            yield Tweet.parse(v, obj)
|
||||||
|
|
||||||
|
# user_tweets_and_replies
|
||||||
|
|
||||||
|
async def user_tweets_and_replies_raw(self, uid: int, limit=-1):
    """Yield raw response pages of the ``UserTweetsAndReplies`` operation for user ``uid``."""
    op = "zQxfEr5IFxQ2QZ-XMJlKew/UserTweetsAndReplies"
    kv = {
        "userId": str(uid),
        "count": 40,
        "includePromotedContent": True,
        "withCommunity": True,
        "withVoice": True,
        "withV2Timeline": True,
    }
    async for x in self._ql_items(op, kv, limit=limit):
        yield x
|
||||||
|
|
||||||
|
async def user_tweets_and_replies(self, uid: int, limit=-1):
    """Yield a parsed ``Tweet`` for every tweet found in the ``UserTweetsAndReplies`` pages of ``uid``."""
    async for rep in self.user_tweets_and_replies_raw(uid, limit=limit):
        obj = to_search_like(rep.json())
        # The whole converted page is passed along so ``Tweet.parse`` can
        # look up related objects in it.
        for v in obj["tweets"].values():
            yield Tweet.parse(v, obj)
|
||||||
|
|||||||
@ -1,8 +1,49 @@
|
|||||||
|
import json
|
||||||
|
from collections import defaultdict
|
||||||
from typing import Any, TypeVar
|
from typing import Any, TypeVar
|
||||||
|
|
||||||
|
from httpx import HTTPStatusError, Response
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
T = TypeVar("T")
|
T = TypeVar("T")
|
||||||
|
|
||||||
|
|
||||||
|
def raise_for_status(rep: Response, label: str):
    """Call ``rep.raise_for_status()`` and log the failure details before re-raising.

    Args:
        rep: Response to check.
        label: Prefix for the debug log line.

    Raises:
        HTTPStatusError: Re-raised unchanged after the status code and body
            text have been logged at debug level.
    """
    try:
        rep.raise_for_status()
    except HTTPStatusError:
        logger.debug(f"{label} - {rep.status_code} - {rep.text}")
        # Bare ``raise`` re-raises the active exception as-is.
        raise
|
||||||
|
|
||||||
|
|
||||||
|
def encode_params(obj: dict) -> dict[str, str]:
    """Convert a parameter mapping into a flat mapping of strings.

    Dict values have their ``None`` entries dropped (top level only) and
    are serialised as compact JSON; every other value is passed through
    ``str``.  The input mapping is not modified.
    """
    res = {}
    for k, v in obj.items():
        if isinstance(v, dict):
            v = {a: b for a, b in v.items() if b is not None}
            # Compact separators: no whitespace in the encoded JSON.
            v = json.dumps(v, separators=(",", ":"))

        res[k] = str(v)

    return res
|
||||||
|
|
||||||
|
|
||||||
|
def get_or(obj: dict, key: str, default_value: "T" = None) -> "Any | T":
    """Look up a dotted ``key`` path in nested dicts.

    Args:
        obj: Root mapping.
        key: Path such as ``"a.b.c"``; each part is one dict key.
        default_value: Returned when any part of the path is missing.

    Returns:
        The value at the end of the path, or ``default_value`` when a part
        is absent or an intermediate value is not a dict (e.g. ``None``,
        a number or a string), instead of raising ``TypeError``.
    """
    for part in key.split("."):
        # A non-dict intermediate cannot be descended into; treat it the
        # same as a missing key.
        if not isinstance(obj, dict) or part not in obj:
            return default_value
        obj = obj[part]
    return obj
|
||||||
|
|
||||||
|
|
||||||
|
def int_or_none(obj: dict, key: str) -> int | None:
    """Return the value at dotted path ``key`` converted with ``int``, else ``None``.

    ``None`` is returned when the value is missing or ``None``, and also —
    deliberately, as a best-effort conversion — when the lookup or the
    ``int`` conversion raises.
    """
    try:
        val = get_or(obj, key)
        return int(val) if val is not None else None
    except Exception:
        # Best-effort: an unconvertible value is treated as absent.
        return None
|
||||||
|
|
||||||
|
|
||||||
# https://stackoverflow.com/a/43184871
|
# https://stackoverflow.com/a/43184871
|
||||||
def find_item(obj: dict, key: str, default=None):
|
def find_item(obj: dict, key: str, default=None):
|
||||||
stack = [iter(obj.items())]
|
stack = [iter(obj.items())]
|
||||||
@ -21,17 +62,33 @@ def find_item(obj: dict, key: str, default=None):
|
|||||||
return default
|
return default
|
||||||
|
|
||||||
|
|
||||||
def get_or(obj: dict, key: str, default_value: T = None) -> Any | T:
|
def get_typed_object(obj: dict, res: defaultdict[str, list]):
|
||||||
for part in key.split("."):
|
obj_type = obj.get("__typename", None)
|
||||||
if part not in obj:
|
if obj_type is not None:
|
||||||
return default_value
|
res[obj_type].append(obj)
|
||||||
obj = obj[part]
|
|
||||||
return obj
|
for k, v in obj.items():
|
||||||
|
if isinstance(v, dict):
|
||||||
|
get_typed_object(v, res)
|
||||||
|
elif isinstance(v, list):
|
||||||
|
for x in v:
|
||||||
|
if isinstance(x, dict):
|
||||||
|
get_typed_object(x, res)
|
||||||
|
|
||||||
|
return res
|
||||||
|
|
||||||
|
|
||||||
def int_or_none(obj: dict, key: str):
|
def to_old_obj(obj: dict):
|
||||||
try:
|
return {**obj, **obj["legacy"], "id_str": str(obj["rest_id"]), "id": int(obj["rest_id"])}
|
||||||
val = get_or(obj, key)
|
|
||||||
return int(val) if val is not None else None
|
|
||||||
except Exception:
|
def to_search_like(obj: dict):
|
||||||
return None
|
tmp = get_typed_object(obj, defaultdict(list))
|
||||||
|
|
||||||
|
tweets = [x for x in tmp.get("Tweet", []) if "legacy" in x]
|
||||||
|
tweets = {str(x["rest_id"]): to_old_obj(x) for x in tweets}
|
||||||
|
|
||||||
|
users = [x for x in tmp.get("User", []) if "legacy" in x and "id" in x]
|
||||||
|
users = {str(x["rest_id"]): to_old_obj(x) for x in users}
|
||||||
|
|
||||||
|
return {"tweets": tweets, "users": users}
|
||||||
|
|||||||
Загрузка…
x
Ссылка в новой задаче
Block a user