зеркало из
https://github.com/viginum-datalab/twscrape.git
synced 2025-10-29 13:06:13 +02:00
add account import with cookies; update mocks
Этот коммит содержится в:
родитель
22c29b673d
Коммит
4938ca7d6c
12
Makefile
12
Makefile
@ -41,3 +41,15 @@ changelog:
|
||||
|
||||
test34:
|
||||
docker build -f Dockerfile-test .
|
||||
|
||||
# Regenerate the raw API fixtures under tests/mocked-data from live responses.
# The filter "." is passed to jq explicitly: older jq releases require a
# filter argument and print usage instead of pretty-printing when it is omitted.
update-mocks:
	twscrape user_by_id --raw 2244994945 | jq . > ./tests/mocked-data/user_by_id_raw.json
	twscrape user_by_login --raw twitterdev | jq . > ./tests/mocked-data/user_by_login_raw.json
	twscrape followers --raw --limit 10 2244994945 | jq . > ./tests/mocked-data/followers_raw.json
	twscrape following --raw --limit 10 2244994945 | jq . > ./tests/mocked-data/following_raw.json
	twscrape tweet_details --raw 1649191520250245121 | jq . > ./tests/mocked-data/tweet_details_raw.json
	twscrape retweeters --raw --limit 10 1649191520250245121 | jq . > ./tests/mocked-data/retweeters_raw.json
	twscrape favoriters --raw --limit 10 1649191520250245121 | jq . > ./tests/mocked-data/favoriters_raw.json
	twscrape user_tweets --raw --limit 10 2244994945 | jq . > ./tests/mocked-data/user_tweets_raw.json
	twscrape user_tweets_and_replies --raw --limit 10 2244994945 | jq . > ./tests/mocked-data/user_tweets_and_replies_raw.json
	twscrape search --raw --limit 10 "elon musk lang:en" | jq . > ./tests/mocked-data/search_raw.json
|
||||
|
||||
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
@ -48,25 +48,25 @@
|
||||
}
|
||||
},
|
||||
"fast_followers_count": 0,
|
||||
"favourites_count": 2128,
|
||||
"followers_count": 583721,
|
||||
"friends_count": 1941,
|
||||
"favourites_count": 2152,
|
||||
"followers_count": 587135,
|
||||
"friends_count": 1945,
|
||||
"has_custom_timelines": true,
|
||||
"is_translator": false,
|
||||
"listed_count": 2395,
|
||||
"listed_count": 2459,
|
||||
"location": "127.0.0.1",
|
||||
"media_count": 815,
|
||||
"name": "Twitter Dev",
|
||||
"normal_followers_count": 583721,
|
||||
"normal_followers_count": 587135,
|
||||
"pinned_tweet_ids_str": [
|
||||
"1641222782594990080"
|
||||
"1661790253886177280"
|
||||
],
|
||||
"possibly_sensitive": false,
|
||||
"profile_banner_url": "https://pbs.twimg.com/profile_banners/2244994945/1660405530",
|
||||
"profile_image_url_https": "https://pbs.twimg.com/profile_images/1445764922474827784/W2zEPN7U_normal.jpg",
|
||||
"profile_interstitial_type": "",
|
||||
"screen_name": "TwitterDev",
|
||||
"statuses_count": 4076,
|
||||
"statuses_count": 4080,
|
||||
"translator_type": "regular",
|
||||
"url": "https://t.co/9wI31m3ELF",
|
||||
"verified": false,
|
||||
@ -87,8 +87,14 @@
|
||||
},
|
||||
"smart_blocked_by": false,
|
||||
"smart_blocking": false,
|
||||
"business_account": {}
|
||||
"business_account": {},
|
||||
"highlights_info": {
|
||||
"can_highlight_tweets": true,
|
||||
"highlighted_tweets": "0"
|
||||
},
|
||||
"creator_subscriptions_count": 0,
|
||||
"has_hidden_likes_on_profile": false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -48,25 +48,25 @@
|
||||
}
|
||||
},
|
||||
"fast_followers_count": 0,
|
||||
"favourites_count": 2128,
|
||||
"followers_count": 583731,
|
||||
"friends_count": 1941,
|
||||
"favourites_count": 2152,
|
||||
"followers_count": 587135,
|
||||
"friends_count": 1945,
|
||||
"has_custom_timelines": true,
|
||||
"is_translator": false,
|
||||
"listed_count": 2395,
|
||||
"listed_count": 2459,
|
||||
"location": "127.0.0.1",
|
||||
"media_count": 815,
|
||||
"name": "Twitter Dev",
|
||||
"normal_followers_count": 583731,
|
||||
"normal_followers_count": 587135,
|
||||
"pinned_tweet_ids_str": [
|
||||
"1641222782594990080"
|
||||
"1661790253886177280"
|
||||
],
|
||||
"possibly_sensitive": false,
|
||||
"profile_banner_url": "https://pbs.twimg.com/profile_banners/2244994945/1660405530",
|
||||
"profile_image_url_https": "https://pbs.twimg.com/profile_images/1445764922474827784/W2zEPN7U_normal.jpg",
|
||||
"profile_interstitial_type": "",
|
||||
"screen_name": "TwitterDev",
|
||||
"statuses_count": 4076,
|
||||
"statuses_count": 4080,
|
||||
"translator_type": "regular",
|
||||
"url": "https://t.co/9wI31m3ELF",
|
||||
"verified": false,
|
||||
@ -96,6 +96,7 @@
|
||||
}
|
||||
},
|
||||
"is_profile_translatable": false,
|
||||
"has_hidden_likes_on_profile": false,
|
||||
"verification_info": {
|
||||
"reason": {
|
||||
"description": {
|
||||
@ -113,16 +114,22 @@
|
||||
"from_index": 75,
|
||||
"to_index": 85,
|
||||
"ref": {
|
||||
"url": "https://help.twitter.com/en/rules-and-policies/profile-labels",
|
||||
"url": "https://twitter.com/i/twitter_blue_sign_up",
|
||||
"url_type": "ExternalUrl"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"verified_since_msec": "1395425700472"
|
||||
}
|
||||
},
|
||||
"business_account": {}
|
||||
"highlights_info": {
|
||||
"can_highlight_tweets": true,
|
||||
"highlighted_tweets": "0"
|
||||
},
|
||||
"business_account": {},
|
||||
"creator_subscriptions_count": 0
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
18
tests/test_utils.py
Обычный файл
18
tests/test_utils.py
Обычный файл
@ -0,0 +1,18 @@
|
||||
from twscrape.utils import parse_cookies
|
||||
|
||||
|
||||
def test_cookies_parse():
    """Check that every accepted cookie encoding yields the same mapping."""
    expected = {"abc": "123", "def": "456", "ghi": "789"}

    samples = (
        # "name=value; ..." cookie-header text
        "abc=123; def=456; ghi=789",
        # JSON object
        '{"abc": "123", "def": "456", "ghi": "789"}',
        # JSON list of {"name", "value"} records
        '[{"name": "abc", "value": "123"}, {"name": "def", "value": "456"}, {"name": "ghi", "value": "789"}]',
        # base64 of the JSON object
        "eyJhYmMiOiAiMTIzIiwgImRlZiI6ICI0NTYiLCAiZ2hpIjogIjc4OSJ9",
        # base64 of the JSON list
        "W3sibmFtZSI6ICJhYmMiLCAidmFsdWUiOiAiMTIzIn0sIHsibmFtZSI6ICJkZWYiLCAidmFsdWUiOiAiNDU2In0sIHsibmFtZSI6ICJnaGkiLCAidmFsdWUiOiAiNzg5In1d",
    )

    for raw in samples:
        assert parse_cookies(raw) == expected
|
||||
@ -62,4 +62,7 @@ class Account(JSONTrait):
|
||||
client.headers["x-twitter-active-user"] = "yes"
|
||||
client.headers["x-twitter-client-language"] = "en"
|
||||
|
||||
if "ct0" in client.cookies:
|
||||
client.headers["x-csrf-token"] = client.cookies["ct0"]
|
||||
|
||||
return client
|
||||
|
||||
@ -11,7 +11,7 @@ from .account import Account
|
||||
from .db import execute, fetchall, fetchone
|
||||
from .logger import logger
|
||||
from .login import login
|
||||
from .utils import utc_ts
|
||||
from .utils import parse_cookies, utc_ts
|
||||
|
||||
|
||||
class AccountInfo(TypedDict):
|
||||
@ -35,17 +35,17 @@ class AccountsPool:
|
||||
self._db_file = db_file
|
||||
|
||||
async def load_from_file(self, filepath: str, line_format: str):
|
||||
assert "username" in line_format, "username is required"
|
||||
assert "password" in line_format, "password is required"
|
||||
assert "email" in line_format, "email is required"
|
||||
assert "email_password" in line_format, "email_password is required"
|
||||
|
||||
line_delim = guess_delim(line_format)
|
||||
tokens = line_format.split(line_delim)
|
||||
|
||||
required = set(["username", "password", "email", "email_password"])
|
||||
if not required.issubset(tokens):
|
||||
raise ValueError(f"Invalid line format: {line_format}")
|
||||
|
||||
with open(filepath, "r") as f:
|
||||
lines = f.read().split("\n")
|
||||
lines = [x.strip() for x in lines if x.strip()]
|
||||
|
||||
for line in lines:
|
||||
data = [x.strip() for x in line.split(line_delim)]
|
||||
if len(data) < len(tokens):
|
||||
@ -53,7 +53,8 @@ class AccountsPool:
|
||||
continue
|
||||
|
||||
data = data[: len(tokens)]
|
||||
await self.add_account(**{k: v for k, v in zip(tokens, data)})
|
||||
vals = {k: v for k, v in zip(tokens, data) if k != "_"}
|
||||
await self.add_account(**vals)
|
||||
|
||||
async def add_account(
|
||||
self,
|
||||
@ -63,6 +64,7 @@ class AccountsPool:
|
||||
email_password: str,
|
||||
user_agent: str | None = None,
|
||||
proxy: str | None = None,
|
||||
cookies: str | None = None,
|
||||
):
|
||||
qs = "SELECT * FROM accounts WHERE username = :username"
|
||||
rs = await fetchone(self._db_file, qs, {"username": username})
|
||||
@ -82,9 +84,13 @@ class AccountsPool:
|
||||
locks={},
|
||||
stats={},
|
||||
headers={},
|
||||
cookies={},
|
||||
cookies=parse_cookies(cookies) if cookies else {},
|
||||
proxy=proxy,
|
||||
)
|
||||
|
||||
if "ct0" in account.cookies:
|
||||
account.active = True
|
||||
|
||||
await self.save(account)
|
||||
|
||||
async def delete_accounts(self, usernames: str | list[str]):
|
||||
|
||||
@ -7,12 +7,7 @@ from .queue_client import QueueClient
|
||||
from .utils import encode_params, find_obj, get_by_path, to_old_obj, to_old_rep
|
||||
|
||||
SEARCH_FEATURES = {
|
||||
"rweb_lists_timeline_redesign_enabled": True,
|
||||
"creator_subscriptions_tweet_preview_api_enabled": True,
|
||||
"responsive_web_twitter_article_tweet_consumption_enabled": False,
|
||||
"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": True,
|
||||
"responsive_web_media_download_video_enabled": False,
|
||||
"longform_notetweets_inline_media_enabled": True,
|
||||
}
|
||||
|
||||
|
||||
@ -109,7 +104,12 @@ class API:
|
||||
async def user_by_id_raw(self, uid: int, kv=None):
|
||||
op = OP_UserByRestId
|
||||
kv = {"userId": str(uid), "withSafetyModeUserFields": True, **(kv or {})}
|
||||
return await self._gql_item(op, kv)
|
||||
ft = {
|
||||
"hidden_profile_likes_enabled": True,
|
||||
"highlights_tweets_tab_ui_enabled": True,
|
||||
"creator_subscriptions_tweet_preview_api_enabled": True,
|
||||
}
|
||||
return await self._gql_item(op, kv, ft)
|
||||
|
||||
async def user_by_id(self, uid: int, kv=None):
|
||||
rep = await self.user_by_id_raw(uid, kv=kv)
|
||||
@ -121,7 +121,13 @@ class API:
|
||||
async def user_by_login_raw(self, login: str, kv=None):
|
||||
op = OP_UserByScreenName
|
||||
kv = {"screen_name": login, "withSafetyModeUserFields": True, **(kv or {})}
|
||||
return await self._gql_item(op, kv)
|
||||
ft = {
|
||||
"highlights_tweets_tab_ui_enabled": True,
|
||||
"hidden_profile_likes_enabled": True,
|
||||
"creator_subscriptions_tweet_preview_api_enabled": True,
|
||||
"subscriptions_verification_info_verified_since_enabled": True,
|
||||
}
|
||||
return await self._gql_item(op, kv, ft)
|
||||
|
||||
async def user_by_login(self, login: str, kv=None):
|
||||
rep = await self.user_by_login_raw(login, kv=kv)
|
||||
|
||||
@ -3,6 +3,18 @@ TOKEN = "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Z
|
||||
GQL_URL = "https://twitter.com/i/api/graphql"
|
||||
LOGIN_URL = "https://api.twitter.com/1.1/onboarding/task.json"
|
||||
|
||||
OP_SearchTimeline = "L1VfBERtzc3VkBBT0YAYHA/SearchTimeline"
|
||||
OP_UserByRestId = "Lxg1V9AiIzzXEiP2c8dRnw/UserByRestId"
|
||||
OP_UserByScreenName = "oUZZZ8Oddwxs8Cd3iW3UEA/UserByScreenName"
|
||||
OP_TweetDetail = "NmCeCgkVlsRGS1cAwqtgmw/TweetDetail"
|
||||
OP_Followers = "FKV1jfu4AawGapl2KCZbQw/Followers"
|
||||
OP_Following = "sKlU5dd_nanz9P2CxBt2sg/Following"
|
||||
OP_Retweeters = "Gnw_Swm60cS-biSLn2OWNw/Retweeters"
|
||||
OP_Favoriters = "rUyh8HWk8IXv_fvVKj3QjA/Favoriters"
|
||||
OP_UserTweets = "x8SpjuBpqoww-edf0aUUKA/UserTweets"
|
||||
OP_UserTweetsAndReplies = "RB2KVuVBRZe4GW8KkoVF2A/UserTweetsAndReplies"
|
||||
OP_ListLatestTweetsTimeline = "2Vjeyo_L0nizAUhHe3fKyA/ListLatestTweetsTimeline"
|
||||
|
||||
GQL_FEATURES = {
|
||||
"blue_business_profile_image_shape_enabled": True,
|
||||
"responsive_web_graphql_exclude_directive_enabled": True,
|
||||
@ -23,16 +35,9 @@ GQL_FEATURES = {
|
||||
"responsive_web_text_conversations_enabled": False,
|
||||
"longform_notetweets_rich_text_read_enabled": True,
|
||||
"responsive_web_enhance_cards_enabled": False,
|
||||
"creator_subscriptions_tweet_preview_api_enabled": True,
|
||||
"longform_notetweets_inline_media_enabled": True,
|
||||
"responsive_web_media_download_video_enabled": False,
|
||||
"rweb_lists_timeline_redesign_enabled": True,
|
||||
"responsive_web_twitter_article_tweet_consumption_enabled": False,
|
||||
}
|
||||
|
||||
OP_SearchTimeline = "L1VfBERtzc3VkBBT0YAYHA/SearchTimeline"
|
||||
OP_UserByRestId = "Lxg1V9AiIzzXEiP2c8dRnw/UserByRestId"
|
||||
OP_UserByScreenName = "oUZZZ8Oddwxs8Cd3iW3UEA/UserByScreenName"
|
||||
OP_TweetDetail = "NmCeCgkVlsRGS1cAwqtgmw/TweetDetail"
|
||||
OP_Followers = "FKV1jfu4AawGapl2KCZbQw/Followers"
|
||||
OP_Following = "sKlU5dd_nanz9P2CxBt2sg/Following"
|
||||
OP_Retweeters = "Gnw_Swm60cS-biSLn2OWNw/Retweeters"
|
||||
OP_Favoriters = "rUyh8HWk8IXv_fvVKj3QjA/Favoriters"
|
||||
OP_UserTweets = "x8SpjuBpqoww-edf0aUUKA/UserTweets"
|
||||
OP_UserTweetsAndReplies = "RB2KVuVBRZe4GW8KkoVF2A/UserTweetsAndReplies"
|
||||
OP_ListLatestTweetsTimeline = "2Vjeyo_L0nizAUhHe3fKyA/ListLatestTweetsTimeline"
|
||||
|
||||
@ -112,14 +112,21 @@ class QueueClient:
|
||||
except json.JSONDecodeError:
|
||||
res: Any = {"_raw": rep.text}
|
||||
|
||||
fn = logger.info if rep.status_code == 200 else logger.warning
|
||||
fn(f"{rep.status_code:3d} - {req_id(rep)}")
|
||||
|
||||
msg = "OK"
|
||||
if "errors" in res:
|
||||
msg = "; ".join([x["message"] for x in res["errors"]])
|
||||
if rep.status_code == 200 and "_Missing: No status found with that ID." in msg:
|
||||
return # ignore this error
|
||||
msg = "; ".join([f'({x.get("code", -1)}) {x["message"]}' for x in res["errors"]])
|
||||
|
||||
fn = logger.info if rep.status_code == 200 else logger.warning
|
||||
fn(f"{rep.status_code:3d} - {req_id(rep)} - {msg}")
|
||||
|
||||
if msg.startswith("The following features cannot be null"):
|
||||
logger.error(f"Invalid request: {msg}")
|
||||
exit(1)
|
||||
|
||||
if rep.status_code == 200 and "_Missing: No status found with that ID." in msg:
|
||||
return # ignore this error
|
||||
|
||||
if msg != "OK":
|
||||
raise ApiError(rep, res)
|
||||
|
||||
rep.raise_for_status()
|
||||
|
||||
@ -1,7 +1,8 @@
|
||||
import base64
|
||||
import json
|
||||
from collections import defaultdict
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, AsyncGenerator, Callable, TypedDict, TypeVar
|
||||
from typing import Any, AsyncGenerator, Callable, TypeVar
|
||||
|
||||
from httpx import HTTPStatusError, Response
|
||||
|
||||
@ -181,3 +182,26 @@ def print_table(rows: list[dict], hr_after=False):
|
||||
print("\n".join(lines))
|
||||
if hr_after:
|
||||
print("-" * max_len)
|
||||
|
||||
|
||||
def _parse_cookie_header(val: str) -> dict[str, str]:
    """Parse ``name=value; name2=value2`` text; raise ValueError if malformed."""
    cookies: dict[str, str] = {}
    for item in val.split(";"):
        item = item.strip()
        if not item:
            continue  # tolerate a trailing or doubled ';'

        # Split on the FIRST '=' only: values may themselves contain '='
        # (for example base64-padded tokens).
        name, sep, value = item.partition("=")
        if not sep:
            raise ValueError(f"Invalid cookie value: {val}")
        cookies[name.strip()] = value.strip()

    if not cookies:
        raise ValueError(f"Invalid cookie value: {val}")
    return cookies


def parse_cookies(val: str) -> dict[str, str]:
    """Convert a cookie string in one of several encodings to a dict.

    Accepted forms of ``val``:

    * cookie-header text: ``"abc=123; def=456"``
    * a JSON object: ``'{"abc": "123"}'``
    * a JSON list of records: ``'[{"name": "abc", "value": "123"}]'``
    * base64 of any of the above

    Args:
        val: The raw cookie string.

    Returns:
        Mapping of cookie name to cookie value. A JSON object is returned
        as decoded, without further validation of its values.

    Raises:
        ValueError: If ``val`` matches none of the accepted forms.
    """
    try:
        # validate=True rejects input containing characters outside the base64
        # alphabet instead of silently discarding them, so ordinary cookie
        # text (which contains ';' and spaces) is never mis-decoded.
        val = base64.b64decode(val, validate=True).decode()
    except (ValueError, TypeError):
        pass  # not base64 (or not text once decoded): use the value as given

    try:
        try:
            res = json.loads(val)
        except json.JSONDecodeError:
            return _parse_cookie_header(val)

        if isinstance(res, list):
            return {x["name"]: x["value"] for x in res}
        if isinstance(res, dict):
            return res
    except (KeyError, TypeError, ValueError):
        pass  # malformed input of any shape is reported uniformly below

    raise ValueError(f"Invalid cookie value: {val}")
|
||||
|
||||
Загрузка…
x
Ссылка в новой задаче
Block a user