add account import with cookies; update mocks

Этот коммит содержится в:
Vlad Pronsky 2023-07-14 23:10:06 +03:00
родитель 22c29b673d
Коммит 4938ca7d6c
18 изменённых файлов: 17555 добавлений и 20667 удалений

Просмотреть файл

@ -41,3 +41,15 @@ changelog:
test34: test34:
docker build -f Dockerfile-test . docker build -f Dockerfile-test .
# Regenerate the raw API response fixtures in ./tests/mocked-data: each line
# runs one twscrape CLI command with --raw and pipes its output through jq
# into the matching *_raw.json file.
update-mocks:
twscrape user_by_id --raw 2244994945 | jq > ./tests/mocked-data/user_by_id_raw.json
twscrape user_by_login --raw twitterdev | jq > ./tests/mocked-data/user_by_login_raw.json
twscrape followers --raw --limit 10 2244994945 | jq > ./tests/mocked-data/followers_raw.json
twscrape following --raw --limit 10 2244994945 | jq > ./tests/mocked-data/following_raw.json
twscrape tweet_details --raw 1649191520250245121 | jq > ./tests/mocked-data/tweet_details_raw.json
twscrape retweeters --raw --limit 10 1649191520250245121 | jq > ./tests/mocked-data/retweeters_raw.json
twscrape favoriters --raw --limit 10 1649191520250245121 | jq > ./tests/mocked-data/favoriters_raw.json
twscrape user_tweets --raw --limit 10 2244994945 | jq > ./tests/mocked-data/user_tweets_raw.json
twscrape user_tweets_and_replies --raw --limit 10 2244994945 | jq > ./tests/mocked-data/user_tweets_and_replies_raw.json
twscrape search --raw --limit 10 "elon musk lang:en" | jq > ./tests/mocked-data/search_raw.json

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -48,25 +48,25 @@
} }
}, },
"fast_followers_count": 0, "fast_followers_count": 0,
"favourites_count": 2128, "favourites_count": 2152,
"followers_count": 583721, "followers_count": 587135,
"friends_count": 1941, "friends_count": 1945,
"has_custom_timelines": true, "has_custom_timelines": true,
"is_translator": false, "is_translator": false,
"listed_count": 2395, "listed_count": 2459,
"location": "127.0.0.1", "location": "127.0.0.1",
"media_count": 815, "media_count": 815,
"name": "Twitter Dev", "name": "Twitter Dev",
"normal_followers_count": 583721, "normal_followers_count": 587135,
"pinned_tweet_ids_str": [ "pinned_tweet_ids_str": [
"1641222782594990080" "1661790253886177280"
], ],
"possibly_sensitive": false, "possibly_sensitive": false,
"profile_banner_url": "https://pbs.twimg.com/profile_banners/2244994945/1660405530", "profile_banner_url": "https://pbs.twimg.com/profile_banners/2244994945/1660405530",
"profile_image_url_https": "https://pbs.twimg.com/profile_images/1445764922474827784/W2zEPN7U_normal.jpg", "profile_image_url_https": "https://pbs.twimg.com/profile_images/1445764922474827784/W2zEPN7U_normal.jpg",
"profile_interstitial_type": "", "profile_interstitial_type": "",
"screen_name": "TwitterDev", "screen_name": "TwitterDev",
"statuses_count": 4076, "statuses_count": 4080,
"translator_type": "regular", "translator_type": "regular",
"url": "https://t.co/9wI31m3ELF", "url": "https://t.co/9wI31m3ELF",
"verified": false, "verified": false,
@ -87,7 +87,13 @@
}, },
"smart_blocked_by": false, "smart_blocked_by": false,
"smart_blocking": false, "smart_blocking": false,
"business_account": {} "business_account": {},
"highlights_info": {
"can_highlight_tweets": true,
"highlighted_tweets": "0"
},
"creator_subscriptions_count": 0,
"has_hidden_likes_on_profile": false
} }
} }
} }

Просмотреть файл

@ -48,25 +48,25 @@
} }
}, },
"fast_followers_count": 0, "fast_followers_count": 0,
"favourites_count": 2128, "favourites_count": 2152,
"followers_count": 583731, "followers_count": 587135,
"friends_count": 1941, "friends_count": 1945,
"has_custom_timelines": true, "has_custom_timelines": true,
"is_translator": false, "is_translator": false,
"listed_count": 2395, "listed_count": 2459,
"location": "127.0.0.1", "location": "127.0.0.1",
"media_count": 815, "media_count": 815,
"name": "Twitter Dev", "name": "Twitter Dev",
"normal_followers_count": 583731, "normal_followers_count": 587135,
"pinned_tweet_ids_str": [ "pinned_tweet_ids_str": [
"1641222782594990080" "1661790253886177280"
], ],
"possibly_sensitive": false, "possibly_sensitive": false,
"profile_banner_url": "https://pbs.twimg.com/profile_banners/2244994945/1660405530", "profile_banner_url": "https://pbs.twimg.com/profile_banners/2244994945/1660405530",
"profile_image_url_https": "https://pbs.twimg.com/profile_images/1445764922474827784/W2zEPN7U_normal.jpg", "profile_image_url_https": "https://pbs.twimg.com/profile_images/1445764922474827784/W2zEPN7U_normal.jpg",
"profile_interstitial_type": "", "profile_interstitial_type": "",
"screen_name": "TwitterDev", "screen_name": "TwitterDev",
"statuses_count": 4076, "statuses_count": 4080,
"translator_type": "regular", "translator_type": "regular",
"url": "https://t.co/9wI31m3ELF", "url": "https://t.co/9wI31m3ELF",
"verified": false, "verified": false,
@ -96,6 +96,7 @@
} }
}, },
"is_profile_translatable": false, "is_profile_translatable": false,
"has_hidden_likes_on_profile": false,
"verification_info": { "verification_info": {
"reason": { "reason": {
"description": { "description": {
@ -113,15 +114,21 @@
"from_index": 75, "from_index": 75,
"to_index": 85, "to_index": 85,
"ref": { "ref": {
"url": "https://help.twitter.com/en/rules-and-policies/profile-labels", "url": "https://twitter.com/i/twitter_blue_sign_up",
"url_type": "ExternalUrl" "url_type": "ExternalUrl"
} }
} }
] ]
} },
"verified_since_msec": "1395425700472"
} }
}, },
"business_account": {} "highlights_info": {
"can_highlight_tweets": true,
"highlighted_tweets": "0"
},
"business_account": {},
"creator_subscriptions_count": 0
} }
} }
} }

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

18
tests/test_utils.py Обычный файл
Просмотреть файл

@ -0,0 +1,18 @@
from twscrape.utils import parse_cookies
def test_cookies_parse():
    """Every supported cookie input format must parse to the same mapping."""
    expected = {"abc": "123", "def": "456", "ghi": "789"}

    samples = (
        # "Cookie" header style string
        "abc=123; def=456; ghi=789",
        # JSON object
        '{"abc": "123", "def": "456", "ghi": "789"}',
        # JSON list of name/value records
        '[{"name": "abc", "value": "123"}, {"name": "def", "value": "456"}, {"name": "ghi", "value": "789"}]',
        # base64-wrapped JSON object
        "eyJhYmMiOiAiMTIzIiwgImRlZiI6ICI0NTYiLCAiZ2hpIjogIjc4OSJ9",
        # base64-wrapped JSON list
        "W3sibmFtZSI6ICJhYmMiLCAidmFsdWUiOiAiMTIzIn0sIHsibmFtZSI6ICJkZWYiLCAidmFsdWUiOiAiNDU2In0sIHsibmFtZSI6ICJnaGkiLCAidmFsdWUiOiAiNzg5In1d",
    )

    for raw in samples:
        assert parse_cookies(raw) == expected

Просмотреть файл

@ -62,4 +62,7 @@ class Account(JSONTrait):
client.headers["x-twitter-active-user"] = "yes" client.headers["x-twitter-active-user"] = "yes"
client.headers["x-twitter-client-language"] = "en" client.headers["x-twitter-client-language"] = "en"
if "ct0" in client.cookies:
client.headers["x-csrf-token"] = client.cookies["ct0"]
return client return client

Просмотреть файл

@ -11,7 +11,7 @@ from .account import Account
from .db import execute, fetchall, fetchone from .db import execute, fetchall, fetchone
from .logger import logger from .logger import logger
from .login import login from .login import login
from .utils import utc_ts from .utils import parse_cookies, utc_ts
class AccountInfo(TypedDict): class AccountInfo(TypedDict):
@ -35,17 +35,17 @@ class AccountsPool:
self._db_file = db_file self._db_file = db_file
async def load_from_file(self, filepath: str, line_format: str): async def load_from_file(self, filepath: str, line_format: str):
assert "username" in line_format, "username is required"
assert "password" in line_format, "password is required"
assert "email" in line_format, "email is required"
assert "email_password" in line_format, "email_password is required"
line_delim = guess_delim(line_format) line_delim = guess_delim(line_format)
tokens = line_format.split(line_delim) tokens = line_format.split(line_delim)
required = set(["username", "password", "email", "email_password"])
if not required.issubset(tokens):
raise ValueError(f"Invalid line format: {line_format}")
with open(filepath, "r") as f: with open(filepath, "r") as f:
lines = f.read().split("\n") lines = f.read().split("\n")
lines = [x.strip() for x in lines if x.strip()] lines = [x.strip() for x in lines if x.strip()]
for line in lines: for line in lines:
data = [x.strip() for x in line.split(line_delim)] data = [x.strip() for x in line.split(line_delim)]
if len(data) < len(tokens): if len(data) < len(tokens):
@ -53,7 +53,8 @@ class AccountsPool:
continue continue
data = data[: len(tokens)] data = data[: len(tokens)]
await self.add_account(**{k: v for k, v in zip(tokens, data)}) vals = {k: v for k, v in zip(tokens, data) if k != "_"}
await self.add_account(**vals)
async def add_account( async def add_account(
self, self,
@ -63,6 +64,7 @@ class AccountsPool:
email_password: str, email_password: str,
user_agent: str | None = None, user_agent: str | None = None,
proxy: str | None = None, proxy: str | None = None,
cookies: str | None = None,
): ):
qs = "SELECT * FROM accounts WHERE username = :username" qs = "SELECT * FROM accounts WHERE username = :username"
rs = await fetchone(self._db_file, qs, {"username": username}) rs = await fetchone(self._db_file, qs, {"username": username})
@ -82,9 +84,13 @@ class AccountsPool:
locks={}, locks={},
stats={}, stats={},
headers={}, headers={},
cookies={}, cookies=parse_cookies(cookies) if cookies else {},
proxy=proxy, proxy=proxy,
) )
if "ct0" in account.cookies:
account.active = True
await self.save(account) await self.save(account)
async def delete_accounts(self, usernames: str | list[str]): async def delete_accounts(self, usernames: str | list[str]):

Просмотреть файл

@ -7,12 +7,7 @@ from .queue_client import QueueClient
from .utils import encode_params, find_obj, get_by_path, to_old_obj, to_old_rep from .utils import encode_params, find_obj, get_by_path, to_old_obj, to_old_rep
SEARCH_FEATURES = { SEARCH_FEATURES = {
"rweb_lists_timeline_redesign_enabled": True,
"creator_subscriptions_tweet_preview_api_enabled": True,
"responsive_web_twitter_article_tweet_consumption_enabled": False,
"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": True, "tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": True,
"responsive_web_media_download_video_enabled": False,
"longform_notetweets_inline_media_enabled": True,
} }
@ -109,7 +104,12 @@ class API:
async def user_by_id_raw(self, uid: int, kv=None): async def user_by_id_raw(self, uid: int, kv=None):
op = OP_UserByRestId op = OP_UserByRestId
kv = {"userId": str(uid), "withSafetyModeUserFields": True, **(kv or {})} kv = {"userId": str(uid), "withSafetyModeUserFields": True, **(kv or {})}
return await self._gql_item(op, kv) ft = {
"hidden_profile_likes_enabled": True,
"highlights_tweets_tab_ui_enabled": True,
"creator_subscriptions_tweet_preview_api_enabled": True,
}
return await self._gql_item(op, kv, ft)
async def user_by_id(self, uid: int, kv=None): async def user_by_id(self, uid: int, kv=None):
rep = await self.user_by_id_raw(uid, kv=kv) rep = await self.user_by_id_raw(uid, kv=kv)
@ -121,7 +121,13 @@ class API:
async def user_by_login_raw(self, login: str, kv=None): async def user_by_login_raw(self, login: str, kv=None):
op = OP_UserByScreenName op = OP_UserByScreenName
kv = {"screen_name": login, "withSafetyModeUserFields": True, **(kv or {})} kv = {"screen_name": login, "withSafetyModeUserFields": True, **(kv or {})}
return await self._gql_item(op, kv) ft = {
"highlights_tweets_tab_ui_enabled": True,
"hidden_profile_likes_enabled": True,
"creator_subscriptions_tweet_preview_api_enabled": True,
"subscriptions_verification_info_verified_since_enabled": True,
}
return await self._gql_item(op, kv, ft)
async def user_by_login(self, login: str, kv=None): async def user_by_login(self, login: str, kv=None):
rep = await self.user_by_login_raw(login, kv=kv) rep = await self.user_by_login_raw(login, kv=kv)

Просмотреть файл

@ -3,6 +3,18 @@ TOKEN = "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Z
GQL_URL = "https://twitter.com/i/api/graphql" GQL_URL = "https://twitter.com/i/api/graphql"
LOGIN_URL = "https://api.twitter.com/1.1/onboarding/task.json" LOGIN_URL = "https://api.twitter.com/1.1/onboarding/task.json"
OP_SearchTimeline = "L1VfBERtzc3VkBBT0YAYHA/SearchTimeline"
OP_UserByRestId = "Lxg1V9AiIzzXEiP2c8dRnw/UserByRestId"
OP_UserByScreenName = "oUZZZ8Oddwxs8Cd3iW3UEA/UserByScreenName"
OP_TweetDetail = "NmCeCgkVlsRGS1cAwqtgmw/TweetDetail"
OP_Followers = "FKV1jfu4AawGapl2KCZbQw/Followers"
OP_Following = "sKlU5dd_nanz9P2CxBt2sg/Following"
OP_Retweeters = "Gnw_Swm60cS-biSLn2OWNw/Retweeters"
OP_Favoriters = "rUyh8HWk8IXv_fvVKj3QjA/Favoriters"
OP_UserTweets = "x8SpjuBpqoww-edf0aUUKA/UserTweets"
OP_UserTweetsAndReplies = "RB2KVuVBRZe4GW8KkoVF2A/UserTweetsAndReplies"
OP_ListLatestTweetsTimeline = "2Vjeyo_L0nizAUhHe3fKyA/ListLatestTweetsTimeline"
GQL_FEATURES = { GQL_FEATURES = {
"blue_business_profile_image_shape_enabled": True, "blue_business_profile_image_shape_enabled": True,
"responsive_web_graphql_exclude_directive_enabled": True, "responsive_web_graphql_exclude_directive_enabled": True,
@ -23,16 +35,9 @@ GQL_FEATURES = {
"responsive_web_text_conversations_enabled": False, "responsive_web_text_conversations_enabled": False,
"longform_notetweets_rich_text_read_enabled": True, "longform_notetweets_rich_text_read_enabled": True,
"responsive_web_enhance_cards_enabled": False, "responsive_web_enhance_cards_enabled": False,
"creator_subscriptions_tweet_preview_api_enabled": True,
"longform_notetweets_inline_media_enabled": True,
"responsive_web_media_download_video_enabled": False,
"rweb_lists_timeline_redesign_enabled": True,
"responsive_web_twitter_article_tweet_consumption_enabled": False,
} }
OP_SearchTimeline = "L1VfBERtzc3VkBBT0YAYHA/SearchTimeline"
OP_UserByRestId = "Lxg1V9AiIzzXEiP2c8dRnw/UserByRestId"
OP_UserByScreenName = "oUZZZ8Oddwxs8Cd3iW3UEA/UserByScreenName"
OP_TweetDetail = "NmCeCgkVlsRGS1cAwqtgmw/TweetDetail"
OP_Followers = "FKV1jfu4AawGapl2KCZbQw/Followers"
OP_Following = "sKlU5dd_nanz9P2CxBt2sg/Following"
OP_Retweeters = "Gnw_Swm60cS-biSLn2OWNw/Retweeters"
OP_Favoriters = "rUyh8HWk8IXv_fvVKj3QjA/Favoriters"
OP_UserTweets = "x8SpjuBpqoww-edf0aUUKA/UserTweets"
OP_UserTweetsAndReplies = "RB2KVuVBRZe4GW8KkoVF2A/UserTweetsAndReplies"
OP_ListLatestTweetsTimeline = "2Vjeyo_L0nizAUhHe3fKyA/ListLatestTweetsTimeline"

Просмотреть файл

@ -112,14 +112,21 @@ class QueueClient:
except json.JSONDecodeError: except json.JSONDecodeError:
res: Any = {"_raw": rep.text} res: Any = {"_raw": rep.text}
fn = logger.info if rep.status_code == 200 else logger.warning msg = "OK"
fn(f"{rep.status_code:3d} - {req_id(rep)}")
if "errors" in res: if "errors" in res:
msg = "; ".join([x["message"] for x in res["errors"]]) msg = "; ".join([f'({x.get("code", -1)}) {x["message"]}' for x in res["errors"]])
fn = logger.info if rep.status_code == 200 else logger.warning
fn(f"{rep.status_code:3d} - {req_id(rep)} - {msg}")
if msg.startswith("The following features cannot be null"):
logger.error(f"Invalid request: {msg}")
exit(1)
if rep.status_code == 200 and "_Missing: No status found with that ID." in msg: if rep.status_code == 200 and "_Missing: No status found with that ID." in msg:
return # ignore this error return # ignore this error
if msg != "OK":
raise ApiError(rep, res) raise ApiError(rep, res)
rep.raise_for_status() rep.raise_for_status()

Просмотреть файл

@ -1,7 +1,8 @@
import base64
import json import json
from collections import defaultdict from collections import defaultdict
from datetime import datetime, timezone from datetime import datetime, timezone
from typing import Any, AsyncGenerator, Callable, TypedDict, TypeVar from typing import Any, AsyncGenerator, Callable, TypeVar
from httpx import HTTPStatusError, Response from httpx import HTTPStatusError, Response
@ -181,3 +182,26 @@ def print_table(rows: list[dict], hr_after=False):
print("\n".join(lines)) print("\n".join(lines))
if hr_after: if hr_after:
print("-" * max_len) print("-" * max_len)
def parse_cookies(val: str) -> dict[str, str]:
    """Parse a cookie string in any supported format into a name -> value dict.

    Accepted inputs:

    * a ``Cookie`` header style string: ``"abc=123; def=456"``;
    * a JSON object: ``'{"abc": "123"}'``;
    * a JSON list of objects with ``name`` and ``value`` keys:
      ``'[{"name": "abc", "value": "123"}]'``;
    * any of the above wrapped in base64.

    Args:
        val: Raw cookie string.

    Returns:
        Mapping of cookie names to values. A JSON object is returned as
        decoded, without further validation of its values.

    Raises:
        ValueError: If ``val`` matches none of the supported formats.
    """
    if not isinstance(val, str):
        raise ValueError(f"Invalid cookie value: {val}")

    text = val
    try:
        # Strict validation is essential here: the lenient decoder silently
        # drops non-alphabet characters and stops at the first "=", so a plain
        # header such as "a=1; b=2" would "decode" to unrelated text.
        # Whitespace is removed first so line-wrapped base64 is still accepted.
        text = base64.b64decode("".join(val.split()), validate=True).decode()
    except ValueError:
        # binascii.Error and UnicodeDecodeError are both ValueError subclasses:
        # the input is not base64-wrapped text, so parse it as given.
        pass

    error_msg = f"Invalid cookie value: {text}"

    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        pass  # not JSON: fall through to the header-string parser below
    else:
        if isinstance(parsed, dict):
            return parsed
        if isinstance(parsed, list):
            try:
                return {item["name"]: item["value"] for item in parsed}
            except (KeyError, TypeError) as exc:
                raise ValueError(error_msg) from exc
        # Valid JSON of any other shape (number, string, null, ...).
        raise ValueError(error_msg)

    cookies: dict[str, str] = {}
    for chunk in text.split(";"):
        chunk = chunk.strip()
        if not chunk:
            continue  # tolerate trailing or doubled separators
        # Split on the first "=" only: cookie values may contain "=" themselves.
        name, sep, value = chunk.partition("=")
        if not sep or not name:
            raise ValueError(error_msg)
        cookies[name] = value

    if not cookies:
        raise ValueError(error_msg)
    return cookies