add account import with cookies; update mocks

Этот коммит содержится в:
Vlad Pronsky 2023-07-14 23:10:06 +03:00
родитель 22c29b673d
Коммит 4938ca7d6c
18 изменённых файлов: 17555 добавлений и 20667 удалений

Просмотреть файл

@ -41,3 +41,15 @@ changelog:
test34:
docker build -f Dockerfile-test .
update-mocks:
twscrape user_by_id --raw 2244994945 | jq > ./tests/mocked-data/user_by_id_raw.json
twscrape user_by_login --raw twitterdev | jq > ./tests/mocked-data/user_by_login_raw.json
twscrape followers --raw --limit 10 2244994945 | jq > ./tests/mocked-data/followers_raw.json
twscrape following --raw --limit 10 2244994945 | jq > ./tests/mocked-data/following_raw.json
twscrape tweet_details --raw 1649191520250245121 | jq > ./tests/mocked-data/tweet_details_raw.json
twscrape retweeters --raw --limit 10 1649191520250245121 | jq > ./tests/mocked-data/retweeters_raw.json
twscrape favoriters --raw --limit 10 1649191520250245121 | jq > ./tests/mocked-data/favoriters_raw.json
twscrape user_tweets --raw --limit 10 2244994945 | jq > ./tests/mocked-data/user_tweets_raw.json
twscrape user_tweets_and_replies --raw --limit 10 2244994945 | jq > ./tests/mocked-data/user_tweets_and_replies_raw.json
twscrape search --raw --limit 10 "elon musk lang:en" | jq > ./tests/mocked-data/search_raw.json

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -48,25 +48,25 @@
}
},
"fast_followers_count": 0,
"favourites_count": 2128,
"followers_count": 583721,
"friends_count": 1941,
"favourites_count": 2152,
"followers_count": 587135,
"friends_count": 1945,
"has_custom_timelines": true,
"is_translator": false,
"listed_count": 2395,
"listed_count": 2459,
"location": "127.0.0.1",
"media_count": 815,
"name": "Twitter Dev",
"normal_followers_count": 583721,
"normal_followers_count": 587135,
"pinned_tweet_ids_str": [
"1641222782594990080"
"1661790253886177280"
],
"possibly_sensitive": false,
"profile_banner_url": "https://pbs.twimg.com/profile_banners/2244994945/1660405530",
"profile_image_url_https": "https://pbs.twimg.com/profile_images/1445764922474827784/W2zEPN7U_normal.jpg",
"profile_interstitial_type": "",
"screen_name": "TwitterDev",
"statuses_count": 4076,
"statuses_count": 4080,
"translator_type": "regular",
"url": "https://t.co/9wI31m3ELF",
"verified": false,
@ -87,8 +87,14 @@
},
"smart_blocked_by": false,
"smart_blocking": false,
"business_account": {}
"business_account": {},
"highlights_info": {
"can_highlight_tweets": true,
"highlighted_tweets": "0"
},
"creator_subscriptions_count": 0,
"has_hidden_likes_on_profile": false
}
}
}
}
}

Просмотреть файл

@ -48,25 +48,25 @@
}
},
"fast_followers_count": 0,
"favourites_count": 2128,
"followers_count": 583731,
"friends_count": 1941,
"favourites_count": 2152,
"followers_count": 587135,
"friends_count": 1945,
"has_custom_timelines": true,
"is_translator": false,
"listed_count": 2395,
"listed_count": 2459,
"location": "127.0.0.1",
"media_count": 815,
"name": "Twitter Dev",
"normal_followers_count": 583731,
"normal_followers_count": 587135,
"pinned_tweet_ids_str": [
"1641222782594990080"
"1661790253886177280"
],
"possibly_sensitive": false,
"profile_banner_url": "https://pbs.twimg.com/profile_banners/2244994945/1660405530",
"profile_image_url_https": "https://pbs.twimg.com/profile_images/1445764922474827784/W2zEPN7U_normal.jpg",
"profile_interstitial_type": "",
"screen_name": "TwitterDev",
"statuses_count": 4076,
"statuses_count": 4080,
"translator_type": "regular",
"url": "https://t.co/9wI31m3ELF",
"verified": false,
@ -96,6 +96,7 @@
}
},
"is_profile_translatable": false,
"has_hidden_likes_on_profile": false,
"verification_info": {
"reason": {
"description": {
@ -113,16 +114,22 @@
"from_index": 75,
"to_index": 85,
"ref": {
"url": "https://help.twitter.com/en/rules-and-policies/profile-labels",
"url": "https://twitter.com/i/twitter_blue_sign_up",
"url_type": "ExternalUrl"
}
}
]
}
},
"verified_since_msec": "1395425700472"
}
},
"business_account": {}
"highlights_info": {
"can_highlight_tweets": true,
"highlighted_tweets": "0"
},
"business_account": {},
"creator_subscriptions_count": 0
}
}
}
}
}

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

18
tests/test_utils.py Обычный файл
Просмотреть файл

@ -0,0 +1,18 @@
from twscrape.utils import parse_cookies
def test_cookies_parse():
    """parse_cookies must yield the same mapping for every supported input encoding."""
    expected = {"abc": "123", "def": "456", "ghi": "789"}

    samples = [
        # Cookie header string
        "abc=123; def=456; ghi=789",
        # JSON object
        '{"abc": "123", "def": "456", "ghi": "789"}',
        # JSON array of name/value records
        '[{"name": "abc", "value": "123"}, {"name": "def", "value": "456"}, {"name": "ghi", "value": "789"}]',
        # base64 of the JSON object
        "eyJhYmMiOiAiMTIzIiwgImRlZiI6ICI0NTYiLCAiZ2hpIjogIjc4OSJ9",
        # base64 of the JSON array
        "W3sibmFtZSI6ICJhYmMiLCAidmFsdWUiOiAiMTIzIn0sIHsibmFtZSI6ICJkZWYiLCAidmFsdWUiOiAiNDU2In0sIHsibmFtZSI6ICJnaGkiLCAidmFsdWUiOiAiNzg5In1d",
    ]

    for raw in samples:
        assert parse_cookies(raw) == expected

Просмотреть файл

@ -62,4 +62,7 @@ class Account(JSONTrait):
client.headers["x-twitter-active-user"] = "yes"
client.headers["x-twitter-client-language"] = "en"
if "ct0" in client.cookies:
client.headers["x-csrf-token"] = client.cookies["ct0"]
return client

Просмотреть файл

@ -11,7 +11,7 @@ from .account import Account
from .db import execute, fetchall, fetchone
from .logger import logger
from .login import login
from .utils import utc_ts
from .utils import parse_cookies, utc_ts
class AccountInfo(TypedDict):
@ -35,17 +35,17 @@ class AccountsPool:
self._db_file = db_file
async def load_from_file(self, filepath: str, line_format: str):
assert "username" in line_format, "username is required"
assert "password" in line_format, "password is required"
assert "email" in line_format, "email is required"
assert "email_password" in line_format, "email_password is required"
line_delim = guess_delim(line_format)
tokens = line_format.split(line_delim)
required = set(["username", "password", "email", "email_password"])
if not required.issubset(tokens):
raise ValueError(f"Invalid line format: {line_format}")
with open(filepath, "r") as f:
lines = f.read().split("\n")
lines = [x.strip() for x in lines if x.strip()]
for line in lines:
data = [x.strip() for x in line.split(line_delim)]
if len(data) < len(tokens):
@ -53,7 +53,8 @@ class AccountsPool:
continue
data = data[: len(tokens)]
await self.add_account(**{k: v for k, v in zip(tokens, data)})
vals = {k: v for k, v in zip(tokens, data) if k != "_"}
await self.add_account(**vals)
async def add_account(
self,
@ -63,6 +64,7 @@ class AccountsPool:
email_password: str,
user_agent: str | None = None,
proxy: str | None = None,
cookies: str | None = None,
):
qs = "SELECT * FROM accounts WHERE username = :username"
rs = await fetchone(self._db_file, qs, {"username": username})
@ -82,9 +84,13 @@ class AccountsPool:
locks={},
stats={},
headers={},
cookies={},
cookies=parse_cookies(cookies) if cookies else {},
proxy=proxy,
)
if "ct0" in account.cookies:
account.active = True
await self.save(account)
async def delete_accounts(self, usernames: str | list[str]):

Просмотреть файл

@ -7,12 +7,7 @@ from .queue_client import QueueClient
from .utils import encode_params, find_obj, get_by_path, to_old_obj, to_old_rep
SEARCH_FEATURES = {
"rweb_lists_timeline_redesign_enabled": True,
"creator_subscriptions_tweet_preview_api_enabled": True,
"responsive_web_twitter_article_tweet_consumption_enabled": False,
"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": True,
"responsive_web_media_download_video_enabled": False,
"longform_notetweets_inline_media_enabled": True,
}
@ -109,7 +104,12 @@ class API:
async def user_by_id_raw(self, uid: int, kv=None):
op = OP_UserByRestId
kv = {"userId": str(uid), "withSafetyModeUserFields": True, **(kv or {})}
return await self._gql_item(op, kv)
ft = {
"hidden_profile_likes_enabled": True,
"highlights_tweets_tab_ui_enabled": True,
"creator_subscriptions_tweet_preview_api_enabled": True,
}
return await self._gql_item(op, kv, ft)
async def user_by_id(self, uid: int, kv=None):
rep = await self.user_by_id_raw(uid, kv=kv)
@ -121,7 +121,13 @@ class API:
async def user_by_login_raw(self, login: str, kv=None):
op = OP_UserByScreenName
kv = {"screen_name": login, "withSafetyModeUserFields": True, **(kv or {})}
return await self._gql_item(op, kv)
ft = {
"highlights_tweets_tab_ui_enabled": True,
"hidden_profile_likes_enabled": True,
"creator_subscriptions_tweet_preview_api_enabled": True,
"subscriptions_verification_info_verified_since_enabled": True,
}
return await self._gql_item(op, kv, ft)
async def user_by_login(self, login: str, kv=None):
rep = await self.user_by_login_raw(login, kv=kv)

Просмотреть файл

@ -3,6 +3,18 @@ TOKEN = "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Z
GQL_URL = "https://twitter.com/i/api/graphql"
LOGIN_URL = "https://api.twitter.com/1.1/onboarding/task.json"
OP_SearchTimeline = "L1VfBERtzc3VkBBT0YAYHA/SearchTimeline"
OP_UserByRestId = "Lxg1V9AiIzzXEiP2c8dRnw/UserByRestId"
OP_UserByScreenName = "oUZZZ8Oddwxs8Cd3iW3UEA/UserByScreenName"
OP_TweetDetail = "NmCeCgkVlsRGS1cAwqtgmw/TweetDetail"
OP_Followers = "FKV1jfu4AawGapl2KCZbQw/Followers"
OP_Following = "sKlU5dd_nanz9P2CxBt2sg/Following"
OP_Retweeters = "Gnw_Swm60cS-biSLn2OWNw/Retweeters"
OP_Favoriters = "rUyh8HWk8IXv_fvVKj3QjA/Favoriters"
OP_UserTweets = "x8SpjuBpqoww-edf0aUUKA/UserTweets"
OP_UserTweetsAndReplies = "RB2KVuVBRZe4GW8KkoVF2A/UserTweetsAndReplies"
OP_ListLatestTweetsTimeline = "2Vjeyo_L0nizAUhHe3fKyA/ListLatestTweetsTimeline"
GQL_FEATURES = {
"blue_business_profile_image_shape_enabled": True,
"responsive_web_graphql_exclude_directive_enabled": True,
@ -23,16 +35,9 @@ GQL_FEATURES = {
"responsive_web_text_conversations_enabled": False,
"longform_notetweets_rich_text_read_enabled": True,
"responsive_web_enhance_cards_enabled": False,
"creator_subscriptions_tweet_preview_api_enabled": True,
"longform_notetweets_inline_media_enabled": True,
"responsive_web_media_download_video_enabled": False,
"rweb_lists_timeline_redesign_enabled": True,
"responsive_web_twitter_article_tweet_consumption_enabled": False,
}
OP_SearchTimeline = "L1VfBERtzc3VkBBT0YAYHA/SearchTimeline"
OP_UserByRestId = "Lxg1V9AiIzzXEiP2c8dRnw/UserByRestId"
OP_UserByScreenName = "oUZZZ8Oddwxs8Cd3iW3UEA/UserByScreenName"
OP_TweetDetail = "NmCeCgkVlsRGS1cAwqtgmw/TweetDetail"
OP_Followers = "FKV1jfu4AawGapl2KCZbQw/Followers"
OP_Following = "sKlU5dd_nanz9P2CxBt2sg/Following"
OP_Retweeters = "Gnw_Swm60cS-biSLn2OWNw/Retweeters"
OP_Favoriters = "rUyh8HWk8IXv_fvVKj3QjA/Favoriters"
OP_UserTweets = "x8SpjuBpqoww-edf0aUUKA/UserTweets"
OP_UserTweetsAndReplies = "RB2KVuVBRZe4GW8KkoVF2A/UserTweetsAndReplies"
OP_ListLatestTweetsTimeline = "2Vjeyo_L0nizAUhHe3fKyA/ListLatestTweetsTimeline"

Просмотреть файл

@ -112,14 +112,21 @@ class QueueClient:
except json.JSONDecodeError:
res: Any = {"_raw": rep.text}
fn = logger.info if rep.status_code == 200 else logger.warning
fn(f"{rep.status_code:3d} - {req_id(rep)}")
msg = "OK"
if "errors" in res:
msg = "; ".join([x["message"] for x in res["errors"]])
if rep.status_code == 200 and "_Missing: No status found with that ID." in msg:
return # ignore this error
msg = "; ".join([f'({x.get("code", -1)}) {x["message"]}' for x in res["errors"]])
fn = logger.info if rep.status_code == 200 else logger.warning
fn(f"{rep.status_code:3d} - {req_id(rep)} - {msg}")
if msg.startswith("The following features cannot be null"):
logger.error(f"Invalid request: {msg}")
exit(1)
if rep.status_code == 200 and "_Missing: No status found with that ID." in msg:
return # ignore this error
if msg != "OK":
raise ApiError(rep, res)
rep.raise_for_status()

Просмотреть файл

@ -1,7 +1,8 @@
import base64
import json
from collections import defaultdict
from datetime import datetime, timezone
from typing import Any, AsyncGenerator, Callable, TypedDict, TypeVar
from typing import Any, AsyncGenerator, Callable, TypeVar
from httpx import HTTPStatusError, Response
@ -181,3 +182,26 @@ def print_table(rows: list[dict], hr_after=False):
print("\n".join(lines))
if hr_after:
print("-" * max_len)
def parse_cookies(val: str) -> dict[str, str]:
    """Parse cookies given in one of several textual formats into a dict.

    Accepted inputs (each may additionally be base64-encoded):
      - a Cookie header string: ``"abc=123; def=456"``
      - a JSON object: ``'{"abc": "123"}'`` (returned as parsed)
      - a JSON array of objects carrying "name" and "value" keys

    Args:
        val: the raw cookie text.

    Returns:
        Mapping of cookie name to cookie value.

    Raises:
        ValueError: if ``val`` is not a string or matches none of the formats.
    """
    if not isinstance(val, str):
        raise ValueError(f"Invalid cookie value: {val}")

    try:
        # validate=True: the lenient default discards characters outside the
        # base64 alphabet, so a plain Cookie header could be mis-decoded into
        # garbage instead of being left untouched.
        val = base64.b64decode(val, validate=True).decode()
    except ValueError:
        # binascii.Error and UnicodeDecodeError are both ValueError subclasses:
        # the input is simply not base64-wrapped text.
        pass

    try:
        res = json.loads(val)
    except json.JSONDecodeError:
        # Not JSON: treat as a Cookie header ("name=value; name=value").
        cookies = {}
        for pair in val.split(";"):
            pair = pair.strip()
            if not pair:
                continue  # tolerate trailing or doubled separators
            # Split on the first "=" only; values may themselves contain "=".
            name, sep, value = pair.partition("=")
            if not sep:
                raise ValueError(f"Invalid cookie value: {val}") from None
            cookies[name] = value
        if cookies:
            return cookies
        raise ValueError(f"Invalid cookie value: {val}") from None

    if isinstance(res, dict):
        return res

    if isinstance(res, list):
        try:
            return {x["name"]: x["value"] for x in res}
        except (KeyError, TypeError):
            # An entry is not an object or lacks "name"/"value".
            raise ValueError(f"Invalid cookie value: {val}") from None

    # Valid JSON of an unsupported shape (number, string, null, ...).
    raise ValueError(f"Invalid cookie value: {val}")