small refactoring; handle 403 response

Этот коммит содержится в:
Vlad Pronsky 2023-04-29 20:09:49 +03:00
родитель d4d867aaab
Коммит eadf053f12
5 изменённых файлов: 323 добавлений и 263 удалений

4
.gitignore поставляемый
Просмотреть файл

@ -1,6 +1,8 @@
.DS_Store .DS_Store
.ruff_cache/ .ruff_cache/
sessions/ accounts/
results-raw/
results/
# Byte-compiled / optimized / DLL files # Byte-compiled / optimized / DLL files
__pycache__/ __pycache__/

Просмотреть файл

@ -1,223 +1,100 @@
import email as emaillib
import imaplib
import json import json
import os import os
import time from datetime import datetime, timedelta, timezone
from datetime import datetime, timezone from enum import Enum
from fake_useragent import UserAgent from fake_useragent import UserAgent
from httpx import AsyncClient, Client, Response from httpx import AsyncClient, HTTPStatusError, Response
from loguru import logger from loguru import logger
from .imap import get_email_code
from .utils import raise_for_status from .utils import raise_for_status
TOKEN = "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA" TOKEN = "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA"
TASK_URL = "https://api.twitter.com/1.1/onboarding/task.json" TASK_URL = "https://api.twitter.com/1.1/onboarding/task.json"
def search_email_with_confirmation_code(imap: imaplib.IMAP4_SSL, msg_count: int) -> str | None: class Status(str, Enum):
for i in range(msg_count, 0, -1): NEW = "new"
_, rep = imap.fetch(str(i), "(RFC822)") ACTIVE = "active"
for x in rep: LOGIN_ERROR = "login_error"
if isinstance(x, tuple):
msg = emaillib.message_from_bytes(x[1])
if "info@twitter.com" in msg.get("From", ""):
# eg. Your Twitter confirmation code is XXX
subject = str(msg.get("Subject", ""))
return subject.split(" ")[-1].strip()
return None
def get_verification_code(email: str, password: str, imap_domain: None | str = None) -> str | None: class Account:
imap_domain = f"imap.{email.split('@')[1]}" if imap_domain is None else imap_domain BASE_DIR = "accounts"
with imaplib.IMAP4_SSL(imap_domain) as imap: @classmethod
imap.login(email, password) def load(cls, filepath: str):
try:
with open(filepath) as f:
data = json.load(f)
return cls(**data)
except Exception as e:
logger.error(f"Failed to load account {filepath}: {e}")
return None
before_count = 0 def __init__(
while True: self,
_, rep = imap.select("INBOX") username: str,
msg_count = int(rep[0].decode("utf-8")) if len(rep) > 0 and rep[0] is not None else 0 password: str,
if msg_count > before_count: email: str,
code = search_email_with_confirmation_code(imap, msg_count) email_password: str,
if code is not None: user_agent: str | None = None,
return code proxy: str | None = None,
headers={},
logger.debug(f"Waiting for confirmation email... {msg_count}") cookies={},
time.sleep(1) limits={},
status=Status.NEW,
):
def login_get_guest_token(client: Client) -> str:
rep = client.post("https://api.twitter.com/1.1/guest/activate.json")
raise_for_status(rep, "guest_token")
return rep.json()["guest_token"]
def login_initiate(client: Client) -> Response:
payload = {
"input_flow_data": {
"flow_context": {"debug_overrides": {}, "start_location": {"location": "unknown"}}
},
"subtask_versions": {},
}
rep = client.post(TASK_URL, params={"flow_name": "login"}, json=payload)
raise_for_status(rep, "login_initiate")
return rep
def login_instrumentation(client, flow_token: str) -> Response:
payload = {
"flow_token": flow_token,
"subtask_inputs": [
{
"subtask_id": "LoginJsInstrumentationSubtask",
"js_instrumentation": {"response": "{}", "link": "next_link"},
}
],
}
rep = client.post(TASK_URL, json=payload)
raise_for_status(rep, "login_instrumentation")
return rep
def login_enter_username(client: Client, flow_token: str, username: str) -> Response:
payload = {
"flow_token": flow_token,
"subtask_inputs": [
{
"subtask_id": "LoginEnterUserIdentifierSSO",
"settings_list": {
"setting_responses": [
{
"key": "user_identifier",
"response_data": {"text_data": {"result": username}},
}
],
"link": "next_link",
},
}
],
}
rep = client.post(TASK_URL, json=payload)
raise_for_status(rep, "login_username")
return rep
def login_enter_password(client: Client, flow_token: str, password: str) -> Response:
payload = {
"flow_token": flow_token,
"subtask_inputs": [
{
"subtask_id": "LoginEnterPassword",
"enter_password": {"password": password, "link": "next_link"},
}
],
}
rep = client.post(TASK_URL, json=payload)
raise_for_status(rep, "login_password")
return rep
def login_duplication_check(client: Client, flow_token: str) -> Response:
payload = {
"flow_token": flow_token,
"subtask_inputs": [
{
"subtask_id": "AccountDuplicationCheck",
"check_logged_in_account": {"link": "AccountDuplicationCheck_false"},
}
],
}
rep = client.post(TASK_URL, json=payload)
raise_for_status(rep, "login_duplication_check")
return rep
def get_confirm_email_code(task: dict, email: str, email_password: str) -> str:
logger.debug(f"task: {json.dumps(task)}")
is_code = task["enter_text"]["hint_text"].lower() == "confirmation code"
value = get_verification_code(email, email_password) if is_code else email
assert value is not None, "Could not get verification code"
return value
def login_confirm_email(client: Client, flow_token: str, value: str) -> Response:
payload = {
"flow_token": flow_token,
"subtask_inputs": [
{
"subtask_id": "LoginAcid",
"enter_text": {"text": value, "link": "next_link"},
}
],
}
rep = client.post(TASK_URL, json=payload)
raise_for_status(rep, "login_confirm_email")
return rep
def login_success(client: Client, flow_token: str) -> Response:
payload = {
"flow_token": flow_token,
"subtask_inputs": [],
}
rep = client.post(TASK_URL, json=payload)
raise_for_status(rep, "login_success")
return rep
class UserClient:
def __init__(self, username: str, password: str, email: str, email_password: str):
self.username = username self.username = username
self.password = password self.password = password
self.email = email self.email = email
self.email_password = email_password self.email_password = email_password
self.client = Client() self.user_agent = user_agent or UserAgent().safari
self.session_path = f"sessions/{self.username}.session.json" self.proxy = proxy or None
dirname = os.path.dirname(self.session_path) self.client = AsyncClient(proxies=self.proxy)
os.makedirs(dirname, exist_ok=True) self.client.cookies.update(cookies)
self.client.headers.update(headers)
self.limits: dict[str, datetime] = {} self.client.headers["user-agent"] = self.user_agent
self.client.headers["content-type"] = "application/json"
self.client.headers["authorization"] = TOKEN
self.client.headers["x-twitter-active-user"] = "yes"
self.client.headers["x-twitter-client-language"] = "en"
self.status: Status = status
self.locked: dict[str, bool] = {} self.locked: dict[str, bool] = {}
self.limits: dict[str, datetime] = {
k: datetime.fromisoformat(v) for k, v in limits.items()
}
def dump(self):
return {
"username": self.username,
"password": self.password,
"email": self.email,
"email_password": self.email_password,
"user_agent": self.user_agent,
"proxy": self.proxy,
"cookies": {k: v for k, v in self.client.cookies.items()},
"headers": {k: v for k, v in self.client.headers.items()},
"limits": {k: v.isoformat() for k, v in self.limits.items()},
"status": self.status,
}
def save(self): def save(self):
cookies = dict(self.client.cookies) os.makedirs(self.BASE_DIR, exist_ok=True)
headers = dict(self.client.headers) data = self.dump()
limits = dict(self.limits) with open(f"{self.BASE_DIR}/{self.username}.json", "w") as f:
json.dump(data, f, indent=2)
with open(self.session_path, "w") as fp: def update_limit(self, queue: str, reset_ts: int):
json.dump({"cookies": cookies, "headers": headers, "limits": limits}, fp, indent=2) self.limits[queue] = datetime.fromtimestamp(reset_ts, tz=timezone.utc)
self.save()
def restore(self):
try:
with open(self.session_path) as fp:
data = json.load(fp)
self.client.cookies.update(data.get("cookies", {}))
self.client.headers.update(data.get("headers", {}))
self.limits.update(
{k: datetime.fromisoformat(v) for k, v in data.get("limits", {}).items()}
)
return True
except (FileNotFoundError, json.JSONDecodeError):
return False
def make_client(self) -> AsyncClient:
client = AsyncClient()
client.headers.update(self.client.headers)
client.cookies.update(self.client.cookies)
return client
def can_use(self, queue: str): def can_use(self, queue: str):
if self.locked.get(queue, False): if self.locked.get(queue, False) or self.status != Status.ACTIVE:
return False return False
limit = self.limits.get(queue) limit = self.limits.get(queue)
@ -229,73 +106,197 @@ class UserClient:
def unlock(self, queue: str): def unlock(self, queue: str):
self.locked[queue] = False self.locked[queue] = False
def update_limit(self, queue: str, rep: Response): async def login(self, fresh=False):
reset = rep.headers.get("x-rate-limit-reset", 0) log_id = f"{self.username} - {self.email}"
reset = datetime.fromtimestamp(int(reset), tz=timezone.utc) if self.status != "new" and not fresh:
self.limits[queue] = reset logger.debug(f"logged in already {log_id}")
self.save()
def print_session(self):
for x in self.client.headers.items():
print(x)
print()
for x in self.client.cookies.items():
print(x)
def _next_login_task(self, rep: Response):
client = self.client
ct0 = client.cookies.get("ct0", None)
if ct0:
client.headers["x-csrf-token"] = ct0
client.headers["x-twitter-auth-type"] = "OAuth2Session"
data = rep.json()
logger.debug(f"login tasks: {[x['subtask_id'] for x in data['subtasks']]}")
flow_token = data["flow_token"]
for x in data["subtasks"]:
task_id = x["subtask_id"]
if task_id == "LoginSuccessSubtask":
return login_success(client, flow_token)
if task_id == "LoginAcid":
value = get_confirm_email_code(x, self.email, self.email_password)
return login_confirm_email(client, flow_token, value)
if task_id == "AccountDuplicationCheck":
return login_duplication_check(client, flow_token)
if task_id == "LoginEnterPassword":
return login_enter_password(client, flow_token, self.password)
if task_id == "LoginEnterUserIdentifierSSO":
return login_enter_username(client, flow_token, self.username)
if task_id == "LoginJsInstrumentationSubtask":
return login_instrumentation(client, flow_token)
return None
def login(self):
if self.restore():
logger.debug(f"session restored for {self.username}")
return return
self.client.headers["user-agent"] = UserAgent().safari logger.debug(f"logging in {log_id}")
self.client.headers["content-type"] = "application/json"
self.client.headers["authorization"] = TOKEN
self.client.headers["x-twitter-active-user"] = "yes"
self.client.headers["x-twitter-client-language"] = "en"
guest_token = login_get_guest_token(self.client) guest_token = await self.get_guest_token()
self.client.headers["x-guest-token"] = guest_token self.client.headers["x-guest-token"] = guest_token
rep = login_initiate(self.client) rep = await self.login_initiate()
while True: while True:
rep = self._next_login_task(rep) try:
if rep is None: if rep:
break rep = await self.next_login_task(rep)
else:
break
except HTTPStatusError as e:
if e.response.status_code == 403:
logger.error(f"403 error {log_id}")
self.status = Status.LOGIN_ERROR
self.save()
return
self.client.headers["x-csrf-token"] = self.client.cookies["ct0"] self.client.headers["x-csrf-token"] = self.client.cookies["ct0"]
self.client.headers["x-twitter-auth-type"] = "OAuth2Session" self.client.headers["x-twitter-auth-type"] = "OAuth2Session"
logger.info(f"logged in success {log_id}")
self.status = Status.ACTIVE
self.save() self.save()
logger.debug(f"login success for {self.username}") async def get_guest_token(self):
rep = await self.client.post("https://api.twitter.com/1.1/guest/activate.json")
raise_for_status(rep, "guest_token")
return rep.json()["guest_token"]
async def login_initiate(self) -> Response:
payload = {
"input_flow_data": {
"flow_context": {"debug_overrides": {}, "start_location": {"location": "unknown"}}
},
"subtask_versions": {},
}
rep = await self.client.post(TASK_URL, params={"flow_name": "login"}, json=payload)
raise_for_status(rep, "login_initiate")
return rep
async def next_login_task(self, rep: Response):
ct0 = self.client.cookies.get("ct0", None)
if ct0:
self.client.headers["x-csrf-token"] = ct0
self.client.headers["x-twitter-auth-type"] = "OAuth2Session"
data = rep.json()
assert "flow_token" in data, f"flow_token not in {rep.text}"
# logger.debug(f"login tasks: {[x['subtask_id'] for x in data['subtasks']]}")
for x in data["subtasks"]:
task_id = x["subtask_id"]
try:
if task_id == "LoginSuccessSubtask":
return await self.login_success(data)
if task_id == "LoginAcid":
is_code = x["enter_text"]["hint_text"].lower() == "confirmation code"
# logger.debug(f"is login code: {is_code}")
fn = self.login_confirm_email_code if is_code else self.login_confirm_email
return await fn(data)
if task_id == "AccountDuplicationCheck":
return await self.login_duplication_check(data)
if task_id == "LoginEnterPassword":
return await self.login_enter_password(data)
if task_id == "LoginEnterUserIdentifierSSO":
return await self.login_enter_username(data)
if task_id == "LoginJsInstrumentationSubtask":
return await self.login_instrumentation(data)
except Exception as e:
logger.error(f"Error in {task_id}: {e}")
logger.error(e)
return None
async def login_instrumentation(self, prev: dict) -> Response:
payload = {
"flow_token": prev["flow_token"],
"subtask_inputs": [
{
"subtask_id": "LoginJsInstrumentationSubtask",
"js_instrumentation": {"response": "{}", "link": "next_link"},
}
],
}
rep = await self.client.post(TASK_URL, json=payload)
raise_for_status(rep, "login_instrumentation")
return rep
async def login_enter_username(self, prev: dict) -> Response:
payload = {
"flow_token": prev["flow_token"],
"subtask_inputs": [
{
"subtask_id": "LoginEnterUserIdentifierSSO",
"settings_list": {
"setting_responses": [
{
"key": "user_identifier",
"response_data": {"text_data": {"result": self.username}},
}
],
"link": "next_link",
},
}
],
}
rep = await self.client.post(TASK_URL, json=payload)
raise_for_status(rep, "login_username")
return rep
async def login_enter_password(self, prev: dict) -> Response:
payload = {
"flow_token": prev["flow_token"],
"subtask_inputs": [
{
"subtask_id": "LoginEnterPassword",
"enter_password": {"password": self.password, "link": "next_link"},
}
],
}
rep = await self.client.post(TASK_URL, json=payload)
raise_for_status(rep, "login_password")
return rep
async def login_duplication_check(self, prev: dict) -> Response:
payload = {
"flow_token": prev["flow_token"],
"subtask_inputs": [
{
"subtask_id": "AccountDuplicationCheck",
"check_logged_in_account": {"link": "AccountDuplicationCheck_false"},
}
],
}
rep = await self.client.post(TASK_URL, json=payload)
raise_for_status(rep, "login_duplication_check")
return rep
async def login_confirm_email(self, prev: dict) -> Response:
payload = {
"flow_token": prev["flow_token"],
"subtask_inputs": [
{
"subtask_id": "LoginAcid",
"enter_text": {"text": self.email, "link": "next_link"},
}
],
}
rep = await self.client.post(TASK_URL, json=payload)
raise_for_status(rep, "login_confirm_email")
return rep
async def login_confirm_email_code(self, prev: dict):
now_time = datetime.now(timezone.utc) - timedelta(seconds=30)
value = await get_email_code(self.email, self.email_password, now_time)
payload = {
"flow_token": prev["flow_token"],
"subtask_inputs": [
{
"subtask_id": "LoginAcid",
"enter_text": {"text": value, "link": "next_link"},
}
],
}
rep = await self.client.post(TASK_URL, json=payload)
raise_for_status(rep, "login_confirm_email")
return rep
async def login_success(self, prev: dict) -> Response:
payload = {
"flow_token": prev["flow_token"],
"subtask_inputs": [],
}
rep = await self.client.post(TASK_URL, json=payload)
raise_for_status(rep, "login_success")
return rep

50
twapi/imap.py Обычный файл
Просмотреть файл

@ -0,0 +1,50 @@
import asyncio
import email as emaillib
import imaplib
from datetime import datetime
from loguru import logger
def get_imap_domain(email: str) -> str:
return f"imap.{email.split('@')[1]}"
def search_email_code(imap: imaplib.IMAP4_SSL, count: int, min_t: datetime | None) -> str | None:
for i in range(count, 0, -1):
_, rep = imap.fetch(str(i), "(RFC822)")
for x in rep:
if isinstance(x, tuple):
msg = emaillib.message_from_bytes(x[1])
msg_time = datetime.strptime(msg.get("Date", ""), "%a, %d %b %Y %H:%M:%S %z")
msg_from = str(msg.get("From", "")).lower()
msg_subj = str(msg.get("Subject", "")).lower()
logger.debug(f"({i} of {count}) {msg_from} - {msg_time} - {msg_subj}")
if min_t is not None and msg_time < min_t:
return None
if "info@twitter.com" in msg_from and "confirmation code is" in msg_subj:
# eg. Your Twitter confirmation code is XXX
return msg_subj.split(" ")[-1].strip()
return None
async def get_email_code(email: str, password: str, min_t: datetime | None = None) -> str:
domain = get_imap_domain(email)
with imaplib.IMAP4_SSL(domain) as imap:
imap.login(email, password)
was_count = 0
while True:
_, rep = imap.select("INBOX")
now_count = int(rep[0].decode("utf-8")) if len(rep) > 0 and rep[0] is not None else 0
if now_count > was_count:
code = search_email_code(imap, now_count, min_t)
if code is not None:
return code
logger.debug(f"Waiting for confirmation code for {email}, msg_count: {now_count}")
await asyncio.sleep(5)

Просмотреть файл

@ -2,23 +2,23 @@ import asyncio
from loguru import logger from loguru import logger
from .client import UserClient from .client import Account
class AccountsPool: class AccountsPool:
def __init__(self): def __init__(self):
self.accounts: list[UserClient] = [] self.accounts: list[Account] = []
def add_account(self, account: UserClient): def add_account(self, account: Account):
self.accounts.append(account) self.accounts.append(account)
def get_account(self, queue: str) -> UserClient | None: def get_account(self, queue: str) -> Account | None:
for x in self.accounts: for x in self.accounts:
if x.can_use(queue): if x.can_use(queue):
return x return x
return None return None
async def get_account_or_wait(self, queue: str) -> UserClient: async def get_account_or_wait(self, queue: str) -> Account:
while True: while True:
account = self.get_account(queue) account = self.get_account(queue)
if account: if account:

Просмотреть файл

@ -1,4 +1,5 @@
import json import json
from time import time
from typing import Awaitable, Callable from typing import Awaitable, Callable
from httpx import AsyncClient, HTTPStatusError, Response from httpx import AsyncClient, HTTPStatusError, Response
@ -118,21 +119,27 @@ class Search:
async def _inf_req(self, queue: str, cb: Callable[[AsyncClient], Awaitable[Response]]): async def _inf_req(self, queue: str, cb: Callable[[AsyncClient], Awaitable[Response]]):
while True: while True:
account = await self.pool.get_account_or_wait(queue) account = await self.pool.get_account_or_wait(queue)
client = account.make_client()
try: try:
while True: while True:
rep = await cb(client) rep = await cb(account.client)
rep.raise_for_status() rep.raise_for_status()
yield rep yield rep
except HTTPStatusError as e: except HTTPStatusError as e:
if e.response.status_code == 429: if e.response.status_code == 429:
logger.debug(f"Rate limit for account={account.username} on queue={queue}") logger.debug(f"Rate limit for account={account.username} on queue={queue}")
account.update_limit(queue, e.response) reset_ts = int(e.response.headers.get("x-rate-limit-reset", 0))
account.update_limit(queue, reset_ts)
continue continue
else:
logger.error(f"[{e.response.status_code}] {e.request.url}\n{e.response.text}") if e.response.status_code == 403:
raise e logger.debug(f"Account={account.username} is banned on queue={queue}")
reset_ts = int(time.time() + 60 * 60) # 1 hour
account.update_limit(queue, reset_ts)
continue
logger.error(f"[{e.response.status_code}] {e.request.url}\n{e.response.text}")
raise e
finally: finally:
account.unlock(queue) account.unlock(queue)