diff --git a/_get_gql_ops.py b/_get_gql_ops.py index 3987acf..ab66b9b 100644 --- a/_get_gql_ops.py +++ b/_get_gql_ops.py @@ -5,6 +5,11 @@ import re import httpx from fake_useragent import UserAgent +""" +docker run --rm -p "3128:3128/tcp" -p "1080:1080/tcp" -e "PROXY_LOGIN=user" -e "PROXY_PASSWORD=pass" tarampampam/3proxy +docker run --rm -p "3129:3128/tcp" -p "1081:1080/tcp" tarampampam/3proxy +""" + client = httpx.Client(headers={"user-agent": UserAgent().chrome}) with open("./twscrape/api.py") as fp: diff --git a/readme.md b/readme.md index c6c8024..9e71a6e 100644 --- a/readme.md +++ b/readme.md @@ -61,10 +61,6 @@ async def main(): cookies = "abc=12; ct0=xyz" # or '{"abc": "12", "ct0": "xyz"}' await api.pool.add_account("user3", "pass3", "u3@mail.com", "mail_pass3", cookies=cookies) - # add account with PROXY - proxy = "http://login:pass@example.com:8080" - await api.pool.add_account("user4", "pass4", "u4@mail.com", "mail_pass4", proxy=proxy) - # API USAGE # search (latest tab) @@ -88,8 +84,10 @@ async def main(): # user info user_id = 2244994945 await api.user_by_id(user_id) # User - await gather(api.followers(user_id, limit=20)) # list[User] await gather(api.following(user_id, limit=20)) # list[User] + await gather(api.followers(user_id, limit=20)) # list[User] + await gather(api.verified_followers(user_id, limit=20)) # list[User] + await gather(api.subscriptions(user_id, limit=20)) # list[User] await gather(api.user_tweets(user_id, limit=20)) # list[Tweet] await gather(api.user_tweets_and_replies(user_id, limit=20)) # list[Tweet] await gather(api.liked_tweets(user_id, limit=20)) # list[Tweet] @@ -175,7 +173,7 @@ twscrape add_accounts ./order-12345.txt username:password:email:email_password:_ ### Login accounts -_Note: If you added accounts with cookies, login not required._ +_Note:_ If you added accounts with cookies, login not required. Run: @@ -183,9 +181,11 @@ Run: twscrape login_accounts ``` -`twscrape` will start login flow for each new account. 
If X will ask to verify email and you provided `email_password` in `add_account`, then `twscrape` will try to receive verification map by IMAP protocol. After success login account cookies will be saved to db file for future use. +`twscrape` will start login flow for each new account. If X will ask to verify email and you provided `email_password` in `add_account`, then `twscrape` will try to receive verification code by IMAP protocol. After success login account cookies will be saved to db file for future use. -#### Manual email verefication +_Note:_ You can increase timeout for verification code with `TWS_WAIT_EMAIL_CODE` environment variable (default: `30`, in seconds). + +#### Manual email verification In case your email provider not support IMAP protocol (ProtonMail, Tutanota, etc) or IMAP is disabled in settings, you can enter email verification code manually. To do this run login command with `--manual` flag. @@ -197,7 +197,6 @@ twscrape relogin user1 user2 --manual twscrape relogin_failed --manual ``` - ### Get list of accounts and their statuses ```sh @@ -242,8 +241,10 @@ twscrape retweeters TWEET_ID --limit=20 twscrape favoriters TWEET_ID --limit=20 twscrape user_by_id USER_ID twscrape user_by_login USERNAME -twscrape followers USER_ID --limit=20 twscrape following USER_ID --limit=20 +twscrape followers USER_ID --limit=20 +twscrape verified_followers USER_ID --limit=20 +twscrape subscriptions USER_ID --limit=20 twscrape user_tweets USER_ID --limit=20 twscrape user_tweets_and_replies USER_ID --limit=20 twscrape liked_tweets USER_ID --limit=20 @@ -261,9 +262,47 @@ By default, parsed data is returned. The original tweet responses can be retriev twscrape search "elon mask lang:es" --limit=20 --raw ``` -### Environment variables +## Proxy -`LOGIN_CODE_TIMEOUT` - how long to wait for email code confirmation in seconds (default `40`) +There are a few options to use proxies. + +1. 
You can add proxy per account + +```py +proxy = "http://login:pass@example.com:8080" +await api.pool.add_account("user4", "pass4", "u4@mail.com", "mail_pass4", proxy=proxy) +``` + +2. You can use global proxy for all accounts + +```py +proxy = "http://login:pass@example.com:8080" +api = API(proxy=proxy) +doc = await api.user_by_login("elonmusk") +``` + +3. You can set proxy with environment variable `TWS_PROXY`: + +```sh +TWS_PROXY=socks5://user:pass@127.0.0.1:1080 twscrape user_by_login elonmusk +``` + +4. You can change proxy any time like: + +```py +api.proxy = "socks5://user:pass@127.0.0.1:1080" +doc = await api.user_by_login("elonmusk") # new proxy will be used +api.proxy = None +doc = await api.user_by_login("elonmusk") # no proxy used +``` + +5. Proxy priorities + +- `api.proxy` has top priority +- `env.proxy` will be used if `api.proxy` is None +- `acc.proxy` has lowest priority + +So if you want to use proxy PER ACCOUNT, do NOT override proxy with env variable or by passing proxy param to API. 
## Limitations diff --git a/twscrape/account.py b/twscrape/account.py index 2b6bf4f..f88463a 100644 --- a/twscrape/account.py +++ b/twscrape/account.py @@ -1,4 +1,5 @@ import json +import os import sqlite3 from dataclasses import asdict, dataclass, field from datetime import datetime @@ -48,9 +49,13 @@ class Account(JSONTrait): rs["last_used"] = rs["last_used"].isoformat() if rs["last_used"] else None return rs - def make_client(self) -> AsyncClient: + def make_client(self, proxy: str | None) -> AsyncClient: + proxies = [proxy, os.getenv("TWS_PROXY"), self.proxy] + proxies = [x for x in proxies if x is not None] + proxy = proxies[0] if proxies else None + transport = AsyncHTTPTransport(retries=2) - client = AsyncClient(proxies=self.proxy, follow_redirects=True, transport=transport) + client = AsyncClient(proxy=proxy, follow_redirects=True, transport=transport) # saved from previous usage client.cookies.update(self.cookies) diff --git a/twscrape/api.py b/twscrape/api.py index e1ceb53..df306b4 100644 --- a/twscrape/api.py +++ b/twscrape/api.py @@ -52,7 +52,9 @@ class API: # Note: kv is variables, ft is features from original GQL request pool: AccountsPool - def __init__(self, pool: AccountsPool | str | None = None, debug=False): + def __init__( + self, pool: AccountsPool | str | None = None, debug=False, proxy: str | None = None + ): if isinstance(pool, AccountsPool): self.pool = pool elif isinstance(pool, str): @@ -60,6 +62,7 @@ class API: else: self.pool = AccountsPool() + self.proxy = proxy self.debug = debug if self.debug: set_log_level("DEBUG") @@ -89,7 +92,7 @@ class API: queue, cur, cnt, active = op.split("/")[-1], None, 0, True kv, ft = {**kv}, {**GQL_FEATURES, **(ft or {})} - async with QueueClient(self.pool, queue, self.debug) as client: + async with QueueClient(self.pool, queue, self.debug, proxy=self.proxy) as client: while active: params = {"variables": kv, "features": ft} if cur is not None: @@ -115,7 +118,7 @@ class API: async def _gql_item(self, op: 
str, kv: dict, ft: dict | None = None): ft = ft or {} queue = op.split("/")[-1] - async with QueueClient(self.pool, queue, self.debug) as client: + async with QueueClient(self.pool, queue, self.debug, proxy=self.proxy) as client: params = {"variables": {**kv}, "features": {**GQL_FEATURES, **ft}} return await client.get(f"{GQL_URL}/{op}", params=encode_params(params)) diff --git a/twscrape/imap.py b/twscrape/imap.py index b29cc60..c615861 100644 --- a/twscrape/imap.py +++ b/twscrape/imap.py @@ -6,11 +6,9 @@ import time from datetime import datetime from .logger import logger -from .utils import int_or -_env = dict(os.environ) - -LOGIN_CODE_TIMEOUT = int_or(_env, "LOGIN_CODE_TIMEOUT") or 30 +TWS_WAIT_EMAIL_CODE = [os.getenv("TWS_WAIT_EMAIL_CODE"), os.getenv("LOGIN_CODE_TIMEOUT"), 30] +TWS_WAIT_EMAIL_CODE = [int(x) for x in TWS_WAIT_EMAIL_CODE if x is not None][0] class EmailLoginError(Exception): @@ -82,8 +80,8 @@ async def imap_get_email_code( if code is not None: return code - if LOGIN_CODE_TIMEOUT < time.time() - start_time: - raise EmailCodeTimeoutError(f"Email code timeout ({LOGIN_CODE_TIMEOUT} sec)") + if TWS_WAIT_EMAIL_CODE < time.time() - start_time: + raise EmailCodeTimeoutError(f"Email code timeout ({TWS_WAIT_EMAIL_CODE} sec)") await asyncio.sleep(5) except Exception as e: diff --git a/twscrape/queue_client.py b/twscrape/queue_client.py index 751e125..093c755 100644 --- a/twscrape/queue_client.py +++ b/twscrape/queue_client.py @@ -65,11 +65,12 @@ def dump_rep(rep: Response): class QueueClient: - def __init__(self, pool: AccountsPool, queue: str, debug=False): + def __init__(self, pool: AccountsPool, queue: str, debug=False, proxy: str | None = None): self.pool = pool self.queue = queue self.debug = debug self.ctx: Ctx | None = None + self.proxy = proxy async def __aenter__(self): await self._get_ctx() @@ -104,7 +105,7 @@ class QueueClient: if acc is None: return None - clt = acc.make_client() + clt = acc.make_client(proxy=self.proxy) self.ctx = Ctx(acc, 
clt) return self.ctx