Этот коммит содержится в:
Vlad Pronsky 2024-02-11 00:36:08 +02:00
родитель cd04ed97e5
Коммит fc5eaa84ba
6 изменённых файлов: 76 добавлений и 25 удалений

Просмотреть файл

@ -5,6 +5,11 @@ import re
import httpx import httpx
from fake_useragent import UserAgent from fake_useragent import UserAgent
"""
docker run --rm -p "3128:3128/tcp" -p "1080:1080/tcp" -e "PROXY_LOGIN=user" -e "PROXY_PASSWORD=pass" tarampampam/3proxy
docker run --rm -p "3129:3128/tcp" -p "1081:1080/tcp" tarampampam/3proxy
"""
client = httpx.Client(headers={"user-agent": UserAgent().chrome}) client = httpx.Client(headers={"user-agent": UserAgent().chrome})
with open("./twscrape/api.py") as fp: with open("./twscrape/api.py") as fp:

Просмотреть файл

@ -61,10 +61,6 @@ async def main():
cookies = "abc=12; ct0=xyz" # or '{"abc": "12", "ct0": "xyz"}' cookies = "abc=12; ct0=xyz" # or '{"abc": "12", "ct0": "xyz"}'
await api.pool.add_account("user3", "pass3", "u3@mail.com", "mail_pass3", cookies=cookies) await api.pool.add_account("user3", "pass3", "u3@mail.com", "mail_pass3", cookies=cookies)
# add account with PROXY
proxy = "http://login:pass@example.com:8080"
await api.pool.add_account("user4", "pass4", "u4@mail.com", "mail_pass4", proxy=proxy)
# API USAGE # API USAGE
# search (latest tab) # search (latest tab)
@ -88,8 +84,10 @@ async def main():
# user info # user info
user_id = 2244994945 user_id = 2244994945
await api.user_by_id(user_id) # User await api.user_by_id(user_id) # User
await gather(api.followers(user_id, limit=20)) # list[User]
await gather(api.following(user_id, limit=20)) # list[User] await gather(api.following(user_id, limit=20)) # list[User]
await gather(api.followers(user_id, limit=20)) # list[User]
await gather(api.verified_followers(user_id, limit=20)) # list[User]
await gather(api.subscriptions(user_id, limit=20)) # list[User]
await gather(api.user_tweets(user_id, limit=20)) # list[Tweet] await gather(api.user_tweets(user_id, limit=20)) # list[Tweet]
await gather(api.user_tweets_and_replies(user_id, limit=20)) # list[Tweet] await gather(api.user_tweets_and_replies(user_id, limit=20)) # list[Tweet]
await gather(api.liked_tweets(user_id, limit=20)) # list[Tweet] await gather(api.liked_tweets(user_id, limit=20)) # list[Tweet]
@ -175,7 +173,7 @@ twscrape add_accounts ./order-12345.txt username:password:email:email_password:_
### Login accounts ### Login accounts
_Note: If you added accounts with cookies, login not required._ _Note:_ If you added accounts with cookies, login not required.
Run: Run:
@ -183,9 +181,11 @@ Run:
twscrape login_accounts twscrape login_accounts
``` ```
`twscrape` will start login flow for each new account. If X will ask to verify email and you provided `email_password` in `add_account`, then `twscrape` will try to receive verification map by IMAP protocol. After success login account cookies will be saved to db file for future use. `twscrape` will start login flow for each new account. If X will ask to verify email and you provided `email_password` in `add_account`, then `twscrape` will try to receive verification code by IMAP protocol. After success login account cookies will be saved to db file for future use.
#### Manual email verefication _Note:_ You can increase timeout for verification code with `TWS_WAIT_EMAIL_CODE` environment variable (default: `30`, in seconds).
#### Manual email verification
In case your email provider not support IMAP protocol (ProtonMail, Tutanota, etc) or IMAP is disabled in settings, you can enter email verification code manually. To do this run login command with `--manual` flag. In case your email provider not support IMAP protocol (ProtonMail, Tutanota, etc) or IMAP is disabled in settings, you can enter email verification code manually. To do this run login command with `--manual` flag.
@ -197,7 +197,6 @@ twscrape relogin user1 user2 --manual
twscrape relogin_failed --manual twscrape relogin_failed --manual
``` ```
### Get list of accounts and their statuses ### Get list of accounts and their statuses
```sh ```sh
@ -242,8 +241,10 @@ twscrape retweeters TWEET_ID --limit=20
twscrape favoriters TWEET_ID --limit=20 twscrape favoriters TWEET_ID --limit=20
twscrape user_by_id USER_ID twscrape user_by_id USER_ID
twscrape user_by_login USERNAME twscrape user_by_login USERNAME
twscrape followers USER_ID --limit=20
twscrape following USER_ID --limit=20 twscrape following USER_ID --limit=20
twscrape followers USER_ID --limit=20
twscrape verified_followers USER_ID --limit=20
twscrape subscriptions USER_ID --limit=20
twscrape user_tweets USER_ID --limit=20 twscrape user_tweets USER_ID --limit=20
twscrape user_tweets_and_replies USER_ID --limit=20 twscrape user_tweets_and_replies USER_ID --limit=20
twscrape liked_tweets USER_ID --limit=20 twscrape liked_tweets USER_ID --limit=20
@ -261,9 +262,47 @@ By default, parsed data is returned. The original tweet responses can be retriev
twscrape search "elon mask lang:es" --limit=20 --raw twscrape search "elon mask lang:es" --limit=20 --raw
``` ```
### Environment variables ## Proxy
`LOGIN_CODE_TIMEOUT` - how long to wait for email code confirmation in seconds (default `40`) There are a few options to use proxies.
1. You can add proxy per account
```py
proxy = "http://login:pass@example.com:8080"
await api.pool.add_account("user4", "pass4", "u4@mail.com", "mail_pass4", proxy=proxy)
```
2. You can use global proxy for all accounts
```py
proxy = "http://login:pass@example.com:8080"
api = API(proxy=proxy)
doc = await api.user_by_login("elonmusk")
```
3. You can set proxy with environment variable `TWS_PROXY`:
```sh
TWS_PROXY=socks5://user:pass@127.0.0.1:1080 twscrape user_by_login elonmusk
```
4. You can change proxy any time like:
```py
api.proxy = "socks5://user:pass@127.0.0.1:1080"
doc = await api.user_by_login("elonmusk") # new proxy will be used
api.proxy = None
doc = await api.user_by_login("elonmusk") # no proxy used
```
5. Proxy priorities
- `api.proxy` has top priority
- `env.proxy` will be used if `api.proxy` is None
- `acc.proxy` has lowest priority
So if you want to use proxy PER ACCOUNT, do NOT override proxy with env variable or by passing proxy param to API.
## Limitations ## Limitations

Просмотреть файл

@ -1,4 +1,5 @@
import json import json
import os
import sqlite3 import sqlite3
from dataclasses import asdict, dataclass, field from dataclasses import asdict, dataclass, field
from datetime import datetime from datetime import datetime
@ -48,9 +49,13 @@ class Account(JSONTrait):
rs["last_used"] = rs["last_used"].isoformat() if rs["last_used"] else None rs["last_used"] = rs["last_used"].isoformat() if rs["last_used"] else None
return rs return rs
def make_client(self) -> AsyncClient: def make_client(self, proxy: str | None) -> AsyncClient:
proxies = [proxy, os.getenv("TWS_PROXY"), self.proxy]
proxies = [x for x in proxies if x is not None]
proxy = proxies[0] if proxies else None
transport = AsyncHTTPTransport(retries=2) transport = AsyncHTTPTransport(retries=2)
client = AsyncClient(proxies=self.proxy, follow_redirects=True, transport=transport) client = AsyncClient(proxy=proxy, follow_redirects=True, transport=transport)
# saved from previous usage # saved from previous usage
client.cookies.update(self.cookies) client.cookies.update(self.cookies)

Просмотреть файл

@ -52,7 +52,9 @@ class API:
# Note: kv is variables, ft is features from original GQL request # Note: kv is variables, ft is features from original GQL request
pool: AccountsPool pool: AccountsPool
def __init__(self, pool: AccountsPool | str | None = None, debug=False): def __init__(
self, pool: AccountsPool | str | None = None, debug=False, proxy: str | None = None
):
if isinstance(pool, AccountsPool): if isinstance(pool, AccountsPool):
self.pool = pool self.pool = pool
elif isinstance(pool, str): elif isinstance(pool, str):
@ -60,6 +62,7 @@ class API:
else: else:
self.pool = AccountsPool() self.pool = AccountsPool()
self.proxy = proxy
self.debug = debug self.debug = debug
if self.debug: if self.debug:
set_log_level("DEBUG") set_log_level("DEBUG")
@ -89,7 +92,7 @@ class API:
queue, cur, cnt, active = op.split("/")[-1], None, 0, True queue, cur, cnt, active = op.split("/")[-1], None, 0, True
kv, ft = {**kv}, {**GQL_FEATURES, **(ft or {})} kv, ft = {**kv}, {**GQL_FEATURES, **(ft or {})}
async with QueueClient(self.pool, queue, self.debug) as client: async with QueueClient(self.pool, queue, self.debug, proxy=self.proxy) as client:
while active: while active:
params = {"variables": kv, "features": ft} params = {"variables": kv, "features": ft}
if cur is not None: if cur is not None:
@ -115,7 +118,7 @@ class API:
async def _gql_item(self, op: str, kv: dict, ft: dict | None = None): async def _gql_item(self, op: str, kv: dict, ft: dict | None = None):
ft = ft or {} ft = ft or {}
queue = op.split("/")[-1] queue = op.split("/")[-1]
async with QueueClient(self.pool, queue, self.debug) as client: async with QueueClient(self.pool, queue, self.debug, proxy=self.proxy) as client:
params = {"variables": {**kv}, "features": {**GQL_FEATURES, **ft}} params = {"variables": {**kv}, "features": {**GQL_FEATURES, **ft}}
return await client.get(f"{GQL_URL}/{op}", params=encode_params(params)) return await client.get(f"{GQL_URL}/{op}", params=encode_params(params))

Просмотреть файл

@ -6,11 +6,9 @@ import time
from datetime import datetime from datetime import datetime
from .logger import logger from .logger import logger
from .utils import int_or
_env = dict(os.environ) TWS_WAIT_EMAIL_CODE = [os.getenv("TWS_WAIT_EMAIL_CODE"), os.getenv("LOGIN_CODE_TIMEOUT"), 30]
TWS_WAIT_EMAIL_CODE = [int(x) for x in TWS_WAIT_EMAIL_CODE if x is not None][0]
LOGIN_CODE_TIMEOUT = int_or(_env, "LOGIN_CODE_TIMEOUT") or 30
class EmailLoginError(Exception): class EmailLoginError(Exception):
@ -82,8 +80,8 @@ async def imap_get_email_code(
if code is not None: if code is not None:
return code return code
if LOGIN_CODE_TIMEOUT < time.time() - start_time: if TWS_WAIT_EMAIL_CODE < time.time() - start_time:
raise EmailCodeTimeoutError(f"Email code timeout ({LOGIN_CODE_TIMEOUT} sec)") raise EmailCodeTimeoutError(f"Email code timeout ({TWS_WAIT_EMAIL_CODE} sec)")
await asyncio.sleep(5) await asyncio.sleep(5)
except Exception as e: except Exception as e:

Просмотреть файл

@ -65,11 +65,12 @@ def dump_rep(rep: Response):
class QueueClient: class QueueClient:
def __init__(self, pool: AccountsPool, queue: str, debug=False): def __init__(self, pool: AccountsPool, queue: str, debug=False, proxy: str | None = None):
self.pool = pool self.pool = pool
self.queue = queue self.queue = queue
self.debug = debug self.debug = debug
self.ctx: Ctx | None = None self.ctx: Ctx | None = None
self.proxy = proxy
async def __aenter__(self): async def __aenter__(self):
await self._get_ctx() await self._get_ctx()
@ -104,7 +105,7 @@ class QueueClient:
if acc is None: if acc is None:
return None return None
clt = acc.make_client() clt = acc.make_client(proxy=self.proxy)
self.ctx = Ctx(acc, clt) self.ctx = Ctx(acc, clt)
return self.ctx return self.ctx