зеркало из
https://github.com/viginum-datalab/twscrape.git
synced 2025-10-28 20:54:24 +02:00
родитель
cd04ed97e5
Коммит
fc5eaa84ba
@ -5,6 +5,11 @@ import re
|
||||
import httpx
|
||||
from fake_useragent import UserAgent
|
||||
|
||||
"""
|
||||
docker run --rm -p "3128:3128/tcp" -p "1080:1080/tcp" -e "PROXY_LOGIN=user" -e "PROXY_PASSWORD=pass" tarampampam/3proxy
|
||||
docker run --rm -p "3129:3128/tcp" -p "1081:1080/tcp" tarampampam/3proxy
|
||||
"""
|
||||
|
||||
client = httpx.Client(headers={"user-agent": UserAgent().chrome})
|
||||
|
||||
with open("./twscrape/api.py") as fp:
|
||||
|
||||
63
readme.md
63
readme.md
@ -61,10 +61,6 @@ async def main():
|
||||
cookies = "abc=12; ct0=xyz" # or '{"abc": "12", "ct0": "xyz"}'
|
||||
await api.pool.add_account("user3", "pass3", "u3@mail.com", "mail_pass3", cookies=cookies)
|
||||
|
||||
# add account with PROXY
|
||||
proxy = "http://login:pass@example.com:8080"
|
||||
await api.pool.add_account("user4", "pass4", "u4@mail.com", "mail_pass4", proxy=proxy)
|
||||
|
||||
# API USAGE
|
||||
|
||||
# search (latest tab)
|
||||
@ -88,8 +84,10 @@ async def main():
|
||||
# user info
|
||||
user_id = 2244994945
|
||||
await api.user_by_id(user_id) # User
|
||||
await gather(api.followers(user_id, limit=20)) # list[User]
|
||||
await gather(api.following(user_id, limit=20)) # list[User]
|
||||
await gather(api.followers(user_id, limit=20)) # list[User]
|
||||
await gather(api.verified_followers(user_id, limit=20)) # list[User]
|
||||
await gather(api.subscriptions(user_id, limit=20)) # list[User]
|
||||
await gather(api.user_tweets(user_id, limit=20)) # list[Tweet]
|
||||
await gather(api.user_tweets_and_replies(user_id, limit=20)) # list[Tweet]
|
||||
await gather(api.liked_tweets(user_id, limit=20)) # list[Tweet]
|
||||
@ -175,7 +173,7 @@ twscrape add_accounts ./order-12345.txt username:password:email:email_password:_
|
||||
|
||||
### Login accounts
|
||||
|
||||
_Note: If you added accounts with cookies, login not required._
|
||||
_Note:_ If you added accounts with cookies, login is not required.
|
||||
|
||||
Run:
|
||||
|
||||
@ -183,9 +181,11 @@ Run:
|
||||
twscrape login_accounts
|
||||
```
|
||||
|
||||
`twscrape` will start login flow for each new account. If X will ask to verify email and you provided `email_password` in `add_account`, then `twscrape` will try to receive verification map by IMAP protocol. After success login account cookies will be saved to db file for future use.
|
||||
`twscrape` will start the login flow for each new account. If X asks to verify the email and you provided `email_password` in `add_account`, then `twscrape` will try to receive the verification code via the IMAP protocol. After a successful login, account cookies will be saved to the db file for future use.
|
||||
|
||||
#### Manual email verefication
|
||||
_Note:_ You can increase timeout for verification code with `TWS_WAIT_EMAIL_CODE` environment variable (default: `30`, in seconds).
|
||||
|
||||
#### Manual email verification
|
||||
|
||||
In case your email provider does not support the IMAP protocol (ProtonMail, Tutanota, etc.) or IMAP is disabled in settings, you can enter the email verification code manually. To do this, run the login command with the `--manual` flag.
|
||||
|
||||
@ -197,7 +197,6 @@ twscrape relogin user1 user2 --manual
|
||||
twscrape relogin_failed --manual
|
||||
```
|
||||
|
||||
|
||||
### Get list of accounts and their statuses
|
||||
|
||||
```sh
|
||||
@ -242,8 +241,10 @@ twscrape retweeters TWEET_ID --limit=20
|
||||
twscrape favoriters TWEET_ID --limit=20
|
||||
twscrape user_by_id USER_ID
|
||||
twscrape user_by_login USERNAME
|
||||
twscrape followers USER_ID --limit=20
|
||||
twscrape following USER_ID --limit=20
|
||||
twscrape followers USER_ID --limit=20
|
||||
twscrape verified_followers USER_ID --limit=20
|
||||
twscrape subscriptions USER_ID --limit=20
|
||||
twscrape user_tweets USER_ID --limit=20
|
||||
twscrape user_tweets_and_replies USER_ID --limit=20
|
||||
twscrape liked_tweets USER_ID --limit=20
|
||||
@ -261,9 +262,47 @@ By default, parsed data is returned. The original tweet responses can be retriev
|
||||
twscrape search "elon mask lang:es" --limit=20 --raw
|
||||
```
|
||||
|
||||
### Environment variables
|
||||
## Proxy
|
||||
|
||||
`LOGIN_CODE_TIMEOUT` - how long to wait for email code confirmation in seconds (default `40`)
|
||||
There are a few options for using proxies.
|
||||
|
||||
1. You can add proxy per account
|
||||
|
||||
```py
|
||||
proxy = "http://login:pass@example.com:8080"
|
||||
await api.pool.add_account("user4", "pass4", "u4@mail.com", "mail_pass4", proxy=proxy)
|
||||
```
|
||||
|
||||
2. You can use global proxy for all accounts
|
||||
|
||||
```py
|
||||
proxy = "http://login:pass@example.com:8080"
|
||||
api = API(proxy=proxy)
|
||||
doc = await api.user_by_login("elonmusk")
|
||||
```
|
||||
|
||||
3. You can set proxy with environment variable `TWS_PROXY`:
|
||||
|
||||
```sh
|
||||
TWS_PROXY=socks5://user:pass@127.0.0.1:1080 twscrape user_by_login elonmusk
|
||||
```
|
||||
|
||||
4. You can change proxy any time like:
|
||||
|
||||
```py
|
||||
api.proxy = "socks5://user:pass@127.0.0.1:1080"
|
||||
doc = await api.user_by_login("elonmusk") # new proxy will be used
|
||||
api.proxy = None
|
||||
doc = await api.user_by_login("elonmusk") # no proxy used
|
||||
```
|
||||
|
||||
5. Proxy priorities
|
||||
|
||||
- `api.proxy` has top priority
|
||||
- `env.proxy` will be used if `api.proxy` is None
|
||||
- `acc.proxy` has lowest priority
|
||||
|
||||
So if you want to use proxy PER ACCOUNT, do NOT override proxy with env variable or by passing proxy param to API.
|
||||
|
||||
## Limitations
|
||||
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
import json
|
||||
import os
|
||||
import sqlite3
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from datetime import datetime
|
||||
@ -48,9 +49,13 @@ class Account(JSONTrait):
|
||||
rs["last_used"] = rs["last_used"].isoformat() if rs["last_used"] else None
|
||||
return rs
|
||||
|
||||
def make_client(self) -> AsyncClient:
|
||||
def make_client(self, proxy: str | None) -> AsyncClient:
|
||||
proxies = [proxy, os.getenv("TWS_PROXY"), self.proxy]
|
||||
proxies = [x for x in proxies if x is not None]
|
||||
proxy = proxies[0] if proxies else None
|
||||
|
||||
transport = AsyncHTTPTransport(retries=2)
|
||||
client = AsyncClient(proxies=self.proxy, follow_redirects=True, transport=transport)
|
||||
client = AsyncClient(proxy=proxy, follow_redirects=True, transport=transport)
|
||||
|
||||
# saved from previous usage
|
||||
client.cookies.update(self.cookies)
|
||||
|
||||
@ -52,7 +52,9 @@ class API:
|
||||
# Note: kv is variables, ft is features from original GQL request
|
||||
pool: AccountsPool
|
||||
|
||||
def __init__(self, pool: AccountsPool | str | None = None, debug=False):
|
||||
def __init__(
|
||||
self, pool: AccountsPool | str | None = None, debug=False, proxy: str | None = None
|
||||
):
|
||||
if isinstance(pool, AccountsPool):
|
||||
self.pool = pool
|
||||
elif isinstance(pool, str):
|
||||
@ -60,6 +62,7 @@ class API:
|
||||
else:
|
||||
self.pool = AccountsPool()
|
||||
|
||||
self.proxy = proxy
|
||||
self.debug = debug
|
||||
if self.debug:
|
||||
set_log_level("DEBUG")
|
||||
@ -89,7 +92,7 @@ class API:
|
||||
queue, cur, cnt, active = op.split("/")[-1], None, 0, True
|
||||
kv, ft = {**kv}, {**GQL_FEATURES, **(ft or {})}
|
||||
|
||||
async with QueueClient(self.pool, queue, self.debug) as client:
|
||||
async with QueueClient(self.pool, queue, self.debug, proxy=self.proxy) as client:
|
||||
while active:
|
||||
params = {"variables": kv, "features": ft}
|
||||
if cur is not None:
|
||||
@ -115,7 +118,7 @@ class API:
|
||||
async def _gql_item(self, op: str, kv: dict, ft: dict | None = None):
|
||||
ft = ft or {}
|
||||
queue = op.split("/")[-1]
|
||||
async with QueueClient(self.pool, queue, self.debug) as client:
|
||||
async with QueueClient(self.pool, queue, self.debug, proxy=self.proxy) as client:
|
||||
params = {"variables": {**kv}, "features": {**GQL_FEATURES, **ft}}
|
||||
return await client.get(f"{GQL_URL}/{op}", params=encode_params(params))
|
||||
|
||||
|
||||
@ -6,11 +6,9 @@ import time
|
||||
from datetime import datetime
|
||||
|
||||
from .logger import logger
|
||||
from .utils import int_or
|
||||
|
||||
_env = dict(os.environ)
|
||||
|
||||
LOGIN_CODE_TIMEOUT = int_or(_env, "LOGIN_CODE_TIMEOUT") or 30
|
||||
TWS_WAIT_EMAIL_CODE = [os.getenv("TWS_WAIT_EMAIL_CODE"), os.getenv("LOGIN_CODE_TIMEOUT"), 30]
|
||||
TWS_WAIT_EMAIL_CODE = [int(x) for x in TWS_WAIT_EMAIL_CODE if x is not None][0]
|
||||
|
||||
|
||||
class EmailLoginError(Exception):
|
||||
@ -82,8 +80,8 @@ async def imap_get_email_code(
|
||||
if code is not None:
|
||||
return code
|
||||
|
||||
if LOGIN_CODE_TIMEOUT < time.time() - start_time:
|
||||
raise EmailCodeTimeoutError(f"Email code timeout ({LOGIN_CODE_TIMEOUT} sec)")
|
||||
if TWS_WAIT_EMAIL_CODE < time.time() - start_time:
|
||||
raise EmailCodeTimeoutError(f"Email code timeout ({TWS_WAIT_EMAIL_CODE} sec)")
|
||||
|
||||
await asyncio.sleep(5)
|
||||
except Exception as e:
|
||||
|
||||
@ -65,11 +65,12 @@ def dump_rep(rep: Response):
|
||||
|
||||
|
||||
class QueueClient:
|
||||
def __init__(self, pool: AccountsPool, queue: str, debug=False):
|
||||
def __init__(self, pool: AccountsPool, queue: str, debug=False, proxy: str | None = None):
|
||||
self.pool = pool
|
||||
self.queue = queue
|
||||
self.debug = debug
|
||||
self.ctx: Ctx | None = None
|
||||
self.proxy = proxy
|
||||
|
||||
async def __aenter__(self):
|
||||
await self._get_ctx()
|
||||
@ -104,7 +105,7 @@ class QueueClient:
|
||||
if acc is None:
|
||||
return None
|
||||
|
||||
clt = acc.make_client()
|
||||
clt = acc.make_client(proxy=self.proxy)
|
||||
self.ctx = Ctx(acc, clt)
|
||||
return self.ctx
|
||||
|
||||
|
||||
Загрузка…
x
Ссылка в новой задаче
Block a user