зеркало из
https://github.com/viginum-datalab/twscrape.git
synced 2025-10-30 21:46:13 +02:00
add ability to add accounts & login from cli
Этот коммит содержится в:
родитель
d3d6a002f2
Коммит
2492de1417
85
readme.md
85
readme.md
@ -99,7 +99,58 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
## CLI
|
## CLI
|
||||||
|
|
||||||
You can also use the CLI to make requests (before that you need to log in to some accounts through the programming interface).
|
### Get help on CLI commands
|
||||||
|
|
||||||
|
```sh
|
||||||
|
# show all commands
|
||||||
|
twscrape
|
||||||
|
|
||||||
|
# help on a specific command
|
||||||
|
twscrape search --help
|
||||||
|
```
|
||||||
|
|
||||||
|
### Add accounts & login
|
||||||
|
|
||||||
|
First add accounts from file:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
# twscrape add_accounts <file_path> <line_format>
|
||||||
|
# line_format should have "username", "password", "email", "email_password" tokens
|
||||||
|
# the token delimiter should be the same as the one used in the file
|
||||||
|
twscrape add_accounts accounts.txt username:password:email:email_password
|
||||||
|
```
|
||||||
|
|
||||||
|
Then call login:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
twscrape login_accounts
|
||||||
|
```
|
||||||
|
|
||||||
|
Accounts and their sessions will be saved, so they can be reused for future requests
|
||||||
|
|
||||||
|
### Get list of accounts and their statuses
|
||||||
|
|
||||||
|
```sh
|
||||||
|
twscrape accounts
|
||||||
|
|
||||||
|
# Output:
|
||||||
|
# ───────────────────────────────────────────────────────────────────────────────────
|
||||||
|
# username logged_in active last_used total_req error_msg
|
||||||
|
# ───────────────────────────────────────────────────────────────────────────────────
|
||||||
|
# user1 True True 2023-05-20 03:20:40 100 None
|
||||||
|
# user2 True True 2023-05-20 03:25:45 120 None
|
||||||
|
# user3 False False None 120 Login error
|
||||||
|
```
|
||||||
|
|
||||||
|
### Use different accounts file
|
||||||
|
|
||||||
|
Useful if using a different set of accounts for different actions
|
||||||
|
|
||||||
|
```sh
|
||||||
|
twscrape --db test-accounts.db <command>
|
||||||
|
```
|
||||||
|
|
||||||
|
### Search commands
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
twscrape search "QUERY" --limit=20
|
twscrape search "QUERY" --limit=20
|
||||||
@ -126,38 +177,6 @@ By default, parsed data is returned. The original tweet responses can be retriev
|
|||||||
twscrape search "elon mask lang:es" --limit=20 --raw
|
twscrape search "elon mask lang:es" --limit=20 --raw
|
||||||
```
|
```
|
||||||
|
|
||||||
View a list of commands:
|
|
||||||
|
|
||||||
```sh
|
|
||||||
# show all commands
|
|
||||||
twscrape
|
|
||||||
|
|
||||||
# help on specific comand
|
|
||||||
twscrape search --help
|
|
||||||
```
|
|
||||||
|
|
||||||
## Advanced usage
|
|
||||||
|
|
||||||
### Get list of connected accounts and their statuses
|
|
||||||
|
|
||||||
```sh
|
|
||||||
twscrape accounts
|
|
||||||
|
|
||||||
# Output:
|
|
||||||
# ───────────────────────────────────────────────────────────────────────────────────
|
|
||||||
# username logged_in active last_used total_req error_msg
|
|
||||||
# ───────────────────────────────────────────────────────────────────────────────────
|
|
||||||
# user1 True True 2023-05-20 03:20:40 100 None
|
|
||||||
# user2 True True 2023-05-20 03:25:45 120 None
|
|
||||||
# user3 False False None 120 Login error
|
|
||||||
```
|
|
||||||
|
|
||||||
Or from code:
|
|
||||||
```python
|
|
||||||
pool = AccountsPool()
|
|
||||||
print(await pool.accounts_info()) # list
|
|
||||||
```
|
|
||||||
|
|
||||||
## Limitations
|
## Limitations
|
||||||
|
|
||||||
API rate limits (per account):
|
API rate limits (per account):
|
||||||
|
|||||||
@ -11,10 +11,36 @@ from .login import login
|
|||||||
from .utils import utc_ts
|
from .utils import utc_ts
|
||||||
|
|
||||||
|
|
||||||
|
def guess_delim(line: str) -> str:
    """Guess the single-character delimiter used in a line-format string.

    The delimiter is taken to be the character adjacent to the first
    ``username`` token: the one right before it when something precedes the
    token, otherwise the one right after it. Whitespace around the token is
    ignored.

    Args:
        line: Format string such as ``"username:password:email:email_password"``.

    Returns:
        The guessed delimiter character.

    Raises:
        ValueError: If ``line`` has no ``username`` token, or if ``username``
            is the only content so there is no neighbouring character.
    """
    # partition() always yields three parts, so a missing or repeated token
    # cannot blow up a two-way unpack the way split() would.
    before, found, after = line.partition("username")
    if not found:
        raise ValueError('line format must contain the "username" token')

    before, after = before.strip(), after.strip()
    if before:
        return before[-1]
    if after:
        return after[0]
    raise ValueError('cannot guess delimiter: "username" is the only token')
|
||||||
|
|
||||||
|
|
||||||
class AccountsPool:
|
class AccountsPool:
|
||||||
def __init__(self, db_file="accounts.db"):
|
def __init__(self, db_file="accounts.db"):
|
||||||
self._db_file = db_file
|
self._db_file = db_file
|
||||||
|
|
||||||
|
async def load_from_file(self, filepath: str, line_format: str):
    """Add every account listed in a delimited text file.

    ``line_format`` names the columns of the file, e.g.
    ``"username:password:email:email_password"``. The delimiter is guessed
    from it with ``guess_delim``, and each non-empty line of the file is split
    on that delimiter. Fields are matched to the format tokens by position and
    passed to ``self.add_account`` as keyword arguments. Extra trailing fields
    are dropped. Lines with fewer fields than tokens are logged and skipped.

    Args:
        filepath: Path of the accounts file.
        line_format: Delimited list of column names. It must contain the
            tokens ``username``, ``password``, ``email`` and
            ``email_password``.

    Raises:
        ValueError: If a required token is missing from ``line_format``.
    """
    required = ("username", "password", "email", "email_password")

    # guess_delim needs the "username" token, so report its absence up front.
    if "username" not in line_format:
        raise ValueError("username is required")

    line_delim = guess_delim(line_format)
    tokens = [x.strip() for x in line_format.split(line_delim)]

    # Check whole tokens, not substrings: "email_password" alone would
    # otherwise satisfy the checks for both "email" and "password".
    # Explicit raises are used because asserts are stripped under `python -O`.
    for name in required:
        if name not in tokens:
            raise ValueError(f"{name} is required")

    # Read everything first so the file is closed before awaiting.
    with open(filepath, "r") as f:
        lines = [x.strip() for x in f if x.strip()]

    for line in lines:
        data = [x.strip() for x in line.split(line_delim)]
        if len(data) < len(tokens):
            logger.warning(f"Invalid line format: {line}")
            continue

        # zip() stops at the shorter sequence, which drops extra fields.
        await self.add_account(**dict(zip(tokens, data)))
|
||||||
|
|
||||||
async def add_account(
|
async def add_account(
|
||||||
self,
|
self,
|
||||||
username: str,
|
username: str,
|
||||||
@ -27,8 +53,11 @@ class AccountsPool:
|
|||||||
qs = "SELECT * FROM accounts WHERE username = :username"
|
qs = "SELECT * FROM accounts WHERE username = :username"
|
||||||
rs = await fetchone(self._db_file, qs, {"username": username})
|
rs = await fetchone(self._db_file, qs, {"username": username})
|
||||||
if rs:
|
if rs:
|
||||||
|
logger.debug(f"Account {username} already exists")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
logger.debug(f"Adding account {username}")
|
||||||
|
|
||||||
account = Account(
|
account = Account(
|
||||||
username=username,
|
username=username,
|
||||||
password=password,
|
password=password,
|
||||||
@ -69,6 +98,7 @@ class AccountsPool:
|
|||||||
async def login(self, account: Account):
|
async def login(self, account: Account):
|
||||||
try:
|
try:
|
||||||
await login(account)
|
await login(account)
|
||||||
|
logger.info(f"Logged in to {account.username} successfully")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error logging in to {account.username}: {e}")
|
logger.error(f"Error logging in to {account.username}: {e}")
|
||||||
finally:
|
finally:
|
||||||
|
|||||||
@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import io
|
||||||
|
|
||||||
from .api import API, AccountsPool
|
from .api import API, AccountsPool
|
||||||
from .logger import logger, set_log_level
|
from .logger import logger, set_log_level
|
||||||
@ -10,6 +11,11 @@ from .utils import print_table
|
|||||||
VER = "0.1.0"
|
VER = "0.1.0"
|
||||||
|
|
||||||
|
|
||||||
|
class CustomHelpFormatter(argparse.HelpFormatter):
    """Help formatter preconfigured with ``max_help_position=30`` and ``width=120``."""

    def __init__(self, prog):
        # Fixed layout options forwarded to argparse.HelpFormatter.
        layout = {"max_help_position": 30, "width": 120}
        super().__init__(prog, **layout)
|
||||||
|
|
||||||
|
|
||||||
def get_fn_arg(args):
|
def get_fn_arg(args):
|
||||||
names = ["query", "tweet_id", "user_id", "username"]
|
names = ["query", "tweet_id", "user_id", "username"]
|
||||||
for name in names:
|
for name in names:
|
||||||
@ -24,13 +30,14 @@ async def main(args):
|
|||||||
if args.debug:
|
if args.debug:
|
||||||
set_log_level("DEBUG")
|
set_log_level("DEBUG")
|
||||||
|
|
||||||
pool = AccountsPool(args.db)
|
|
||||||
api = API(pool, debug=args.debug)
|
|
||||||
|
|
||||||
if args.command == "version":
|
if args.command == "version":
|
||||||
print(VER)
|
print(VER)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
logger.debug(f"Using database: {args.db}")
|
||||||
|
pool = AccountsPool(args.db)
|
||||||
|
api = API(pool, debug=args.debug)
|
||||||
|
|
||||||
if args.command == "accounts":
|
if args.command == "accounts":
|
||||||
print_table(await pool.accounts_info())
|
print_table(await pool.accounts_info())
|
||||||
return
|
return
|
||||||
@ -39,6 +46,14 @@ async def main(args):
|
|||||||
print(await pool.stats())
|
print(await pool.stats())
|
||||||
return
|
return
|
||||||
|
|
||||||
|
if args.command == "add_accounts":
|
||||||
|
await pool.load_from_file(args.file_path, args.line_format)
|
||||||
|
return
|
||||||
|
|
||||||
|
if args.command == "login_accounts":
|
||||||
|
await pool.login_all()
|
||||||
|
return
|
||||||
|
|
||||||
fn = args.command + "_raw" if args.raw else args.command
|
fn = args.command + "_raw" if args.raw else args.command
|
||||||
fn = getattr(api, fn, None)
|
fn = getattr(api, fn, None)
|
||||||
if fn is None:
|
if fn is None:
|
||||||
@ -55,8 +70,29 @@ async def main(args):
|
|||||||
print(doc.json())
|
print(doc.json())
|
||||||
|
|
||||||
|
|
||||||
|
def custom_help(p):
    """Print the parser's help with sub-commands shown as a command list.

    The help text produced by ``p.print_help`` is rewritten as follows:

    * the ``positional arguments:`` heading becomes ``commands:``;
    * the ``{cmd1,cmd2,...}`` choices summary and any line containing ``...``
      are removed;
    * the first (usage) line is kept, stripped of the choices summary, and
      suffixed with ``<command> [...]``;
    * a ``search commands:`` heading is inserted before the first line that
      starts with ``search``, if there is one.

    If the help has no ``positional arguments:`` section it is printed
    unchanged.

    Args:
        p: The ``argparse.ArgumentParser`` whose help should be printed.
    """
    buffer = io.StringIO()
    p.print_help(buffer)
    text = buffer.getvalue()

    heading = "positional arguments:"
    _, found, tail = text.partition(heading)
    # The first line after the heading is the "{cmd1,cmd2,...}" summary.
    cmd = tail.strip().split("\n")[0] if found else ""
    if not cmd:
        # Nothing to rewrite (no sub-commands), so fall back to plain help.
        print(text, end="")
        return

    lines = text.replace(heading, "commands:").split("\n")

    # Keep the usage line even when argparse did not wrap it, i.e. when it
    # still carries the choices summary and the trailing "...".
    usage = lines[0].replace(cmd, "").replace("...", "").rstrip()
    rest = [x for x in lines[1:] if cmd not in x and "..." not in x]
    lines = [f"{usage} <command> [...]", *rest]

    # Only add the heading when a search command is actually listed.
    idx = next((i for i, x in enumerate(lines) if x.strip().startswith("search")), None)
    if idx is not None:
        lines[idx:idx] = ["", "search commands:"]

    print("\n".join(lines))
|
||||||
|
|
||||||
|
|
||||||
def run():
|
def run():
|
||||||
p = argparse.ArgumentParser(add_help=False)
|
p = argparse.ArgumentParser(add_help=False, formatter_class=CustomHelpFormatter)
|
||||||
p.add_argument("--db", default="accounts.db", help="Accounts database file")
|
p.add_argument("--db", default="accounts.db", help="Accounts database file")
|
||||||
p.add_argument("--debug", action="store_true", help="Enable debug mode")
|
p.add_argument("--debug", action="store_true", help="Enable debug mode")
|
||||||
subparsers = p.add_subparsers(dest="command")
|
subparsers = p.add_subparsers(dest="command")
|
||||||
@ -74,7 +110,11 @@ def run():
|
|||||||
|
|
||||||
subparsers.add_parser("version", help="Show version")
|
subparsers.add_parser("version", help="Show version")
|
||||||
subparsers.add_parser("accounts", help="List all accounts")
|
subparsers.add_parser("accounts", help="List all accounts")
|
||||||
subparsers.add_parser("stats", help="Show scraping statistics")
|
|
||||||
|
add_accounts = subparsers.add_parser("add_accounts", help="Add accounts")
|
||||||
|
add_accounts.add_argument("file_path", help="File with accounts")
|
||||||
|
add_accounts.add_argument("line_format", help="args of Pool.add_account splited by same delim")
|
||||||
|
subparsers.add_parser("login_accounts", help="Login accounts")
|
||||||
|
|
||||||
clim("search", "Search for tweets", "query", "Search query")
|
clim("search", "Search for tweets", "query", "Search query")
|
||||||
cone("tweet_details", "Get tweet details", "tweet_id", "Tweet ID", int)
|
cone("tweet_details", "Get tweet details", "tweet_id", "Tweet ID", int)
|
||||||
@ -89,7 +129,6 @@ def run():
|
|||||||
|
|
||||||
args = p.parse_args()
|
args = p.parse_args()
|
||||||
if args.command is None:
|
if args.command is None:
|
||||||
p.print_help()
|
return custom_help(p)
|
||||||
return
|
|
||||||
|
|
||||||
asyncio.run(main(args))
|
asyncio.run(main(args))
|
||||||
|
|||||||
@ -9,8 +9,35 @@ from .logger import logger
|
|||||||
MAX_WAIT_SEC = 30
|
MAX_WAIT_SEC = 30
|
||||||
|
|
||||||
|
|
||||||
|
class EmailLoginError(Exception):
    """Raised when logging in to the email account fails."""

    def __init__(self, message="Email login error"):
        super().__init__(message)
        # Kept as an attribute so callers can read the text directly.
        self.message = message
|
||||||
|
|
||||||
|
|
||||||
|
class EmailCodeTimeoutError(Exception):
    """Raised when no confirmation code arrives by email in time."""

    def __init__(self, message="Email code timeout"):
        super().__init__(message)
        # Kept as an attribute so callers can read the text directly.
        self.message = message
|
||||||
|
|
||||||
|
|
||||||
|
# Email domains whose IMAP host is not simply "imap.<domain>".
# Keys are lower-case email domains; values are IMAP host names.
IMAP_MAPPING: dict[str, str] = {
    "yahoo.com": "imap.mail.yahoo.com",
    "icloud.com": "imap.mail.me.com",
    "outlook.com": "imap-mail.outlook.com",
    "hotmail.com": "imap-mail.outlook.com",
}


def add_imap_mapping(email_domain: str, imap_domain: str):
    """Register (or override) the IMAP host used for an email domain.

    Args:
        email_domain: Domain part of the email address, e.g. ``"example.com"``.
            Stored lower-cased so lookups are case-insensitive.
        imap_domain: IMAP host to use for that domain.
    """
    IMAP_MAPPING[email_domain.strip().lower()] = imap_domain


def get_imap_domain(email: str) -> str:
    """Return the IMAP host for an email address.

    The domain is looked up in ``IMAP_MAPPING``. Unknown domains fall back to
    ``imap.<domain>``.

    Args:
        email: Full email address.

    Returns:
        The IMAP host name.

    Raises:
        ValueError: If ``email`` has no ``@`` or nothing after it.
    """
    # Split on the last "@" because the local part may itself contain "@".
    _, sep, email_domain = email.rpartition("@")
    # Domain names are case-insensitive, so normalise before the lookup.
    email_domain = email_domain.strip().lower()
    if not sep or not email_domain:
        raise ValueError(f"Invalid email address: {email!r}")

    return IMAP_MAPPING.get(email_domain, f"imap.{email_domain}")
|
||||||
|
|
||||||
|
|
||||||
def search_email_code(imap: imaplib.IMAP4_SSL, count: int, min_t: datetime | None) -> str | None:
|
def search_email_code(imap: imaplib.IMAP4_SSL, count: int, min_t: datetime | None) -> str | None:
|
||||||
@ -39,7 +66,11 @@ async def get_email_code(email: str, password: str, min_t: datetime | None = Non
|
|||||||
domain = get_imap_domain(email)
|
domain = get_imap_domain(email)
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
with imaplib.IMAP4_SSL(domain) as imap:
|
with imaplib.IMAP4_SSL(domain) as imap:
|
||||||
|
try:
|
||||||
imap.login(email, password)
|
imap.login(email, password)
|
||||||
|
except imaplib.IMAP4.error as e:
|
||||||
|
logger.error(f"Error logging into {email}: {e}")
|
||||||
|
raise EmailLoginError() from e
|
||||||
|
|
||||||
was_count = 0
|
was_count = 0
|
||||||
while True:
|
while True:
|
||||||
@ -52,5 +83,6 @@ async def get_email_code(email: str, password: str, min_t: datetime | None = Non
|
|||||||
|
|
||||||
logger.debug(f"Waiting for confirmation code for {email}, msg_count: {now_count}")
|
logger.debug(f"Waiting for confirmation code for {email}, msg_count: {now_count}")
|
||||||
if MAX_WAIT_SEC < time.time() - start_time:
|
if MAX_WAIT_SEC < time.time() - start_time:
|
||||||
raise Exception(f"Timeout on getting confirmation code for {email}")
|
logger.error(f"Timeout waiting for confirmation code for {email}")
|
||||||
|
raise EmailCodeTimeoutError()
|
||||||
await asyncio.sleep(5)
|
await asyncio.sleep(5)
|
||||||
|
|||||||
@ -176,7 +176,7 @@ async def next_login_task(client: AsyncClient, acc: Account, rep: Response):
|
|||||||
if task_id == "LoginJsInstrumentationSubtask":
|
if task_id == "LoginJsInstrumentationSubtask":
|
||||||
return await login_instrumentation(client, acc, prev)
|
return await login_instrumentation(client, acc, prev)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
acc.error_msg = f"task={task_id} err={e}"
|
acc.error_msg = f"login_step={task_id} err={e}"
|
||||||
logger.error(f"Error in {task_id}: {e}")
|
logger.error(f"Error in {task_id}: {e}")
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
|
|||||||
@ -148,8 +148,17 @@ def print_table(rows: list[dict]):
|
|||||||
if not rows:
|
if not rows:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
def prt(x):
    """Render a table cell as text.

    Strings are returned unchanged, integers get thousands separators
    (``1234`` -> ``"1,234"``), and anything else goes through ``str()``.
    """
    if isinstance(x, str):
        return x

    # bool is a subclass of int, and f"{True:,}" renders as "1".
    # Exclude it so flags print as True/False.
    if isinstance(x, int) and not isinstance(x, bool):
        return f"{x:,}"

    return str(x)
|
||||||
|
|
||||||
keys = list(rows[0].keys())
|
keys = list(rows[0].keys())
|
||||||
rows = [{k: k for k in keys}, *[{k: str(x.get(k, "")) for k in keys} for x in rows]]
|
rows = [{k: k for k in keys}, *[{k: prt(x.get(k, "")) for k in keys} for x in rows]]
|
||||||
colw = [max(len(x[k]) for x in rows) + 1 for k in keys]
|
colw = [max(len(x[k]) for x in rows) + 1 for k in keys]
|
||||||
|
|
||||||
lines = []
|
lines = []
|
||||||
|
|||||||
Загрузка…
x
Ссылка в новой задаче
Block a user