зеркало из
https://github.com/viginum-datalab/twscrape.git
synced 2025-10-29 13:06:13 +02:00
add ability to configure login code timeout by env variable; remove redundant logs
Этот коммит содержится в:
родитель
ae4419c9e4
Коммит
5a3f7c19a6
2
Makefile
2
Makefile
@ -1,5 +1,3 @@
|
||||
.PHONY: all build
|
||||
|
||||
all:
|
||||
@echo "hi"
|
||||
|
||||
|
||||
28
readme.md
28
readme.md
@ -1,21 +1,13 @@
|
||||
# twscrape
|
||||
|
||||
<div align="center" style="padding-bottom: 8px">
|
||||
<a href="https://pypi.org/project/twscrape">
|
||||
<img src="https://badgen.net/pypi/v/twscrape" alt="version" />
|
||||
</a>
|
||||
<a href="https://pypi.org/project/twscrape">
|
||||
<img src="https://badgen.net/pypi/python/twscrape" alt="python versions" />
|
||||
</a>
|
||||
<a href="https://github.com/vladkens/twscrape/actions">
|
||||
<img src="https://github.com/vladkens/twscrape/workflows/test/badge.svg" alt="test status" />
|
||||
</a>
|
||||
<a href="https://pypi.org/project/twscrape">
|
||||
<img src="https://badgen.net/pypi/dm/twscrape" alt="downloads" />
|
||||
</a>
|
||||
<a href="https://github.com/vladkens/twscrape/blob/main/LICENSE">
|
||||
<img src="https://badgen.net/github/license/vladkens/twscrape" alt="license" />
|
||||
</a>
|
||||
<div align="center">
|
||||
|
||||
[<img src="https://badgen.net/pypi/v/twscrape" alt="version" />](https://pypi.org/project/twscrape)
|
||||
[<img src="https://badgen.net/pypi/python/twscrape" alt="py versions" />](https://pypi.org/project/twscrape)
|
||||
[<img src="https://github.com/vladkens/twscrape/workflows/test/badge.svg" alt="test status" />](https://github.com/vladkens/twscrape/actions)
|
||||
[<img src="https://badgen.net/pypi/dm/twscrape" alt="downloads" />](https://pypi.org/project/twscrape)
|
||||
[<img src="https://badgen.net/github/license/vladkens/twscrape" alt="license" />](https://github.com/vladkens/twscrape/blob/main/LICENSE)
|
||||
|
||||
</div>
|
||||
|
||||
Twitter GraphQL API implementation with [SNScrape](https://github.com/JustAnotherArchivist/snscrape) data models.
|
||||
@ -239,6 +231,10 @@ By default, parsed data is returned. The original tweet responses can be retriev
|
||||
twscrape search "elon mask lang:es" --limit=20 --raw
|
||||
```
|
||||
|
||||
### Environment variables
|
||||
|
||||
`LOGIN_CODE_TIMEOUT` - how long to wait, in seconds, for the email confirmation code (default `40`)
|
||||
|
||||
## Limitations
|
||||
|
||||
After 1 July 2023 Twitter [introduced new limits](https://twitter.com/elonmusk/status/1675187969420828672) and still continues to update them periodically.
|
||||
|
||||
@ -138,7 +138,7 @@ class AccountsPool:
|
||||
logger.info(f"Logged in to {account.username} successfully")
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"Error logging in to {account.username}: {e}")
|
||||
logger.error(f"Failed to login to {account.username}: {e}")
|
||||
return False
|
||||
finally:
|
||||
await self.save(account)
|
||||
|
||||
@ -1,12 +1,16 @@
|
||||
import asyncio
|
||||
import email as emaillib
|
||||
import imaplib
|
||||
import os
|
||||
import time
|
||||
from datetime import datetime
|
||||
|
||||
from .logger import logger
|
||||
from .utils import int_or
|
||||
|
||||
MAX_WAIT_SEC = 30
|
||||
_env = dict(os.environ)
|
||||
|
||||
LOGIN_CODE_TIMEOUT = int_or(_env, "LOGIN_CODE_TIMEOUT") or 40
|
||||
|
||||
|
||||
class EmailLoginError(Exception):
|
||||
@ -46,10 +50,11 @@ def _wait_email_code(imap: imaplib.IMAP4_SSL, count: int, min_t: datetime | None
|
||||
for x in rep:
|
||||
if isinstance(x, tuple):
|
||||
msg = emaillib.message_from_bytes(x[1])
|
||||
try:
|
||||
msg_time = datetime.strptime(msg.get("Date", "").split(' (')[0], "%a, %d %b %Y %H:%M:%S %z")
|
||||
except ValueError:
|
||||
msg_time = msg.get("Date", "")
|
||||
|
||||
# https://www.ietf.org/rfc/rfc9051.html#section-6.3.12-13
|
||||
msg_time = msg.get("Date", "").split("(")[0].strip()
|
||||
msg_time = datetime.strptime(msg_time, "%a, %d %b %Y %H:%M:%S %z")
|
||||
|
||||
msg_from = str(msg.get("From", "")).lower()
|
||||
msg_subj = str(msg.get("Subject", "")).lower()
|
||||
logger.info(f"({i} of {count}) {msg_from} - {msg_time} - {msg_subj}")
|
||||
@ -68,24 +73,22 @@ async def imap_get_email_code(
|
||||
imap: imaplib.IMAP4_SSL, email: str, min_t: datetime | None = None
|
||||
) -> str:
|
||||
try:
|
||||
start_time, was_count = time.time(), 0
|
||||
logger.info(f"Waiting for confirmation code for {email}...")
|
||||
start_time = time.time()
|
||||
while True:
|
||||
_, rep = imap.select("INBOX")
|
||||
now_count = int(rep[0].decode("utf-8")) if len(rep) > 0 and rep[0] is not None else 0
|
||||
if now_count > was_count:
|
||||
code = _wait_email_code(imap, now_count, min_t)
|
||||
if code is not None:
|
||||
return code
|
||||
msg_count = int(rep[0].decode("utf-8")) if len(rep) > 0 and rep[0] is not None else 0
|
||||
code = _wait_email_code(imap, msg_count, min_t)
|
||||
if code is not None:
|
||||
return code
|
||||
|
||||
if LOGIN_CODE_TIMEOUT < time.time() - start_time:
|
||||
raise EmailCodeTimeoutError(f"Email code timeout ({LOGIN_CODE_TIMEOUT} sec)")
|
||||
|
||||
logger.info(f"Waiting for confirmation code for {email}, msg_count: {now_count}")
|
||||
if MAX_WAIT_SEC < time.time() - start_time:
|
||||
logger.info(f"Timeout waiting for confirmation code for {email}")
|
||||
raise EmailCodeTimeoutError()
|
||||
await asyncio.sleep(5)
|
||||
except Exception as e:
|
||||
imap.select("INBOX")
|
||||
imap.close()
|
||||
logger.error(f"Error getting confirmation code for {email}: {e}")
|
||||
raise e
|
||||
|
||||
|
||||
|
||||
@ -178,7 +178,6 @@ async def next_login_task(client: AsyncClient, acc: Account, rep: Response, imap
|
||||
return await login_instrumentation(client, acc, prev)
|
||||
except Exception as e:
|
||||
acc.error_msg = f"login_step={task_id} err={e}"
|
||||
logger.error(f"Error in {task_id}: {e}")
|
||||
raise e
|
||||
|
||||
return None
|
||||
|
||||
Загрузка…
x
Ссылка в новой задаче
Block a user