зеркало из
https://github.com/viginum-datalab/twscrape.git
synced 2025-10-30 21:46:13 +02:00
update queue_client error handling
Этот коммит содержится в:
родитель
852675954e
Коммит
364c0ddfd0
@ -130,8 +130,9 @@ async def test_get_stats(pool_mock: AccountsPool):
|
|||||||
assert stats["active"] == 1
|
assert stats["active"] == 1
|
||||||
|
|
||||||
# should update queue stats
|
# should update queue stats
|
||||||
await pool_mock.get_for_queue(Q)
|
acc = await pool_mock.get_for_queue(Q)
|
||||||
|
assert acc is not None
|
||||||
stats = await pool_mock.stats()
|
stats = await pool_mock.stats()
|
||||||
assert stats["total"] == 1
|
assert stats["total"] == 1
|
||||||
assert stats["active"] == 1
|
assert stats["active"] == 1
|
||||||
assert stats["locked_SearchTimeline"] == 1
|
assert stats[f"locked_{Q}"] == 1
|
||||||
|
|||||||
@ -229,14 +229,14 @@ class AccountsPool:
|
|||||||
return account
|
return account
|
||||||
|
|
||||||
async def stats(self):
|
async def stats(self):
|
||||||
def by_queue(queue: str):
|
def locks_count(queue: str):
|
||||||
return f"""
|
return f"""
|
||||||
SELECT COUNT(*) FROM accounts
|
SELECT COUNT(*) FROM accounts
|
||||||
WHERE json_extract(locks, '$.{queue}') IS NOT NULL
|
WHERE json_extract(locks, '$.{queue}') IS NOT NULL
|
||||||
AND json_extract(locks, '$.{queue}') > datetime('now')
|
AND json_extract(locks, '$.{queue}') > datetime('now')
|
||||||
"""
|
"""
|
||||||
|
|
||||||
qs = "SELECT DISTINCT(f.key) as k from accounts, json_each(stats) f"
|
qs = "SELECT DISTINCT(f.key) as k from accounts, json_each(locks) f"
|
||||||
rs = await fetchall(self._db_file, qs)
|
rs = await fetchall(self._db_file, qs)
|
||||||
gql_ops = [x["k"] for x in rs]
|
gql_ops = [x["k"] for x in rs]
|
||||||
|
|
||||||
@ -244,7 +244,7 @@ class AccountsPool:
|
|||||||
("total", "SELECT COUNT(*) FROM accounts"),
|
("total", "SELECT COUNT(*) FROM accounts"),
|
||||||
("active", "SELECT COUNT(*) FROM accounts WHERE active = true"),
|
("active", "SELECT COUNT(*) FROM accounts WHERE active = true"),
|
||||||
("inactive", "SELECT COUNT(*) FROM accounts WHERE active = false"),
|
("inactive", "SELECT COUNT(*) FROM accounts WHERE active = false"),
|
||||||
*[(f"locked_{x}", by_queue(x)) for x in gql_ops],
|
*[(f"locked_{x}", locks_count(x)) for x in gql_ops],
|
||||||
]
|
]
|
||||||
|
|
||||||
qs = f"SELECT {','.join([f'({q}) as {k}' for k, q in config])}"
|
qs = f"SELECT {','.join([f'({q}) as {k}' for k, q in config])}"
|
||||||
|
|||||||
@ -90,6 +90,7 @@ class QueueClient:
|
|||||||
print(f"API dump ({len(self.history)}) dumped to {filename}")
|
print(f"API dump ({len(self.history)}) dumped to {filename}")
|
||||||
|
|
||||||
async def req(self, method: str, url: str, params: ReqParams = None):
|
async def req(self, method: str, url: str, params: ReqParams = None):
|
||||||
|
retry_count = 0
|
||||||
while True:
|
while True:
|
||||||
ctx = await self._get_ctx()
|
ctx = await self._get_ctx()
|
||||||
|
|
||||||
@ -99,35 +100,41 @@ class QueueClient:
|
|||||||
self._push_history(rep)
|
self._push_history(rep)
|
||||||
rep.raise_for_status()
|
rep.raise_for_status()
|
||||||
ctx.req_count += 1 # count only successful
|
ctx.req_count += 1 # count only successful
|
||||||
|
retry_count = 0
|
||||||
return rep
|
return rep
|
||||||
except httpx.HTTPStatusError as e:
|
except httpx.HTTPStatusError as e:
|
||||||
rep = e.response
|
rep = e.response
|
||||||
log_id = f"{req_id(rep)} on queue={self.queue}"
|
log_id = f"{req_id(rep)} on queue={self.queue}"
|
||||||
|
|
||||||
# rate limit
|
reset_ts, known_code = -1, True
|
||||||
if rep.status_code == 429:
|
|
||||||
logger.debug(f"Rate limit for {log_id}")
|
|
||||||
reset_ts = int(rep.headers.get("x-rate-limit-reset", 0))
|
|
||||||
await self._close_ctx(reset_ts) # get next account on next iteration
|
|
||||||
continue
|
|
||||||
|
|
||||||
# possible account banned
|
if rep.status_code == 429:
|
||||||
if rep.status_code in (401, 403):
|
# rate limit
|
||||||
|
reset_ts = int(rep.headers.get("x-rate-limit-reset", 0))
|
||||||
|
logger.debug(f"Rate limit for {log_id}")
|
||||||
|
|
||||||
|
elif rep.status_code == 400:
|
||||||
|
# twitter can return different types of cursors that do not transfer between accounts
|
||||||
|
# just take the next account; the current cursor may work with it
|
||||||
|
logger.debug(f"Cursor not valid for {log_id}")
|
||||||
|
|
||||||
|
elif rep.status_code in (401, 403):
|
||||||
|
# account is locked or banned
|
||||||
reset_ts = utc_ts() + 60 * 60 # + 1 hour
|
reset_ts = utc_ts() + 60 * 60 # + 1 hour
|
||||||
logger.warning(f"Code {rep.status_code} for {log_id} – frozen for 1h")
|
logger.warning(f"Code {rep.status_code} for {log_id} – frozen for 1h")
|
||||||
await self._close_ctx(reset_ts) # get next account on next iteration
|
|
||||||
continue
|
|
||||||
|
|
||||||
# twitter can return different types of cursors that do not transfer between accounts
|
else:
|
||||||
# just take the next account; the current cursor may work with it
|
known_code = False
|
||||||
if rep.status_code == 400:
|
logger.debug(f"HTTP Error {rep.status_code} {e.request.url}\n{rep.text}")
|
||||||
logger.debug(f"Cursor not valid for {log_id}")
|
|
||||||
continue
|
|
||||||
|
|
||||||
logger.error(f"[{rep.status_code}] {e.request.url}\n{rep.text}")
|
await self._close_ctx(reset_ts)
|
||||||
raise e
|
if not known_code:
|
||||||
|
raise e
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Unknown error, retrying. Err ({type(e)}): {str(e)}")
|
logger.warning(f"Unknown error, retrying. Err ({type(e)}): {str(e)}")
|
||||||
|
retry_count += 1
|
||||||
|
if retry_count > 3:
|
||||||
|
await self._close_ctx(utc_ts() + 60 * 15) # 15 minutes
|
||||||
|
|
||||||
async def get(self, url: str, params: ReqParams = None):
|
async def get(self, url: str, params: ReqParams = None):
|
||||||
try:
|
try:
|
||||||
|
|||||||
Загрузка…
x
Ссылка в новой задаче
Block a user