зеркало из
				https://github.com/viginum-datalab/twscrape.git
				synced 2025-10-30 21:46:13 +02:00 
			
		
		
		
	update queue_client error handling
Этот коммит содержится в:
		
							родитель
							
								
									852675954e
								
							
						
					
					
						Коммит
						364c0ddfd0
					
				| @ -130,8 +130,9 @@ async def test_get_stats(pool_mock: AccountsPool): | |||||||
|     assert stats["active"] == 1 |     assert stats["active"] == 1 | ||||||
| 
 | 
 | ||||||
|     # should update queue stats |     # should update queue stats | ||||||
|     await pool_mock.get_for_queue(Q) |     acc = await pool_mock.get_for_queue(Q) | ||||||
|  |     assert acc is not None | ||||||
|     stats = await pool_mock.stats() |     stats = await pool_mock.stats() | ||||||
|     assert stats["total"] == 1 |     assert stats["total"] == 1 | ||||||
|     assert stats["active"] == 1 |     assert stats["active"] == 1 | ||||||
|     assert stats["locked_SearchTimeline"] == 1 |     assert stats[f"locked_{Q}"] == 1 | ||||||
|  | |||||||
| @ -229,14 +229,14 @@ class AccountsPool: | |||||||
|             return account |             return account | ||||||
| 
 | 
 | ||||||
|     async def stats(self): |     async def stats(self): | ||||||
|         def by_queue(queue: str): |         def locks_count(queue: str): | ||||||
|             return f""" |             return f""" | ||||||
|             SELECT COUNT(*) FROM accounts |             SELECT COUNT(*) FROM accounts | ||||||
|             WHERE json_extract(locks, '$.{queue}') IS NOT NULL |             WHERE json_extract(locks, '$.{queue}') IS NOT NULL | ||||||
|                 AND json_extract(locks, '$.{queue}') > datetime('now') |                 AND json_extract(locks, '$.{queue}') > datetime('now') | ||||||
|             """ |             """ | ||||||
| 
 | 
 | ||||||
|         qs = "SELECT DISTINCT(f.key) as k from accounts, json_each(stats) f" |         qs = "SELECT DISTINCT(f.key) as k from accounts, json_each(locks) f" | ||||||
|         rs = await fetchall(self._db_file, qs) |         rs = await fetchall(self._db_file, qs) | ||||||
|         gql_ops = [x["k"] for x in rs] |         gql_ops = [x["k"] for x in rs] | ||||||
| 
 | 
 | ||||||
| @ -244,7 +244,7 @@ class AccountsPool: | |||||||
|             ("total", "SELECT COUNT(*) FROM accounts"), |             ("total", "SELECT COUNT(*) FROM accounts"), | ||||||
|             ("active", "SELECT COUNT(*) FROM accounts WHERE active = true"), |             ("active", "SELECT COUNT(*) FROM accounts WHERE active = true"), | ||||||
|             ("inactive", "SELECT COUNT(*) FROM accounts WHERE active = false"), |             ("inactive", "SELECT COUNT(*) FROM accounts WHERE active = false"), | ||||||
|             *[(f"locked_{x}", by_queue(x)) for x in gql_ops], |             *[(f"locked_{x}", locks_count(x)) for x in gql_ops], | ||||||
|         ] |         ] | ||||||
| 
 | 
 | ||||||
|         qs = f"SELECT {','.join([f'({q}) as {k}' for k, q in config])}" |         qs = f"SELECT {','.join([f'({q}) as {k}' for k, q in config])}" | ||||||
|  | |||||||
| @ -90,6 +90,7 @@ class QueueClient: | |||||||
|         print(f"API dump ({len(self.history)}) dumped to {filename}") |         print(f"API dump ({len(self.history)}) dumped to {filename}") | ||||||
| 
 | 
 | ||||||
|     async def req(self, method: str, url: str, params: ReqParams = None): |     async def req(self, method: str, url: str, params: ReqParams = None): | ||||||
|  |         retry_count = 0 | ||||||
|         while True: |         while True: | ||||||
|             ctx = await self._get_ctx() |             ctx = await self._get_ctx() | ||||||
| 
 | 
 | ||||||
| @ -99,35 +100,41 @@ class QueueClient: | |||||||
|                 self._push_history(rep) |                 self._push_history(rep) | ||||||
|                 rep.raise_for_status() |                 rep.raise_for_status() | ||||||
|                 ctx.req_count += 1  # count only successful |                 ctx.req_count += 1  # count only successful | ||||||
|  |                 retry_count = 0 | ||||||
|                 return rep |                 return rep | ||||||
|             except httpx.HTTPStatusError as e: |             except httpx.HTTPStatusError as e: | ||||||
|                 rep = e.response |                 rep = e.response | ||||||
|                 log_id = f"{req_id(rep)} on queue={self.queue}" |                 log_id = f"{req_id(rep)} on queue={self.queue}" | ||||||
| 
 | 
 | ||||||
|                 # rate limit |                 reset_ts, known_code = -1, True | ||||||
|  | 
 | ||||||
|                 if rep.status_code == 429: |                 if rep.status_code == 429: | ||||||
|                     logger.debug(f"Rate limit for {log_id}") |                     # rate limit | ||||||
|                     reset_ts = int(rep.headers.get("x-rate-limit-reset", 0)) |                     reset_ts = int(rep.headers.get("x-rate-limit-reset", 0)) | ||||||
|                     await self._close_ctx(reset_ts)  # get next account on next iteration |                     logger.debug(f"Rate limit for {log_id}") | ||||||
|                     continue |  | ||||||
| 
 |  | ||||||
|                 # possible account banned |  | ||||||
|                 if rep.status_code in (401, 403): |  | ||||||
|                     reset_ts = utc_ts() + 60 * 60  # + 1 hour |  | ||||||
|                     logger.warning(f"Code {rep.status_code} for {log_id} – frozen for 1h") |  | ||||||
|                     await self._close_ctx(reset_ts)  # get next account on next iteration |  | ||||||
|                     continue |  | ||||||
| 
 | 
 | ||||||
|  |                 elif rep.status_code == 400: | ||||||
|                     # twitter can return different types of cursors that not transfers between accounts |                     # twitter can return different types of cursors that not transfers between accounts | ||||||
|                     # just take the next account, the current cursor can work in it |                     # just take the next account, the current cursor can work in it | ||||||
|                 if rep.status_code == 400: |  | ||||||
|                     logger.debug(f"Cursor not valid for {log_id}") |                     logger.debug(f"Cursor not valid for {log_id}") | ||||||
|                     continue |  | ||||||
| 
 | 
 | ||||||
|                 logger.error(f"[{rep.status_code}] {e.request.url}\n{rep.text}") |                 elif rep.status_code in (401, 403): | ||||||
|  |                     # account is locked or banned | ||||||
|  |                     reset_ts = utc_ts() + 60 * 60  # + 1 hour | ||||||
|  |                     logger.warning(f"Code {rep.status_code} for {log_id} – frozen for 1h") | ||||||
|  | 
 | ||||||
|  |                 else: | ||||||
|  |                     known_code = False | ||||||
|  |                     logger.debug(f"HTTP Error {rep.status_code} {e.request.url}\n{rep.text}") | ||||||
|  | 
 | ||||||
|  |                 await self._close_ctx(reset_ts) | ||||||
|  |                 if not known_code: | ||||||
|                     raise e |                     raise e | ||||||
|             except Exception as e: |             except Exception as e: | ||||||
|                 logger.warning(f"Unknown error, retrying. Err ({type(e)}): {str(e)}") |                 logger.warning(f"Unknown error, retrying. Err ({type(e)}): {str(e)}") | ||||||
|  |                 retry_count += 1 | ||||||
|  |                 if retry_count > 3: | ||||||
|  |                     await self._close_ctx(utc_ts() + 60 * 15)  # 15 minutes | ||||||
| 
 | 
 | ||||||
|     async def get(self, url: str, params: ReqParams = None): |     async def get(self, url: str, params: ReqParams = None): | ||||||
|         try: |         try: | ||||||
|  | |||||||
		Загрузка…
	
	
			
			x
			
			
		
	
		Ссылка в новой задаче
	
	Block a user
	 Vlad Pronsky
						Vlad Pronsky