зеркало из
				https://github.com/viginum-datalab/twscrape.git
				synced 2025-11-04 07:56:24 +02:00 
			
		
		
		
	ability to reset locks for accounts; update replies dumps
Этот коммит содержится в:
		
							родитель
							
								
									7866e44ade
								
							
						
					
					
						Коммит
						87ef50878d
					
				@ -171,6 +171,10 @@ class AccountsPool:
 | 
				
			|||||||
        rs = await fetchall(self._db_file, qs)
 | 
					        rs = await fetchall(self._db_file, qs)
 | 
				
			||||||
        await self.relogin([x["username"] for x in rs])
 | 
					        await self.relogin([x["username"] for x in rs])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    async def reset_locks(self):
 | 
				
			||||||
 | 
					        qs = "UPDATE accounts SET locks = json_object()"
 | 
				
			||||||
 | 
					        await execute(self._db_file, qs)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    async def set_active(self, username: str, active: bool):
 | 
					    async def set_active(self, username: str, active: bool):
 | 
				
			||||||
        qs = "UPDATE accounts SET active = :active WHERE username = :username"
 | 
					        qs = "UPDATE accounts SET active = :active WHERE username = :username"
 | 
				
			||||||
        await execute(self._db_file, qs, {"username": username, "active": active})
 | 
					        await execute(self._db_file, qs, {"username": username, "active": active})
 | 
				
			||||||
@ -233,10 +237,13 @@ class AccountsPool:
 | 
				
			|||||||
        return Account.from_rs(rs) if rs else None
 | 
					        return Account.from_rs(rs) if rs else None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    async def get_for_queue_or_wait(self, queue: str) -> Account:
 | 
					    async def get_for_queue_or_wait(self, queue: str) -> Account:
 | 
				
			||||||
 | 
					        msg_show = False
 | 
				
			||||||
        while True:
 | 
					        while True:
 | 
				
			||||||
            account = await self.get_for_queue(queue)
 | 
					            account = await self.get_for_queue(queue)
 | 
				
			||||||
            if not account:
 | 
					            if not account:
 | 
				
			||||||
                logger.debug(f"No accounts available for queue '{queue}' (sleeping for 5 sec)")
 | 
					                if not msg_show:
 | 
				
			||||||
 | 
					                    logger.info(f"No accounts available for queue '{queue}' (sleeping for 5 sec)")
 | 
				
			||||||
 | 
					                    msg_show = True
 | 
				
			||||||
                await asyncio.sleep(5)
 | 
					                await asyncio.sleep(5)
 | 
				
			||||||
                continue
 | 
					                continue
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -92,6 +92,10 @@ async def main(args):
 | 
				
			|||||||
        await pool.relogin(args.usernames)
 | 
					        await pool.relogin(args.usernames)
 | 
				
			||||||
        return
 | 
					        return
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if args.command == "reset_locks":
 | 
				
			||||||
 | 
					        await pool.reset_locks()
 | 
				
			||||||
 | 
					        return
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    fn = args.command + "_raw" if args.raw else args.command
 | 
					    fn = args.command + "_raw" if args.raw else args.command
 | 
				
			||||||
    fn = getattr(api, fn, None)
 | 
					    fn = getattr(api, fn, None)
 | 
				
			||||||
    if fn is None:
 | 
					    if fn is None:
 | 
				
			||||||
@ -162,6 +166,7 @@ def run():
 | 
				
			|||||||
    relogin.add_argument("usernames", nargs="+", default=[], help="Usernames to re-login")
 | 
					    relogin.add_argument("usernames", nargs="+", default=[], help="Usernames to re-login")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    subparsers.add_parser("relogin_failed", help="Retry login for failed accounts")
 | 
					    subparsers.add_parser("relogin_failed", help="Retry login for failed accounts")
 | 
				
			||||||
 | 
					    subparsers.add_parser("reset_locks", help="Reset all locks")
 | 
				
			||||||
    subparsers.add_parser("stats", help="Get current usage stats")
 | 
					    subparsers.add_parser("stats", help="Get current usage stats")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    c_lim("search", "Search for tweets", "query", "Search query")
 | 
					    c_lim("search", "Search for tweets", "query", "Search query")
 | 
				
			||||||
 | 
				
			|||||||
@ -1,5 +1,5 @@
 | 
				
			|||||||
import json
 | 
					import json
 | 
				
			||||||
from datetime import datetime
 | 
					import os
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import httpx
 | 
					import httpx
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -35,12 +35,36 @@ class ApiError(Exception):
 | 
				
			|||||||
        return f"ApiError ({self.rep.status_code}) {' ~ '.join(self.errors)}"
 | 
					        return f"ApiError ({self.rep.status_code}) {' ~ '.join(self.errors)}"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def dump_rep(rep: httpx.Response):
 | 
				
			||||||
 | 
					    count = getattr(rep, "__count", -1) + 1
 | 
				
			||||||
 | 
					    setattr(rep, "__count", count)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    acc = getattr(rep, "__username", "<unknown>")
 | 
				
			||||||
 | 
					    outfile = f"/tmp/twscrape/{count:05d}_{rep.status_code}_{acc}.txt"
 | 
				
			||||||
 | 
					    os.makedirs(os.path.dirname(outfile), exist_ok=True)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    msg = []
 | 
				
			||||||
 | 
					    msg.append(f"{count:,d} - {req_id(rep)}")
 | 
				
			||||||
 | 
					    msg.append(f"{rep.status_code} {rep.request.method} {rep.request.url}")
 | 
				
			||||||
 | 
					    msg.append("\n")
 | 
				
			||||||
 | 
					    msg.append("\n".join([str(x) for x in list(rep.request.headers.items())]))
 | 
				
			||||||
 | 
					    msg.append("\n")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    try:
 | 
				
			||||||
 | 
					        msg.append(json.dumps(rep.json(), indent=2))
 | 
				
			||||||
 | 
					    except json.JSONDecodeError:
 | 
				
			||||||
 | 
					        msg.append(rep.text)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    txt = "\n".join(msg)
 | 
				
			||||||
 | 
					    with open(outfile, "w") as f:
 | 
				
			||||||
 | 
					        f.write(txt)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class QueueClient:
 | 
					class QueueClient:
 | 
				
			||||||
    def __init__(self, pool: AccountsPool, queue: str, debug=False):
 | 
					    def __init__(self, pool: AccountsPool, queue: str, debug=False):
 | 
				
			||||||
        self.pool = pool
 | 
					        self.pool = pool
 | 
				
			||||||
        self.queue = queue
 | 
					        self.queue = queue
 | 
				
			||||||
        self.debug = debug
 | 
					        self.debug = debug
 | 
				
			||||||
        self.history: list[httpx.Response] = []
 | 
					 | 
				
			||||||
        self.ctx: Ctx | None = None
 | 
					        self.ctx: Ctx | None = None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    async def __aenter__(self):
 | 
					    async def __aenter__(self):
 | 
				
			||||||
@ -71,34 +95,6 @@ class QueueClient:
 | 
				
			|||||||
        self.ctx = Ctx(acc, clt)
 | 
					        self.ctx = Ctx(acc, clt)
 | 
				
			||||||
        return self.ctx
 | 
					        return self.ctx
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _push_history(self, rep: httpx.Response):
 | 
					 | 
				
			||||||
        self.history.append(rep)
 | 
					 | 
				
			||||||
        if len(self.history) > 3:
 | 
					 | 
				
			||||||
            self.history.pop(0)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def _dump_history(self, extra: str = ""):
 | 
					 | 
				
			||||||
        if not self.debug:
 | 
					 | 
				
			||||||
            return
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        ts = str(datetime.now()).replace(":", "-").replace(" ", "_")
 | 
					 | 
				
			||||||
        filename = f"/tmp/api_dump_{ts}.txt"
 | 
					 | 
				
			||||||
        with open(filename, "w", encoding="utf-8") as fp:
 | 
					 | 
				
			||||||
            txt = f"{extra}\n"
 | 
					 | 
				
			||||||
            for rep in self.history:
 | 
					 | 
				
			||||||
                res = json.dumps(rep.json(), indent=2)
 | 
					 | 
				
			||||||
                hdr = "\n".join([str(x) for x in list(rep.request.headers.items())])
 | 
					 | 
				
			||||||
                div = "-" * 20
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                msg = f"{div}\n{req_id(rep)}"
 | 
					 | 
				
			||||||
                msg = f"{msg}\n{rep.request.method} {rep.request.url}"
 | 
					 | 
				
			||||||
                msg = f"{msg}\n{rep.status_code}\n{div}"
 | 
					 | 
				
			||||||
                msg = f"{msg}\n{hdr}\n{div}\n{res}\n\n"
 | 
					 | 
				
			||||||
                txt += msg
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            fp.write(txt)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        print(f"API dump ({len(self.history)}) dumped to {filename}")
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    async def req(self, method: str, url: str, params: ReqParams = None):
 | 
					    async def req(self, method: str, url: str, params: ReqParams = None):
 | 
				
			||||||
        retry_count = 0
 | 
					        retry_count = 0
 | 
				
			||||||
        while True:
 | 
					        while True:
 | 
				
			||||||
@ -107,13 +103,14 @@ class QueueClient:
 | 
				
			|||||||
            try:
 | 
					            try:
 | 
				
			||||||
                rep = await ctx.clt.request(method, url, params=params)
 | 
					                rep = await ctx.clt.request(method, url, params=params)
 | 
				
			||||||
                setattr(rep, "__username", ctx.acc.username)
 | 
					                setattr(rep, "__username", ctx.acc.username)
 | 
				
			||||||
                self._push_history(rep)
 | 
					                dump_rep(rep)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                rep.raise_for_status()
 | 
					 | 
				
			||||||
                res = rep.json()
 | 
					                res = rep.json()
 | 
				
			||||||
                if "errors" in res:
 | 
					                if "errors" in res:
 | 
				
			||||||
                    raise ApiError(rep, res)
 | 
					                    raise ApiError(rep, res)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                rep.raise_for_status()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                ctx.req_count += 1  # count only successful
 | 
					                ctx.req_count += 1  # count only successful
 | 
				
			||||||
                retry_count = 0
 | 
					                retry_count = 0
 | 
				
			||||||
                return rep
 | 
					                return rep
 | 
				
			||||||
@ -146,7 +143,8 @@ class QueueClient:
 | 
				
			|||||||
                if not known_code:
 | 
					                if not known_code:
 | 
				
			||||||
                    raise e
 | 
					                    raise e
 | 
				
			||||||
            except ApiError as e:
 | 
					            except ApiError as e:
 | 
				
			||||||
                reset_ts = utc_ts() + 60 * 60 * 4  # 4 hours
 | 
					                # possible account banned
 | 
				
			||||||
 | 
					                reset_ts = utc_ts() + 60 * 60 * 12  # 12 hours
 | 
				
			||||||
                await self._close_ctx(reset_ts)
 | 
					                await self._close_ctx(reset_ts)
 | 
				
			||||||
                logger.warning(e)
 | 
					                logger.warning(e)
 | 
				
			||||||
            except Exception as e:
 | 
					            except Exception as e:
 | 
				
			||||||
@ -159,5 +157,4 @@ class QueueClient:
 | 
				
			|||||||
        try:
 | 
					        try:
 | 
				
			||||||
            return await self.req("GET", url, params=params)
 | 
					            return await self.req("GET", url, params=params)
 | 
				
			||||||
        except httpx.HTTPStatusError as e:
 | 
					        except httpx.HTTPStatusError as e:
 | 
				
			||||||
            self._dump_history(f"GET {url} {json.dumps(params)}")
 | 
					 | 
				
			||||||
            raise e
 | 
					            raise e
 | 
				
			||||||
 | 
				
			|||||||
@ -1,7 +1,7 @@
 | 
				
			|||||||
import json
 | 
					import json
 | 
				
			||||||
from collections import defaultdict
 | 
					from collections import defaultdict
 | 
				
			||||||
from datetime import datetime, timezone
 | 
					from datetime import datetime, timezone
 | 
				
			||||||
from typing import Any, AsyncGenerator, Callable, TypeVar
 | 
					from typing import Any, AsyncGenerator, Callable, TypedDict, TypeVar
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from httpx import HTTPStatusError, Response
 | 
					from httpx import HTTPStatusError, Response
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -130,7 +130,7 @@ def to_old_obj(obj: dict):
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def to_old_rep(obj: dict):
 | 
					def to_old_rep(obj: dict) -> dict[str, dict]:
 | 
				
			||||||
    tmp = get_typed_object(obj, defaultdict(list))
 | 
					    tmp = get_typed_object(obj, defaultdict(list))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    tweets = [x for x in tmp.get("Tweet", []) if "legacy" in x]
 | 
					    tweets = [x for x in tmp.get("Tweet", []) if "legacy" in x]
 | 
				
			||||||
 | 
				
			|||||||
		Загрузка…
	
	
			
			x
			
			
		
	
		Ссылка в новой задаче
	
	Block a user