зеркало из
https://github.com/viginum-datalab/twscrape.git
synced 2025-10-28 20:54:24 +02:00
add python 3.12 support
Этот коммит содержится в:
родитель
fd64ce2018
Коммит
6a232da016
2
.github/workflows/test.yml
поставляемый
2
.github/workflows/test.yml
поставляемый
@ -11,7 +11,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.10", "3.11"]
|
||||
python-version: ["3.10", "3.11", "3.12"]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/setup-python@v4
|
||||
|
||||
1
.tool-versions
Обычный файл
1
.tool-versions
Обычный файл
@ -0,0 +1 @@
|
||||
python 3.12.0
|
||||
4
.vscode/settings.json
поставляемый
4
.vscode/settings.json
поставляемый
@ -8,7 +8,7 @@
|
||||
},
|
||||
"[python]": {
|
||||
"editor.formatOnSave": true,
|
||||
"editor.codeActionsOnSave": ["source.organizeImports"]
|
||||
"editor.codeActionsOnSave": ["source.organizeImports"],
|
||||
"editor.defaultFormatter": "ms-python.black-formatter"
|
||||
},
|
||||
"python.formatting.provider": "black"
|
||||
}
|
||||
|
||||
11
Dockerfile.python
Обычный файл
11
Dockerfile.python
Обычный файл
@ -0,0 +1,11 @@
|
||||
ARG VER=3.12
|
||||
|
||||
FROM python:${VER}-alpine
|
||||
RUN apk add git
|
||||
|
||||
WORKDIR /app
|
||||
COPY pyproject.toml readme.md /app/
|
||||
RUN pip install -e .[dev]
|
||||
COPY . /app
|
||||
|
||||
CMD python --version; pytest tests/
|
||||
40
Makefile
40
Makefile
@ -1,7 +1,5 @@
|
||||
.PHONY: all build
|
||||
|
||||
SQTEST = docker -l warning build -f sqlite.dockerfile
|
||||
|
||||
all:
|
||||
@echo "hi"
|
||||
|
||||
@ -38,27 +36,33 @@ show-cov:
|
||||
@coverage html
|
||||
@open htmlcov/index.html
|
||||
|
||||
act:
|
||||
@act --container-architecture linux/amd64
|
||||
|
||||
changelog:
|
||||
@git pull origin --tags > /dev/null
|
||||
@git log $(shell git describe --tags --abbrev=0 HEAD)^..HEAD --pretty=format:'- %s'
|
||||
|
||||
test34:
|
||||
test-py:
|
||||
$(eval name=twscrape_py$(v))
|
||||
@docker -l warning build -f Dockerfile.python --build-arg VER=$(v) -t $(name) .
|
||||
@docker run $(name)
|
||||
|
||||
test-sq:
|
||||
$(eval name=twscrape_sq$(v))
|
||||
@docker -l warning build -f Dockerfile.sqlite --build-arg SQLY=$(y) --build-arg SQLV=$(v) -t $(name) .
|
||||
@docker run $(name)
|
||||
|
||||
test-py-matrix:
|
||||
@make test-py v=3.10
|
||||
@make test-py v=3.11
|
||||
@make test-py v=3.12
|
||||
|
||||
test-sq-matrix:
|
||||
@# https://www.sqlite.org/chronology.html
|
||||
@$(SQTEST) --build-arg SQLY=2018 --build-arg SQLV=3240000 -t twscrape_sq24 .
|
||||
@$(SQTEST) --build-arg SQLY=2019 --build-arg SQLV=3270200 -t twscrape_sq27 .
|
||||
@$(SQTEST) --build-arg SQLY=2019 --build-arg SQLV=3300100 -t twscrape_sq30 .
|
||||
@$(SQTEST) --build-arg SQLY=2020 --build-arg SQLV=3330000 -t twscrape_sq33 .
|
||||
@$(SQTEST) --build-arg SQLY=2021 --build-arg SQLV=3340100 -t twscrape_sq34 .
|
||||
@$(SQTEST) --build-arg SQLY=2023 --build-arg SQLV=3430000 -t twscrape_sq43 .
|
||||
@docker run twscrape_sq24
|
||||
@docker run twscrape_sq27
|
||||
@docker run twscrape_sq30
|
||||
@docker run twscrape_sq33
|
||||
@docker run twscrape_sq34
|
||||
@docker run twscrape_sq43
|
||||
@make test-sq y=2018 v=3240000
|
||||
@make test-sq y=2019 v=3270200
|
||||
@make test-sq y=2019 v=3300100
|
||||
@make test-sq y=2020 v=3330000
|
||||
@make test-sq y=2021 v=3340100
|
||||
@make test-sq y=2023 v=3430000
|
||||
|
||||
update-mocks:
|
||||
twscrape user_by_id --raw 2244994945 | jq > ./tests/mocked-data/user_by_id_raw.json
|
||||
|
||||
@ -16,22 +16,23 @@ classifiers = [
|
||||
'License :: OSI Approved :: MIT License',
|
||||
'Programming Language :: Python :: 3.10',
|
||||
'Programming Language :: Python :: 3.11',
|
||||
'Programming Language :: Python :: 3.12',
|
||||
]
|
||||
dependencies = [
|
||||
"aiosqlite==0.17.0",
|
||||
"fake-useragent==1.2.1",
|
||||
"httpx==0.24.0",
|
||||
"loguru==0.7.0",
|
||||
"aiosqlite>=0.17.0",
|
||||
"fake-useragent>=1.3.0",
|
||||
"httpx>=0.24.0",
|
||||
"loguru>=0.7.0",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
dev = [
|
||||
"pylint==2.17.3",
|
||||
"pytest-asyncio==0.21.0",
|
||||
"pytest-cov==4.0.0",
|
||||
"pytest-httpx==0.22.0",
|
||||
"pytest==7.3.1",
|
||||
"ruff==0.0.263",
|
||||
"pylint>=2.17.3",
|
||||
"pytest-asyncio>=0.21.0",
|
||||
"pytest-cov>=4.0.0",
|
||||
"pytest-httpx>=0.22.0",
|
||||
"pytest>=7.4.0",
|
||||
"ruff"
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
@ -46,13 +47,13 @@ packages = ['twscrape']
|
||||
[tool.pylint]
|
||||
max-line-length = 99
|
||||
disable = [
|
||||
"C0103", # invalid-name
|
||||
"C0114", # missing-module-docstring
|
||||
"C0115", # missing-class-docstring
|
||||
"C0116", # missing-function-docstring
|
||||
"R0903", # too-few-public-methods
|
||||
"R0913", # too-many-arguments
|
||||
"W0105", # pointless-string-statement
|
||||
"C0103", # invalid-name
|
||||
"C0114", # missing-module-docstring
|
||||
"C0115", # missing-class-docstring
|
||||
"C0116", # missing-function-docstring
|
||||
"R0903", # too-few-public-methods
|
||||
"R0913", # too-many-arguments
|
||||
"W0105", # pointless-string-statement
|
||||
]
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
@ -67,3 +68,6 @@ line-length = 99
|
||||
|
||||
[tool.ruff]
|
||||
line-length = 99
|
||||
|
||||
[tool.hatch.metadata]
|
||||
allow-direct-references = true
|
||||
@ -10,9 +10,9 @@
|
||||
<a href="https://github.com/vladkens/twscrape/actions">
|
||||
<img src="https://github.com/vladkens/twscrape/workflows/test/badge.svg" alt="test status" />
|
||||
</a>
|
||||
<!-- <a href="https://pypi.org/project/twscrape">
|
||||
<a href="https://pypi.org/project/twscrape">
|
||||
<img src="https://badgen.net/pypi/dm/twscrape" alt="downloads" />
|
||||
</a> -->
|
||||
</a>
|
||||
<a href="https://github.com/vladkens/twscrape/blob/main/LICENSE">
|
||||
<img src="https://badgen.net/github/license/vladkens/twscrape" alt="license" />
|
||||
</a>
|
||||
|
||||
@ -252,6 +252,7 @@ async def test_user_tweets_and_replies():
|
||||
for doc in tweets:
|
||||
check_tweet(doc)
|
||||
|
||||
|
||||
async def test_list_timeline():
|
||||
api = API()
|
||||
mock_gen(api, "list_timeline_raw")
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
from twscrape.accounts_pool import AccountsPool
|
||||
from twscrape.utils import utc_ts
|
||||
from twscrape.utils import utc
|
||||
|
||||
|
||||
async def test_add_accounts(pool_mock: AccountsPool):
|
||||
@ -102,7 +102,7 @@ async def test_account_unlock(pool_mock: AccountsPool):
|
||||
assert acc.locks[Q] is not None
|
||||
|
||||
# should update lock time
|
||||
end_time = utc_ts() + 60 # + 1 minute
|
||||
end_time = utc.ts() + 60 # + 1 minute
|
||||
await pool_mock.lock_until(acc.username, Q, end_time)
|
||||
|
||||
acc = await pool_mock.get(acc.username)
|
||||
|
||||
@ -7,7 +7,7 @@ from httpx import AsyncClient, AsyncHTTPTransport
|
||||
|
||||
from .constants import TOKEN
|
||||
from .models import JSONTrait
|
||||
from .utils import from_utciso
|
||||
from .utils import utc
|
||||
|
||||
|
||||
@dataclass
|
||||
@ -30,12 +30,12 @@ class Account(JSONTrait):
|
||||
@staticmethod
|
||||
def from_rs(rs: sqlite3.Row):
|
||||
doc = dict(rs)
|
||||
doc["locks"] = {k: from_utciso(v) for k, v in json.loads(doc["locks"]).items()}
|
||||
doc["locks"] = {k: utc.from_iso(v) for k, v in json.loads(doc["locks"]).items()}
|
||||
doc["stats"] = {k: v for k, v in json.loads(doc["stats"]).items() if isinstance(v, int)}
|
||||
doc["headers"] = json.loads(doc["headers"])
|
||||
doc["cookies"] = json.loads(doc["cookies"])
|
||||
doc["active"] = bool(doc["active"])
|
||||
doc["last_used"] = from_utciso(doc["last_used"]) if doc["last_used"] else None
|
||||
doc["last_used"] = utc.from_iso(doc["last_used"]) if doc["last_used"] else None
|
||||
return Account(**doc)
|
||||
|
||||
def to_rs(self):
|
||||
|
||||
@ -11,7 +11,7 @@ from .account import Account
|
||||
from .db import execute, fetchall, fetchone
|
||||
from .logger import logger
|
||||
from .login import login
|
||||
from .utils import parse_cookies, utc_ts
|
||||
from .utils import parse_cookies, utc
|
||||
|
||||
|
||||
class AccountInfo(TypedDict):
|
||||
@ -197,7 +197,7 @@ class AccountsPool:
|
||||
UPDATE accounts SET
|
||||
locks = json_set(locks, '$.{queue}', datetime({unlock_at}, 'unixepoch')),
|
||||
stats = json_set(stats, '$.{queue}', COALESCE(json_extract(stats, '$.{queue}'), 0) + {req_count}),
|
||||
last_used = datetime({utc_ts()}, 'unixepoch')
|
||||
last_used = datetime({utc.ts()}, 'unixepoch')
|
||||
WHERE username = :username
|
||||
"""
|
||||
await execute(self._db_file, qs, {"username": username})
|
||||
@ -207,7 +207,7 @@ class AccountsPool:
|
||||
UPDATE accounts SET
|
||||
locks = json_remove(locks, '$.{queue}'),
|
||||
stats = json_set(stats, '$.{queue}', COALESCE(json_extract(stats, '$.{queue}'), 0) + {req_count}),
|
||||
last_used = datetime({utc_ts()}, 'unixepoch')
|
||||
last_used = datetime({utc.ts()}, 'unixepoch')
|
||||
WHERE username = :username
|
||||
"""
|
||||
await execute(self._db_file, qs, {"username": username})
|
||||
@ -228,7 +228,7 @@ class AccountsPool:
|
||||
qs = f"""
|
||||
UPDATE accounts SET
|
||||
locks = json_set(locks, '$.{queue}', datetime('now', '+15 minutes')),
|
||||
last_used = datetime({utc_ts()}, 'unixepoch')
|
||||
last_used = datetime({utc.ts()}, 'unixepoch')
|
||||
WHERE username = ({q1})
|
||||
RETURNING *
|
||||
"""
|
||||
@ -238,7 +238,7 @@ class AccountsPool:
|
||||
qs = f"""
|
||||
UPDATE accounts SET
|
||||
locks = json_set(locks, '$.{queue}', datetime('now', '+15 minutes')),
|
||||
last_used = datetime({utc_ts()}, 'unixepoch'),
|
||||
last_used = datetime({utc.ts()}, 'unixepoch'),
|
||||
_tx = '{tx}'
|
||||
WHERE username = ({q1})
|
||||
"""
|
||||
@ -277,8 +277,7 @@ class AccountsPool:
|
||||
"""
|
||||
rs = await fetchone(self._db_file, qs)
|
||||
if rs:
|
||||
now = datetime.utcnow().replace(tzinfo=timezone.utc)
|
||||
trg = datetime.fromisoformat(rs[0]).replace(tzinfo=timezone.utc)
|
||||
now, trg = utc.now(), utc.from_iso(rs[0])
|
||||
if trg < now:
|
||||
return "now"
|
||||
|
||||
|
||||
@ -109,7 +109,7 @@ class API:
|
||||
"hidden_profile_likes_enabled": True,
|
||||
"highlights_tweets_tab_ui_enabled": True,
|
||||
"creator_subscriptions_tweet_preview_api_enabled": True,
|
||||
"hidden_profile_subscriptions_enabled": True
|
||||
"hidden_profile_subscriptions_enabled": True,
|
||||
}
|
||||
return await self._gql_item(op, kv, ft)
|
||||
|
||||
@ -128,7 +128,7 @@ class API:
|
||||
"creator_subscriptions_tweet_preview_api_enabled": True,
|
||||
"subscriptions_verification_info_verified_since_enabled": True,
|
||||
"hidden_profile_subscriptions_enabled": True,
|
||||
"subscriptions_verification_info_is_identity_verified_enabled": False
|
||||
"subscriptions_verification_info_is_identity_verified_enabled": False,
|
||||
}
|
||||
return await self._gql_item(op, kv, ft)
|
||||
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from datetime import timedelta
|
||||
|
||||
from httpx import AsyncClient, HTTPStatusError, Response
|
||||
|
||||
@ -6,7 +6,7 @@ from .account import Account
|
||||
from .constants import LOGIN_URL
|
||||
from .imap import imap_get_email_code, imap_login
|
||||
from .logger import logger
|
||||
from .utils import raise_for_status
|
||||
from .utils import raise_for_status, utc
|
||||
|
||||
|
||||
async def get_guest_token(client: AsyncClient):
|
||||
@ -120,7 +120,7 @@ async def login_confirm_email_code(client: AsyncClient, acc: Account, prev: dict
|
||||
if not imap:
|
||||
imap = await imap_login(acc.email, acc.email_password)
|
||||
|
||||
now_time = datetime.now(timezone.utc) - timedelta(seconds=30)
|
||||
now_time = utc.now() - timedelta(seconds=30)
|
||||
value = await imap_get_email_code(imap, acc.email, now_time)
|
||||
|
||||
payload = {
|
||||
|
||||
@ -12,7 +12,7 @@ from typing import Generator, Optional
|
||||
import httpx
|
||||
|
||||
from .logger import logger
|
||||
from .utils import find_item, get_or, int_or, to_old_rep
|
||||
from .utils import find_item, get_or, int_or, to_old_rep, utc
|
||||
|
||||
|
||||
@dataclass
|
||||
@ -407,7 +407,7 @@ def _get_views(obj: dict, rt_obj: dict):
|
||||
|
||||
def _write_dump(kind: str, e: Exception, x: dict, obj: dict):
|
||||
uniq = "".join(random.choice(string.ascii_lowercase) for _ in range(5))
|
||||
time = datetime.utcnow().strftime("%Y-%m-%d_%H-%M-%S")
|
||||
time = utc.now().strftime("%Y-%m-%d_%H-%M-%S")
|
||||
dumpfile = f"/tmp/twscrape/twscrape_parse_error_{time}_{uniq}.txt"
|
||||
os.makedirs(os.path.dirname(dumpfile), exist_ok=True)
|
||||
|
||||
|
||||
@ -1,16 +1,15 @@
|
||||
import json
|
||||
import os
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
|
||||
from .accounts_pool import Account, AccountsPool
|
||||
from .logger import logger
|
||||
from .utils import utc_ts
|
||||
from .utils import utc
|
||||
|
||||
ReqParams = dict[str, str | int] | None
|
||||
TMP_TS = datetime.utcnow().isoformat().split(".")[0].replace("T", "_").replace(":", "-")[0:16]
|
||||
TMP_TS = utc.now().isoformat().split(".")[0].replace("T", "_").replace(":", "-")[0:16]
|
||||
|
||||
|
||||
class Ctx:
|
||||
@ -39,6 +38,7 @@ class RateLimitError(Exception):
|
||||
class BannedError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class DependencyError(Exception):
|
||||
pass
|
||||
|
||||
@ -151,7 +151,7 @@ class QueueClient:
|
||||
|
||||
# possible new limits for tweets view per account
|
||||
if msg.startswith("(88) Rate limit exceeded") or rep.status_code == 429:
|
||||
await self._close_ctx(utc_ts() + 60 * 60 * 4) # lock for 4 hours
|
||||
await self._close_ctx(utc.ts() + 60 * 60 * 4) # lock for 4 hours
|
||||
raise RateLimitError(msg)
|
||||
|
||||
if msg.startswith("(326) Authorization: Denied by access control"):
|
||||
@ -163,7 +163,7 @@ class QueueClient:
|
||||
|
||||
# possible banned by old api flow
|
||||
if rep.status_code in (401, 403):
|
||||
await self._close_ctx(utc_ts() + 60 * 60 * 12) # lock for 12 hours
|
||||
await self._close_ctx(utc.ts() + 60 * 60 * 12) # lock for 12 hours
|
||||
raise RateLimitError(msg)
|
||||
|
||||
# content not found
|
||||
@ -196,7 +196,7 @@ class QueueClient:
|
||||
except (RateLimitError, BannedError):
|
||||
# already handled
|
||||
continue
|
||||
except (DependencyError):
|
||||
except DependencyError:
|
||||
logger.error(f"Dependency error, returnning: {url}")
|
||||
return
|
||||
except (httpx.ReadTimeout, httpx.ProxyError):
|
||||
@ -206,4 +206,4 @@ class QueueClient:
|
||||
retry_count += 1
|
||||
if retry_count >= 3:
|
||||
logger.warning(f"Unknown error {type(e)}: {e}")
|
||||
await self._close_ctx(utc_ts() + 60 * 15) # 15 minutes
|
||||
await self._close_ctx(utc.ts() + 60 * 15) # 15 minutes
|
||||
|
||||
@ -11,6 +11,20 @@ from .logger import logger
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
class utc:
|
||||
@staticmethod
|
||||
def now() -> datetime:
|
||||
return datetime.now(timezone.utc)
|
||||
|
||||
@staticmethod
|
||||
def from_iso(iso: str) -> datetime:
|
||||
return datetime.fromisoformat(iso).replace(tzinfo=timezone.utc)
|
||||
|
||||
@staticmethod
|
||||
def ts() -> int:
|
||||
return int(utc.now().timestamp())
|
||||
|
||||
|
||||
async def gather(gen: AsyncGenerator[T, None]) -> list[T]:
|
||||
items = []
|
||||
async for x in gen:
|
||||
@ -147,14 +161,6 @@ def to_old_rep(obj: dict) -> dict[str, dict]:
|
||||
return {"tweets": {**tw1, **tw2}, "users": users}
|
||||
|
||||
|
||||
def utc_ts() -> int:
|
||||
return int(datetime.utcnow().replace(tzinfo=timezone.utc).timestamp())
|
||||
|
||||
|
||||
def from_utciso(iso: str) -> datetime:
|
||||
return datetime.fromisoformat(iso).replace(tzinfo=timezone.utc)
|
||||
|
||||
|
||||
def print_table(rows: list[dict], hr_after=False):
|
||||
if not rows:
|
||||
return
|
||||
|
||||
Загрузка…
x
Ссылка в новой задаче
Block a user