allow to select log-level; add ci & tests

Этот коммит содержится в:
Vlad Pronsky 2023-04-30 18:36:28 +03:00
родитель 01d59b50d6
Коммит 0c1377d3c6
26 изменённых файлов: 33660 добавлений и 17 удалений

1
.gitattributes поставляемый Обычный файл
Просмотреть файл

@ -0,0 +1 @@
tests/mocked-data/* binary merge

1
.github/CODEOWNERS поставляемый Обычный файл
Просмотреть файл

@ -0,0 +1 @@
* @vladkens

1
.github/FUNDING.yml поставляемый Обычный файл
Просмотреть файл

@ -0,0 +1 @@
github: vladkens

30
.github/workflows/ci.yml поставляемый Обычный файл
Просмотреть файл

@ -0,0 +1,30 @@
# Continuous-integration workflow: installs dependencies, then lints and tests.
name: ci
# Triggered by every push.
on: [push]
env:
# Exported to every step, so the pip invocations below see it.
PIP_ROOT_USER_ACTION: ignore
jobs:
build:
runs-on: ubuntu-latest
strategy:
matrix:
# The job is run once for each Python version listed here.
python-version: ["3.10", "3.11"]
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: install dependencies
# Runtime requirements first, then the lint/test tooling.
run: |
python -m pip install --upgrade pip
pip install --quiet -r requirements.txt
pip install --quiet -r requirements-dev.txt
# Lint and test are delegated to the Makefile targets of the same name.
- name: lint
run: make lint
- name: test
run: make test

14
Makefile Обычный файл
Просмотреть файл

@ -0,0 +1,14 @@
# Default target: only prints a greeting.
all:
@echo "hi"
# Check the whole repository with ruff.
lint:
ruff check .
# Same check, with ruff's --fix flag enabled.
lint-fix:
ruff check --fix .
# Run the tests under tests/ with coverage collected for the twapi package.
test:
pytest --cov=twapi tests/
# Invoke `act` for linux/amd64 containers.
# NOTE(review): presumably used to run the CI workflow locally - confirm.
act:
act --container-architecture linux/amd64

Просмотреть файл

@ -6,3 +6,7 @@ line-length = 99
[tool.ruff]
line-length = 99
[tool.pytest.ini_options]
pythonpath = ["."]
asyncio_mode = "auto"

Просмотреть файл

@ -4,18 +4,13 @@ Twitter GraphQL and Search API implementation with [SNScrape](https://github.com
```python
import asyncio
from twapi.account import Account
from twapi.accounts_pool import AccountsPool
from twapi.api import API
from twapi.utils import gather
from twapi import AccountsPool, API, gather
from twapi.logger import set_log_level
async def main():
acc1 = Account("user1", "pass1", "user1@example.com", "email_pass1")
acc2 = Account("user2", "pass2", "user2@example.com", "email_pass2")
pool = AccountsPool()
pool.add_account(acc1)
pool.add_account(acc2)
pool.add_account("user1", "pass1", "user1@example.com", "email_pass1")
pool.add_account("user2", "pass2", "user2@example.com", "email_pass2")
# login all accounts if required (no account file found)
# session file will be saved to `accounts/{username}.json`
@ -50,6 +45,9 @@ async def main():
async for rep in api.search_raw("elon musk"):
print(rep.status_code, rep.json()) # rep is `httpx.Response` object
# change log level, default info
set_log_level("DEBUG")
if __name__ == "__main__":
asyncio.run(main())
```

4
requirements-dev.txt Обычный файл
Просмотреть файл

@ -0,0 +1,4 @@
ruff==0.0.263
pytest==7.3.1
pytest-asyncio==0.21.0
pytest-cov==4.0.0

1381
tests/mocked-data/favoriters_raw.json Обычный файл

Разница между файлами не показана из-за своего большого размера Загрузить разницу

2918
tests/mocked-data/followers_raw.json Обычный файл

Разница между файлами не показана из-за своего большого размера Загрузить разницу

2732
tests/mocked-data/following_raw.json Обычный файл

Разница между файлами не показана из-за своего большого размера Загрузить разницу

1414
tests/mocked-data/retweeters_raw.json Обычный файл

Разница между файлами не показана из-за своего большого размера Загрузить разницу

5489
tests/mocked-data/search_raw.json Обычный файл

Разница между файлами не показана из-за своего большого размера Загрузить разницу

1813
tests/mocked-data/tweet_details_raw.json Обычный файл

Разница между файлами не показана из-за своего большого размера Загрузить разницу

94
tests/mocked-data/user_by_id_raw.json Обычный файл
Просмотреть файл

@ -0,0 +1,94 @@
{
"data": {
"user": {
"result": {
"__typename": "User",
"id": "VXNlcjoyMjQ0OTk0OTQ1",
"rest_id": "2244994945",
"affiliates_highlighted_label": {
"label": {
"url": {
"url": "https://twitter.com/Twitter",
"urlType": "DeepLink"
},
"badge": {
"url": "https://pbs.twimg.com/profile_images/1488548719062654976/u6qfBBkF_bigger.jpg"
},
"description": "Twitter",
"userLabelType": "BusinessLabel",
"userLabelDisplayType": "Badge"
}
},
"has_graduated_access": true,
"is_blue_verified": true,
"profile_image_shape": "Square",
"legacy": {
"can_dm": false,
"can_media_tag": true,
"created_at": "Sat Dec 14 04:35:55 +0000 2013",
"default_profile": false,
"default_profile_image": false,
"description": "The voice of the #TwitterDev team and your official source for updates, news, and events, related to the #TwitterAPI.",
"entities": {
"description": {
"urls": []
},
"url": {
"urls": [
{
"display_url": "developer.twitter.com/en/community",
"expanded_url": "https://developer.twitter.com/en/community",
"url": "https://t.co/9wI31m3ELF",
"indices": [
0,
23
]
}
]
}
},
"fast_followers_count": 0,
"favourites_count": 2128,
"followers_count": 583721,
"friends_count": 1941,
"has_custom_timelines": true,
"is_translator": false,
"listed_count": 2395,
"location": "127.0.0.1",
"media_count": 815,
"name": "Twitter Dev",
"normal_followers_count": 583721,
"pinned_tweet_ids_str": [
"1641222782594990080"
],
"possibly_sensitive": false,
"profile_banner_url": "https://pbs.twimg.com/profile_banners/2244994945/1660405530",
"profile_image_url_https": "https://pbs.twimg.com/profile_images/1445764922474827784/W2zEPN7U_normal.jpg",
"profile_interstitial_type": "",
"screen_name": "TwitterDev",
"statuses_count": 4076,
"translator_type": "regular",
"url": "https://t.co/9wI31m3ELF",
"verified": false,
"verified_type": "Business",
"want_retweets": false,
"withheld_in_countries": []
},
"professional": {
"rest_id": "1516891231749517312",
"professional_type": "Business",
"category": [
{
"id": 1009,
"name": "Community",
"icon_name": "IconBriefcaseStroke"
}
]
},
"smart_blocked_by": false,
"smart_blocking": false,
"business_account": {}
}
}
}
}

128
tests/mocked-data/user_by_login_raw.json Обычный файл
Просмотреть файл

@ -0,0 +1,128 @@
{
"data": {
"user": {
"result": {
"__typename": "User",
"id": "VXNlcjoyMjQ0OTk0OTQ1",
"rest_id": "2244994945",
"affiliates_highlighted_label": {
"label": {
"url": {
"url": "https://twitter.com/Twitter",
"urlType": "DeepLink"
},
"badge": {
"url": "https://pbs.twimg.com/profile_images/1488548719062654976/u6qfBBkF_bigger.jpg"
},
"description": "Twitter",
"userLabelType": "BusinessLabel",
"userLabelDisplayType": "Badge"
}
},
"has_graduated_access": true,
"is_blue_verified": true,
"profile_image_shape": "Square",
"legacy": {
"can_dm": false,
"can_media_tag": true,
"created_at": "Sat Dec 14 04:35:55 +0000 2013",
"default_profile": false,
"default_profile_image": false,
"description": "The voice of the #TwitterDev team and your official source for updates, news, and events, related to the #TwitterAPI.",
"entities": {
"description": {
"urls": []
},
"url": {
"urls": [
{
"display_url": "developer.twitter.com/en/community",
"expanded_url": "https://developer.twitter.com/en/community",
"url": "https://t.co/9wI31m3ELF",
"indices": [
0,
23
]
}
]
}
},
"fast_followers_count": 0,
"favourites_count": 2128,
"followers_count": 583731,
"friends_count": 1941,
"has_custom_timelines": true,
"is_translator": false,
"listed_count": 2395,
"location": "127.0.0.1",
"media_count": 815,
"name": "Twitter Dev",
"normal_followers_count": 583731,
"pinned_tweet_ids_str": [
"1641222782594990080"
],
"possibly_sensitive": false,
"profile_banner_url": "https://pbs.twimg.com/profile_banners/2244994945/1660405530",
"profile_image_url_https": "https://pbs.twimg.com/profile_images/1445764922474827784/W2zEPN7U_normal.jpg",
"profile_interstitial_type": "",
"screen_name": "TwitterDev",
"statuses_count": 4076,
"translator_type": "regular",
"url": "https://t.co/9wI31m3ELF",
"verified": false,
"verified_type": "Business",
"want_retweets": false,
"withheld_in_countries": []
},
"professional": {
"rest_id": "1516891231749517312",
"professional_type": "Business",
"category": [
{
"id": 1009,
"name": "Community",
"icon_name": "IconBriefcaseStroke"
}
]
},
"smart_blocked_by": false,
"smart_blocking": false,
"legacy_extended_profile": {
"birthdate": {
"day": 21,
"month": 3,
"visibility": "Public",
"year_visibility": "Self"
}
},
"is_profile_translatable": false,
"verification_info": {
"reason": {
"description": {
"text": "This account is verified because it's an affiliate of @Twitter on Twitter. Learn more",
"entities": [
{
"from_index": 54,
"to_index": 62,
"ref": {
"url": "https://twitter.com/Twitter",
"url_type": "ExternalUrl"
}
},
{
"from_index": 75,
"to_index": 85,
"ref": {
"url": "https://help.twitter.com/en/rules-and-policies/profile-labels",
"url_type": "ExternalUrl"
}
}
]
}
}
},
"business_account": {}
}
}
}
}

Разница между файлами не показана из-за своего большого размера Загрузить разницу

8660
tests/mocked-data/user_tweets_raw.json Обычный файл

Разница между файлами не показана из-за своего большого размера Загрузить разницу

256
tests/test_parser.py Обычный файл
Просмотреть файл

@ -0,0 +1,256 @@
import asyncio
import json
import os
from twapi import API, AccountsPool, gather
from twapi.logger import set_log_level
# Directory that contains this test module.
BASE_DIR = os.path.dirname(__file__)
# Fixture files are looked up in (and written to) this sub-directory.
DATA_DIR = os.path.join(BASE_DIR, "mocked-data")
# NOTE: executed at import time, so the directory exists before anything reads or writes it.
os.makedirs(DATA_DIR, exist_ok=True)
# Use the most verbose log level while this module is loaded.
set_log_level("DEBUG")
class Files:
    """File names of the recorded responses stored in ``DATA_DIR``.

    Each attribute is named after the raw API method whose reply the file holds;
    the attribute name is what ``fake_rep`` / ``mock_rep`` / ``mock_gen`` receive.
    """

    # search
    search_raw = "search_raw.json"

    # single-object lookups
    user_by_id_raw = "user_by_id_raw.json"
    user_by_login_raw = "user_by_login_raw.json"
    tweet_details_raw = "tweet_details_raw.json"

    # user lists
    followers_raw = "followers_raw.json"
    following_raw = "following_raw.json"
    retweeters_raw = "retweeters_raw.json"
    favoriters_raw = "favoriters_raw.json"

    # tweet timelines
    user_tweets_raw = "user_tweets_raw.json"
    user_tweets_and_replies_raw = "user_tweets_and_replies_raw.json"
def fake_rep(fn: str):
    """Build a minimal stand-in for a response from a recorded fixture file.

    Args:
        fn: Name of an attribute on ``Files``; its value is the file name inside ``DATA_DIR``.

    Returns:
        An object exposing ``text`` (the raw file content) and ``json()`` (the content
        parsed with ``json.loads`` on every call).

    Raises:
        AttributeError: if ``Files`` has no attribute called ``fn``.
        OSError: if the fixture file cannot be opened.
    """
    from types import SimpleNamespace

    filename = os.path.join(DATA_DIR, getattr(Files, fn))

    # Read as UTF-8 explicitly so the result does not depend on the platform's locale encoding.
    with open(filename, encoding="utf-8") as fp:
        data = fp.read()

    # A plain attribute container is enough here: only `text` and `json()` are provided.
    return SimpleNamespace(text=data, json=lambda: json.loads(data))
def mock_rep(obj, fn: str):
    """Replace ``obj.<fn>`` with a coroutine function that returns the fixture for ``fn``."""

    async def _canned_reply(*_args, **_kwargs):
        # Whatever the caller passes is ignored; the reply always comes from the fixture.
        return fake_rep(fn)

    setattr(obj, fn, _canned_reply)
def mock_gen(obj, fn: str):
    """Replace ``obj.<fn>`` with an async generator that yields the fixture for ``fn`` once."""

    async def _single_page(*_args, **_kwargs):
        # Whatever the caller passes is ignored; exactly one fixture reply is produced.
        yield fake_rep(fn)

    setattr(obj, fn, _single_page)
async def test_search():
    """Search results built from the recorded reply carry ids that match their ``json()`` form."""
    client = API(AccountsPool())
    mock_gen(client, "search_raw")

    tweets = await gather(client.search("elon musk lang:en", limit=20))
    assert len(tweets) > 0

    for tweet in tweets:
        assert tweet.id is not None
        assert tweet.user is not None

        dumped = tweet.json()
        assert tweet.id == dumped["id"]
        assert tweet.user.id == dumped["user"]["id"]
async def test_user_by_id():
    """The recorded ``user_by_id_raw`` reply must parse into the TwitterDev user."""
    expected_id = 2244994945
    expected_username = "TwitterDev"

    client = API(AccountsPool())
    mock_rep(client, "user_by_id_raw")

    user = await client.user_by_id(expected_id)
    assert user.id == expected_id
    assert user.username == expected_username

    dumped = user.json()
    assert user.id == dumped["id"]
    assert user.username == dumped["username"]
async def test_user_by_login():
    """The recorded ``user_by_login_raw`` reply must parse into the TwitterDev user."""
    expected_id = 2244994945
    expected_username = "TwitterDev"

    client = API(AccountsPool())
    mock_rep(client, "user_by_login_raw")

    # The lookup uses the lower-case login; the parsed username keeps its original casing.
    user = await client.user_by_login("twitterdev")
    assert user.id == expected_id
    assert user.username == expected_username

    dumped = user.json()
    assert user.id == dumped["id"]
    assert user.username == dumped["username"]
async def test_tweet_details():
    """The recorded ``tweet_details_raw`` reply must parse into the requested tweet."""
    tweet_id = 1649191520250245121

    client = API(AccountsPool())
    mock_rep(client, "tweet_details_raw")

    tweet = await client.tweet_details(tweet_id)
    assert tweet.id == tweet_id
    assert tweet.user is not None

    dumped = tweet.json()
    assert tweet.id == dumped["id"]
    assert tweet.user.id == dumped["user"]["id"]
async def test_followers():
    """Followers parsed from the recorded reply have an id and username matching ``json()``."""
    client = API(AccountsPool())
    mock_gen(client, "followers_raw")

    followers = await gather(client.followers(2244994945))
    assert len(followers) > 0

    for follower in followers:
        assert follower.id is not None
        assert follower.username is not None

        dumped = follower.json()
        assert follower.id == dumped["id"]
        assert follower.username == dumped["username"]
async def test_following():
    """Followed users parsed from the recorded reply have an id and username matching ``json()``."""
    client = API(AccountsPool())
    mock_gen(client, "following_raw")

    followed = await gather(client.following(2244994945))
    assert len(followed) > 0

    for account in followed:
        assert account.id is not None
        assert account.username is not None

        dumped = account.json()
        assert account.id == dumped["id"]
        assert account.username == dumped["username"]
async def test_retweters():
    """Retweeters parsed from the recorded reply have an id and username matching ``json()``.

    NOTE: the function name is spelled ``test_retweters`` in the original and is kept as-is.
    """
    client = API(AccountsPool())
    mock_gen(client, "retweeters_raw")

    retweeters = await gather(client.retweeters(1649191520250245121))
    assert len(retweeters) > 0

    for retweeter in retweeters:
        assert retweeter.id is not None
        assert retweeter.username is not None

        dumped = retweeter.json()
        assert retweeter.id == dumped["id"]
        assert retweeter.username == dumped["username"]
async def test_favoriters():
    """Favoriters parsed from the recorded reply have an id and username matching ``json()``."""
    client = API(AccountsPool())
    mock_gen(client, "favoriters_raw")

    favoriters = await gather(client.favoriters(1649191520250245121))
    assert len(favoriters) > 0

    for favoriter in favoriters:
        assert favoriter.id is not None
        assert favoriter.username is not None

        dumped = favoriter.json()
        assert favoriter.id == dumped["id"]
        assert favoriter.username == dumped["username"]
async def test_user_tweets():
    """Tweets parsed from the recorded timeline carry ids that match their ``json()`` form."""
    client = API(AccountsPool())
    mock_gen(client, "user_tweets_raw")

    timeline = await gather(client.user_tweets(2244994945))
    assert len(timeline) > 0

    for entry in timeline:
        assert entry.id is not None
        assert entry.user is not None

        dumped = entry.json()
        assert entry.id == dumped["id"]
        assert entry.user.id == dumped["user"]["id"]
async def test_user_tweets_and_replies():
    """Tweets and replies parsed from the recorded timeline match their ``json()`` form."""
    client = API(AccountsPool())
    mock_gen(client, "user_tweets_and_replies_raw")

    timeline = await gather(client.user_tweets_and_replies(2244994945))
    assert len(timeline) > 0

    for entry in timeline:
        assert entry.id is not None
        assert entry.user is not None

        dumped = entry.json()
        assert entry.id == dumped["id"]
        assert entry.user.id == dumped["user"]["id"]
async def main():
    """Record real Twitter replies into ``DATA_DIR`` for use as test fixtures.

    Accounts are loaded with ``AccountsPool.load_from_dir()``. For every entry in ``jobs``
    the raw API call is made and the reply's ``text`` is written to the matching file.
    Files that already exist are skipped, so delete a file to have it recorded again.
    """
    # prepare mock files from real twitter replies
    # you need to have some account to perform this
    pool = AccountsPool()
    pool.load_from_dir()
    api = API(pool)

    # (fixture file name, zero-argument factory for the raw call); the lambdas defer the call
    # until the file is known to be missing.
    jobs = [
        (Files.search_raw, lambda: api.search_raw("elon musk lang:en", limit=20)),
        (Files.user_by_id_raw, lambda: api.user_by_id_raw(2244994945)),
        (Files.user_by_login_raw, lambda: api.user_by_login_raw("twitterdev")),
        (Files.tweet_details_raw, lambda: api.tweet_details_raw(1649191520250245121)),
        (Files.followers_raw, lambda: api.followers_raw(2244994945)),
        (Files.following_raw, lambda: api.following_raw(2244994945)),
        (Files.retweeters_raw, lambda: api.retweeters_raw(1649191520250245121)),
        (Files.favoriters_raw, lambda: api.favoriters_raw(1649191520250245121)),
        (Files.user_tweets_raw, lambda: api.user_tweets_raw(2244994945)),
        (Files.user_tweets_and_replies_raw, lambda: api.user_tweets_and_replies_raw(2244994945)),
    ]

    for name, fn in jobs:
        filename = os.path.join(DATA_DIR, name)
        print("-" * 20)
        if os.path.exists(filename):
            print(f"File {filename} already exists")
            continue

        print(f"Getting data for {filename}")

        rep = fn()
        # Objects without __aiter__ are awaited directly; the rest are iterated.
        is_coroutine = getattr(rep, "__aiter__", None) is None

        data = None
        if is_coroutine:
            data = await rep
        else:
            # Only the first item is kept for the fixture.
            async for x in rep:
                data = x
                break

        if data is None:
            print(f"Failed to get data for {filename}")
            continue

        # Write UTF-8 explicitly: with the locale default, non-ASCII reply text can fail to
        # encode or produce a file that reads back differently on another machine.
        with open(filename, "w", encoding="utf-8") as fp:
            fp.write(data.text)


# Executing this file directly records fixtures.
if __name__ == "__main__":
    asyncio.run(main())

5
twapi/__init__.py Обычный файл
Просмотреть файл

@ -0,0 +1,5 @@
# ruff: noqa: F401
from .account import Account
from .accounts_pool import AccountsPool
from .api import API
from .utils import gather

Просмотреть файл

@ -5,10 +5,10 @@ from enum import Enum
from fake_useragent import UserAgent
from httpx import AsyncClient, HTTPStatusError, Response
from loguru import logger
from .constants import LOGIN_URL, TOKEN
from .imap import get_email_code
from .logger import logger
from .utils import raise_for_status
@ -27,7 +27,7 @@ class Account:
with open(filepath) as f:
data = json.load(f)
return cls(**data)
except Exception as e:
except (FileNotFoundError, json.JSONDecodeError) as e:
logger.error(f"Failed to load account {filepath}: {e}")
return None

Просмотреть файл

@ -1,15 +1,44 @@
import asyncio
from loguru import logger
import os
from .account import Account, Status
from .logger import logger
class AccountsPool:
BASE_DIR = "accounts"
def __init__(self):
self.accounts: list[Account] = []
def add_account(self, account: Account):
def load_from_dir(self, folder: str | None = None):
folder = folder or self.BASE_DIR
files = os.listdir(folder)
files = [x for x in files if x.endswith(".json")]
files = [os.path.join(folder, x) for x in files]
for file in files:
account = Account.load(file)
if account:
self.accounts.append(account)
def add_account(
self,
login: str,
password: str,
email: str,
email_password: str,
proxy: str | None = None,
user_agent: str | None = None,
):
filepath = os.path.join(self.BASE_DIR, f"{login}.json")
account = Account.load(filepath)
if account:
self.accounts.append(account)
return
account = Account(login, password, email, email_password, user_agent, proxy)
self.accounts.append(account)
async def login(self):

Просмотреть файл

@ -2,10 +2,10 @@ import time
from typing import Awaitable, Callable
from httpx import AsyncClient, HTTPStatusError, Response
from loguru import logger
from .accounts_pool import AccountsPool
from .constants import GQL_FEATURES, GQL_URL, SEARCH_PARAMS, SEARCH_URL
from .logger import logger
from .models import Tweet, User
from .utils import encode_params, find_item, to_old_obj, to_search_like

Просмотреть файл

@ -3,7 +3,7 @@ import email as emaillib
import imaplib
from datetime import datetime
from loguru import logger
from .logger import logger
def get_imap_domain(email: str) -> str:

16
twapi/logger.py Обычный файл
Просмотреть файл

@ -0,0 +1,16 @@
import sys
from typing import Literal
from loguru import logger
_LEVELS = Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
_LOG_LEVEL: _LEVELS = "INFO"
def set_log_level(level: _LEVELS):
    """Set the minimum level of records that the package logger emits.

    The chosen level is stored in the module-level ``_LOG_LEVEL``. All handlers currently
    attached to the loguru ``logger`` are then removed and a single stderr handler is added
    whose filter drops records below that level.
    """
    global _LOG_LEVEL
    _LOG_LEVEL = level

    def _at_or_above(record) -> bool:
        # Reads the module-level setting for each record instead of capturing `level`.
        threshold = logger.level(_LOG_LEVEL).no
        return record["level"].no >= threshold

    logger.remove()
    logger.add(sys.stderr, filter=_at_or_above)

Просмотреть файл

@ -3,7 +3,8 @@ from collections import defaultdict
from typing import Any, AsyncGenerator, TypeVar
from httpx import HTTPStatusError, Response
from loguru import logger
from .logger import logger
T = TypeVar("T")