update mocks; add tests for liked_tweets method; update readme

Этот коммит содержится в:
Vlad Pronsky 2024-02-10 17:29:11 +02:00
родитель 5c5e1d3fa8
Коммит 748ceae053
22 изменённых файлов: 78925 добавлений и 68257 удалений

2
.github/FUNDING.yml поставляемый
Просмотреть файл

@ -1,2 +0,0 @@
github: vladkens
custom: buymeacoffee.com/vladkens

1
.github/workflows/test.yml поставляемый
Просмотреть файл

@ -2,6 +2,7 @@ name: test
on:
push:
pull_request:
env:
PIP_ROOT_USER_ACTION: ignore

Просмотреть файл

@ -1,7 +1,7 @@
all:
@echo "hi"
install:
deps:
@pip install -e .[dev]
build:
@ -56,14 +56,16 @@ test-sq-matrix:
@make test-sq y=2023 v=3440000
update-mocks:
twscrape user_by_id --raw 2244994945 | jq > ./tests/mocked-data/user_by_id_raw.json
twscrape user_by_login --raw xdevelopers | jq > ./tests/mocked-data/user_by_login_raw.json
twscrape followers --raw --limit 10 2244994945 | jq > ./tests/mocked-data/followers_raw.json
twscrape following --raw --limit 10 2244994945 | jq > ./tests/mocked-data/following_raw.json
twscrape tweet_details --raw 1649191520250245121 | jq > ./tests/mocked-data/tweet_details_raw.json
twscrape retweeters --raw --limit 10 1649191520250245121 | jq > ./tests/mocked-data/retweeters_raw.json
twscrape favoriters --raw --limit 10 1649191520250245121 | jq > ./tests/mocked-data/favoriters_raw.json
twscrape user_tweets --raw --limit 10 2244994945 | jq > ./tests/mocked-data/user_tweets_raw.json
twscrape user_tweets_and_replies --raw --limit 10 2244994945 | jq > ./tests/mocked-data/user_tweets_and_replies_raw.json
twscrape search --raw --limit 10 "elon musk lang:en" | jq > ./tests/mocked-data/search_raw.json
twscrape list_timeline --raw --limit 10 1494877848087187461 | jq > ./tests/mocked-data/list_timeline_raw.json
@rm -rf ./tests/mocked-data/raw_*.json
twscrape user_by_id --raw 2244994945 | jq > ./tests/mocked-data/raw_user_by_id.json
twscrape user_by_login --raw xdevelopers | jq > ./tests/mocked-data/raw_user_by_login.json
twscrape followers --raw --limit 10 2244994945 | jq > ./tests/mocked-data/raw_followers.json
twscrape following --raw --limit 10 2244994945 | jq > ./tests/mocked-data/raw_following.json
twscrape tweet_details --raw 1649191520250245121 | jq > ./tests/mocked-data/raw_tweet_details.json
twscrape retweeters --raw --limit 10 1649191520250245121 | jq > ./tests/mocked-data/raw_retweeters.json
twscrape favoriters --raw --limit 10 1649191520250245121 | jq > ./tests/mocked-data/raw_favoriters.json
twscrape user_tweets --raw --limit 10 2244994945 | jq > ./tests/mocked-data/raw_user_tweets.json
twscrape user_tweets_and_replies --raw --limit 10 2244994945 | jq > ./tests/mocked-data/raw_user_tweets_and_replies.json
twscrape search --raw --limit 10 "elon musk lang:en" | jq > ./tests/mocked-data/raw_search.json
twscrape list_timeline --raw --limit 10 1494877848087187461 | jq > ./tests/mocked-data/raw_list_timeline.json
twscrape likes --raw --limit 10 2244994945 | jq > ./tests/mocked-data/raw_likes.json

Просмотреть файл

@ -7,21 +7,9 @@ from fake_useragent import UserAgent
client = httpx.Client(headers={"user-agent": UserAgent().chrome})
ops = """
SearchTimeline
UserByRestId
UserByScreenName
TweetDetail
Followers
Following
Retweeters
Favoriters
UserTweets
UserTweetsAndReplies
ListLatestTweetsTimeline
"""
ops = [op.strip() for op in ops.split("\n") if op.strip()]
with open("./twscrape/api.py") as fp:
ops = [x.strip() for x in fp.read().split("\n")]
ops = [x.split("=")[0].removeprefix("OP_").strip() for x in ops if x.startswith("OP_")]
def script_url(k: str, v: str):

Просмотреть файл

@ -27,7 +27,7 @@ dependencies = [
[project.optional-dependencies]
dev = [
"pyright>=1.1.344",
"pyright>=1.1.350",
"pytest-asyncio>=0.23.3",
"pytest-cov>=4.1.0",
"pytest-httpx>=0.28.0",

Просмотреть файл

@ -89,6 +89,7 @@ async def main():
await gather(api.following(user_id, limit=20)) # list[User]
await gather(api.user_tweets(user_id, limit=20)) # list[Tweet]
await gather(api.user_tweets_and_replies(user_id, limit=20)) # list[Tweet]
await gather(api.liked_tweets(user_id, limit=20)) # list[Tweet]
# list info
list_id = 123456789
@ -139,36 +140,60 @@ twscrape
twscrape search --help
```
### Add accounts & login
First add accounts from file:
### Add accounts
To add accounts use `add_accounts` command. Command syntax is:
```sh
# twscrape add_accounts <file_path> <line_format>
# line_format should have "username", "password", "email", "email_password" tokens
# note: the token delimiter should be the same as the one used in the file
twscrape add_accounts ./accounts.txt username:password:email:email_password
twscrape add_accounts <file_path> <line_format>
```
Then call login:
Where:
`<line_format>` is the format of each line in the accounts file, split by a delimiter. Possible tokens:
- `username` – required
- `password` – required
- `email` – required
- `email_password` – to receive email code (you can use `--manual` mode to get code)
- `cookies` – can be any parsable format (string, json, base64 string, etc)
- `_` – skip column from parse
Tokens should be separated by a delimiter; usually "`:`" is used.
Example:
I have account files named `order-12345.txt` with format:
```text
username:password:email:email password:user_agent:cookies
```
The command to add accounts will be (the user_agent column is skipped with `_`):
```sh
twscrape add_accounts ./order-12345.txt username:password:email:email_password:_:cookies
```
### Login accounts
_Note: If you added accounts with cookies, login is not required._
Run:
```sh
twscrape login_accounts
```
Accounts and their sessions will be saved, so they can be reused for future requests
`twscrape` will start the login flow for each new account. If X asks to verify the email and you provided `email_password` in `add_accounts`, then `twscrape` will try to receive the verification code via the IMAP protocol. After a successful login, the account cookies will be saved to the db file for future use.
Note: It is possible to use `_` in `line_format` to skip a value
#### Manual email verification
### Add accounts with cookies
If your email provider does not support the IMAP protocol (ProtonMail, Tutanota, etc.) or IMAP is disabled in its settings, you can enter the email verification code manually. To do this, run the login command with the `--manual` flag.
Use `cookies` param in `line_format`, e.g.:
Example:
```sh
twscrape add_accounts ./accounts.txt username:password:email:email_password:cookies
twscrape login_accounts --manual
twscrape relogin user1 user2 --manual
twscrape relogin_failed --manual
```
In this case login is not required.
### Get list of accounts and their statuses
@ -196,16 +221,6 @@ Or retry login for all failed logins:
twscrape relogin_failed
```
### Enter email verification code manually
twscrape may not be able to access some mailboxes (if IMAP is disabled or not supported at all, e.g. in ProtonMail). In such cases you can use the `--manual` flag, which allows you to enter the verification code manually.
```sh
twscrape login_accounts --manual
twscrape relogin user1 user2 --manual
twscrape relogin_failed --manual
```
### Use different accounts file
Useful if using a different set of accounts for different actions
@ -227,6 +242,7 @@ twscrape followers USER_ID --limit=20
twscrape following USER_ID --limit=20
twscrape user_tweets USER_ID --limit=20
twscrape user_tweets_and_replies USER_ID --limit=20
twscrape liked_tweets USER_ID --limit=20
```
The default output goes to the console (stdout), one document per line, so it can be redirected to a file.

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

13195
tests/mocked-data/raw_likes.json Обычный файл

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Разница между файлами не показана из-за своего большого размера Загрузить разницу

3070
tests/mocked-data/raw_search.json Обычный файл

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -48,16 +48,16 @@
}
},
"fast_followers_count": 0,
"favourites_count": 2075,
"followers_count": 600385,
"friends_count": 1800,
"favourites_count": 2073,
"followers_count": 607490,
"friends_count": 1773,
"has_custom_timelines": true,
"is_translator": false,
"listed_count": 2551,
"listed_count": 2572,
"location": "127.0.0.1",
"media_count": 815,
"name": "Developers",
"normal_followers_count": 600385,
"normal_followers_count": 607490,
"pinned_tweet_ids_str": [
"1661790253886177280"
],
@ -66,7 +66,7 @@
"profile_image_url_https": "https://pbs.twimg.com/profile_images/1683501992314798080/xl1POYLw_normal.jpg",
"profile_interstitial_type": "",
"screen_name": "XDevelopers",
"statuses_count": 4042,
"statuses_count": 4041,
"translator_type": "regular",
"url": "https://t.co/RUXWsqdGk8",
"verified": false,

Просмотреть файл

@ -48,16 +48,16 @@
}
},
"fast_followers_count": 0,
"favourites_count": 2075,
"followers_count": 600385,
"friends_count": 1800,
"favourites_count": 2073,
"followers_count": 607490,
"friends_count": 1773,
"has_custom_timelines": true,
"is_translator": false,
"listed_count": 2551,
"listed_count": 2572,
"location": "127.0.0.1",
"media_count": 815,
"name": "Developers",
"normal_followers_count": 600385,
"normal_followers_count": 607490,
"pinned_tweet_ids_str": [
"1661790253886177280"
],
@ -66,7 +66,7 @@
"profile_image_url_https": "https://pbs.twimg.com/profile_images/1683501992314798080/xl1POYLw_normal.jpg",
"profile_interstitial_type": "",
"screen_name": "XDevelopers",
"statuses_count": 4042,
"statuses_count": 4041,
"translator_type": "regular",
"url": "https://t.co/RUXWsqdGk8",
"verified": false,

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -1,5 +1,6 @@
import json
import os
from typing import Callable
from twscrape import API, gather
from twscrape.logger import set_log_level
@ -22,38 +23,28 @@ class FakeRep:
return json.loads(self.text)
def load_mock(name: str):
file = os.path.join(os.path.dirname(__file__), f"mocked-data/{name}.json")
with open(file) as f:
return json.load(f)
def fake_rep(fn: str, filename: str):
if not filename.startswith("/"):
filename = os.path.join(DATA_DIR, filename)
if not filename.endswith(".json"):
filename += ".json"
def fake_rep(filename: str):
filename = filename if filename.endswith(".json") else f"{filename}.json"
filename = filename if filename.startswith("/") else os.path.join(DATA_DIR, filename)
with open(filename) as fp:
data = fp.read()
return FakeRep(fp.read())
rep = FakeRep(data)
def mock_rep(fn: Callable, filename: str, as_generator=False):
rep = fake_rep(filename)
async def cb_rep(*args, **kwargs):
return rep
def mock_rep(obj, fn: str, filename: str | None = None):
async def cb_rep(*args, **kwargs):
return fake_rep(fn, filename or fn)
setattr(obj, fn, cb_rep)
def mock_gen(obj, fn: str):
async def cb_gen(*args, **kwargs):
yield fake_rep(fn, fn)
yield rep
setattr(obj, fn, cb_gen)
assert "__self__" in dir(fn)
cb = cb_gen if as_generator else cb_rep
cb.__name__ = fn.__name__
cb.__self__ = fn.__self__ # pyright: ignore
setattr(fn.__self__, fn.__name__, cb) # pyright: ignore
def check_tweet(doc: Tweet | None):
@ -138,7 +129,7 @@ def check_user(doc: User):
async def test_search():
api = API()
mock_gen(api, "search_raw")
mock_rep(api.search_raw, "raw_search", as_generator=True)
items = await gather(api.search("elon musk lang:en", limit=20))
assert len(items) > 0
@ -149,7 +140,7 @@ async def test_search():
async def test_user_by_id():
api = API()
mock_rep(api, "user_by_id_raw")
mock_rep(api.user_by_id_raw, "raw_user_by_id")
doc = await api.user_by_id(2244994945)
assert doc is not None
@ -167,7 +158,7 @@ async def test_user_by_id():
async def test_user_by_login():
api = API()
mock_rep(api, "user_by_login_raw")
mock_rep(api.user_by_login_raw, "raw_user_by_login")
doc = await api.user_by_login("xdevelopers")
assert doc is not None
@ -185,7 +176,7 @@ async def test_user_by_login():
async def test_tweet_details():
api = API()
mock_rep(api, "tweet_details_raw")
mock_rep(api.tweet_details_raw, "raw_tweet_details")
doc = await api.tweet_details(1649191520250245121)
assert doc is not None, "tweet should not be None"
@ -197,7 +188,7 @@ async def test_tweet_details():
async def test_followers():
api = API()
mock_gen(api, "followers_raw")
mock_rep(api.followers_raw, "raw_followers", as_generator=True)
users = await gather(api.followers(2244994945))
assert len(users) > 0
@ -208,7 +199,7 @@ async def test_followers():
async def test_following():
api = API()
mock_gen(api, "following_raw")
mock_rep(api.following_raw, "raw_following", as_generator=True)
users = await gather(api.following(2244994945))
assert len(users) > 0
@ -219,7 +210,7 @@ async def test_following():
async def test_retweters():
api = API()
mock_gen(api, "retweeters_raw")
mock_rep(api.retweeters_raw, "raw_retweeters", as_generator=True)
users = await gather(api.retweeters(1649191520250245121))
assert len(users) > 0
@ -230,7 +221,7 @@ async def test_retweters():
async def test_favoriters():
api = API()
mock_gen(api, "favoriters_raw")
mock_rep(api.favoriters_raw, "raw_favoriters", as_generator=True)
users = await gather(api.favoriters(1649191520250245121))
assert len(users) > 0
@ -241,7 +232,7 @@ async def test_favoriters():
async def test_user_tweets():
api = API()
mock_gen(api, "user_tweets_raw")
mock_rep(api.user_tweets_raw, "raw_user_tweets", as_generator=True)
tweets = await gather(api.user_tweets(2244994945))
assert len(tweets) > 0
@ -252,7 +243,7 @@ async def test_user_tweets():
async def test_user_tweets_and_replies():
api = API()
mock_gen(api, "user_tweets_and_replies_raw")
mock_rep(api.user_tweets_and_replies_raw, "raw_user_tweets_and_replies", as_generator=True)
tweets = await gather(api.user_tweets_and_replies(2244994945))
assert len(tweets) > 0
@ -263,7 +254,7 @@ async def test_user_tweets_and_replies():
async def test_list_timeline():
api = API()
mock_gen(api, "list_timeline_raw")
mock_rep(api.list_timeline_raw, "raw_list_timeline", as_generator=True)
tweets = await gather(api.list_timeline(1494877848087187461))
assert len(tweets) > 0
@ -272,6 +263,17 @@ async def test_list_timeline():
check_tweet(doc)
async def test_likes():
api = API()
mock_rep(api.liked_tweets_raw, "raw_likes", as_generator=True)
tweets = await gather(api.liked_tweets(2244994945))
assert len(tweets) > 0
for doc in tweets:
check_tweet(doc)
async def test_tweet_with_video():
api = API()
@ -281,7 +283,7 @@ async def test_tweet_with_video():
]
for file, twid in files:
mock_rep(api, "tweet_details_raw", file)
mock_rep(api.tweet_details_raw, file)
doc = await api.tweet_details(twid)
assert doc is not None
check_tweet(doc)
@ -290,7 +292,7 @@ async def test_tweet_with_video():
async def test_issue_28():
api = API()
mock_rep(api, "tweet_details_raw", "_issue_28_1")
mock_rep(api.tweet_details_raw, "_issue_28_1")
doc = await api.tweet_details(1658409412799737856)
assert doc is not None
check_tweet(doc)
@ -304,7 +306,7 @@ async def test_issue_28():
assert doc.viewCount == doc.retweetedTweet.viewCount
check_tweet(doc.retweetedTweet)
mock_rep(api, "tweet_details_raw", "_issue_28_2")
mock_rep(api.tweet_details_raw, "_issue_28_2")
doc = await api.tweet_details(1658421690001502208)
assert doc is not None
check_tweet(doc)
@ -318,7 +320,7 @@ async def test_issue_28():
async def test_issue_42():
raw = load_mock("_issue_42")
raw = fake_rep("_issue_42").json()
doc = parse_tweet(raw, 1665951747842641921)
assert doc is not None
assert doc.retweetedTweet is not None
@ -328,7 +330,7 @@ async def test_issue_42():
async def test_issue_56():
raw = load_mock("_issue_56")
raw = fake_rep("_issue_56").json()
doc = parse_tweet(raw, 1682072224013099008)
assert doc is not None
assert len(set([x.tcourl for x in doc.links])) == len(doc.links)

Просмотреть файл

@ -6,18 +6,18 @@ from .models import Tweet, User, parse_tweet, parse_tweets, parse_user, parse_us
from .queue_client import QueueClient
from .utils import encode_params, find_obj, get_by_path
OP_SearchTimeline = "Aj1nGkALq99Xg3XI0OZBtw/SearchTimeline"
OP_UserByRestId = "CO4_gU4G_MRREoqfiTh6Hg/UserByRestId"
OP_UserByScreenName = "NimuplG1OB7Fd2btCLdBOw/UserByScreenName"
OP_TweetDetail = "-H4B_lJDEA-O_7_qWaRiyg/TweetDetail"
OP_Followers = "3_7xfjmh897x8h_n6QBqTA/Followers"
OP_Following = "0yD6Eiv23DKXRDU9VxlG2A/Following"
OP_Retweeters = "sOBhVzDeJl4XGepvi5pHlg/Retweeters"
OP_Favoriters = "E-ZTxvWWIkmOKwYdNTEefg/Favoriters"
OP_UserTweets = "V1ze5q3ijDS1VeLwLY0m7g/UserTweets"
OP_UserTweetsAndReplies = "16nOjYqEdV04vN6-rgg8KA/UserTweetsAndReplies"
OP_ListLatestTweetsTimeline = "whF0_KH1fCkdLLoyNPMoEw/ListLatestTweetsTimeline"
OP_Likes = "IohM3gxQHfvWePH5E3KuNA/Likes"
OP_SearchTimeline = "fZK7JipRHWtiZsTodhsTfQ/SearchTimeline"
OP_UserByRestId = "tD8zKvQzwY3kdx5yz6YmOw/UserByRestId"
OP_UserByScreenName = "k5XapwcSikNsEsILW5FvgA/UserByScreenName"
OP_TweetDetail = "B9_KmbkLhXt6jRwGjJrweg/TweetDetail"
OP_Followers = "ZG1BQPaRSg04qo55kKaW2g/Followers"
OP_Following = "PAnE9toEjRfE-4tozRcsfw/Following"
OP_Retweeters = "X-XEqG5qHQSAwmvy00xfyQ/Retweeters"
OP_Favoriters = "LLkw5EcVutJL6y-2gkz22A/Favoriters"
OP_UserTweets = "5ICa5d9-AitXZrIA3H-4MQ/UserTweets"
OP_UserTweetsAndReplies = "UtLStR_BnYUGD7Q453UXQg/UserTweetsAndReplies"
OP_ListLatestTweetsTimeline = "HjsWc-nwwHKYwHenbHm-tw/ListLatestTweetsTimeline"
OP_Likes = "9s8V6sUI8fZLDiN-REkAxA/Likes"
GQL_URL = "https://twitter.com/i/api/graphql"
@ -308,7 +308,8 @@ class API:
yield x
# likes
async def likes_raw(self, uid: int, limit=-1, kv=None):
async def liked_tweets_raw(self, uid: int, limit=-1, kv=None):
op = OP_Likes
kv = {
"userId": str(uid),
@ -321,7 +322,7 @@ class API:
async for x in self._gql_items(op, kv, limit=limit):
yield x
async def likes(self, uid: int, limit=-1, kv=None):
async for rep in self.likes_raw(uid, limit=limit, kv=kv):
async def liked_tweets(self, uid: int, limit=-1, kv=None):
async for rep in self.liked_tweets_raw(uid, limit=limit, kv=kv):
for x in parse_tweets(rep.json(), limit):
yield x

Просмотреть файл

@ -192,7 +192,7 @@ def run():
c_lim("user_tweets", "Get user tweets", "user_id", "User ID", int)
c_lim("user_tweets_and_replies", "Get user tweets and replies", "user_id", "User ID", int)
c_lim("list_timeline", "Get tweets from list", "list_id", "List ID", int)
c_lim("likes", "Get user's liked tweets", "user_id", "User ID", int)
c_lim("liked_tweets", "Get user's liked tweets", "user_id", "User ID", int)
args = p.parse_args()
if args.command is None: