update gql endpoints; add list_timeline test

Этот коммит содержится в:
Vlad Pronsky 2023-11-01 18:17:37 +02:00
родитель 100859c8fa
Коммит fd64ce2018
16 изменённых файлов: 66343 добавлений и 9665 удалений

Просмотреть файл

@ -71,3 +71,4 @@ update-mocks:
twscrape user_tweets --raw --limit 10 2244994945 | jq > ./tests/mocked-data/user_tweets_raw.json twscrape user_tweets --raw --limit 10 2244994945 | jq > ./tests/mocked-data/user_tweets_raw.json
twscrape user_tweets_and_replies --raw --limit 10 2244994945 | jq > ./tests/mocked-data/user_tweets_and_replies_raw.json twscrape user_tweets_and_replies --raw --limit 10 2244994945 | jq > ./tests/mocked-data/user_tweets_and_replies_raw.json
twscrape search --raw --limit 10 "elon musk lang:en" | jq > ./tests/mocked-data/search_raw.json twscrape search --raw --limit 10 "elon musk lang:en" | jq > ./tests/mocked-data/search_raw.json
twscrape list_timeline --raw --limit 10 1494877848087187461 | jq > ./tests/mocked-data/list_timeline_raw.json

Просмотреть файл

@ -1,9 +1,11 @@
import re
import httpx import httpx
# update this url on next run # note: update this url on next run
# url = "https://abs.twimg.com/responsive-web/client-web/api.f4ff3bfa.js" # url = "https://abs.twimg.com/responsive-web/client-web/api.f4ff3bfa.js"
url = "https://abs.twimg.com/responsive-web/client-web/api.bb81931a.js" # url = "https://abs.twimg.com/responsive-web/client-web/api.bb81931a.js"
script = httpx.get(url).text url = "https://abs.twimg.com/responsive-web/client-web/main.45d48c6a.js"
ops = """ ops = """
SearchTimeline SearchTimeline
@ -21,7 +23,13 @@ ListLatestTweetsTimeline
ops = [op.strip() for op in ops.split("\n") if op.strip()] ops = [op.strip() for op in ops.split("\n") if op.strip()]
script: str = httpx.get(url).text
pairs = re.findall(r'queryId:"(.+?)".+?operationName:"(.+?)"', script)
pairs = {op_name: op_id for op_id, op_name in pairs}
for x in ops: for x in ops:
idx = script.split(f'operationName:"{x}"')[0].split("queryId:")[-1] print(f'OP_{x} = "{pairs.get(x, "???")}/{x}"')
idx = idx.strip('",')
print(f'OP_{x} = "{idx}/{x}"') # for ??? check urls:
# https://twitter.com/SpaceX/status/1719132541632864696/likes
# https://twitter.com/i/lists/1494877848087187461

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

54044
tests/mocked-data/list_timeline_raw.json Обычный файл

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -48,16 +48,16 @@
} }
}, },
"fast_followers_count": 0, "fast_followers_count": 0,
"favourites_count": 2078, "favourites_count": 2077,
"followers_count": 590993, "followers_count": 596281,
"friends_count": 1934, "friends_count": 1913,
"has_custom_timelines": true, "has_custom_timelines": true,
"is_translator": false, "is_translator": false,
"listed_count": 2491, "listed_count": 2516,
"location": "127.0.0.1", "location": "127.0.0.1",
"media_count": 815, "media_count": 815,
"name": "Developers", "name": "Developers",
"normal_followers_count": 590993, "normal_followers_count": 596281,
"pinned_tweet_ids_str": [ "pinned_tweet_ids_str": [
"1661790253886177280" "1661790253886177280"
], ],
@ -66,7 +66,7 @@
"profile_image_url_https": "https://pbs.twimg.com/profile_images/1683501992314798080/xl1POYLw_normal.jpg", "profile_image_url_https": "https://pbs.twimg.com/profile_images/1683501992314798080/xl1POYLw_normal.jpg",
"profile_interstitial_type": "", "profile_interstitial_type": "",
"screen_name": "XDevelopers", "screen_name": "XDevelopers",
"statuses_count": 4053, "statuses_count": 4042,
"translator_type": "regular", "translator_type": "regular",
"url": "https://t.co/RUXWsqdGk8", "url": "https://t.co/RUXWsqdGk8",
"verified": false, "verified": false,

Просмотреть файл

@ -48,16 +48,16 @@
} }
}, },
"fast_followers_count": 0, "fast_followers_count": 0,
"favourites_count": 2078, "favourites_count": 2077,
"followers_count": 590993, "followers_count": 596281,
"friends_count": 1934, "friends_count": 1913,
"has_custom_timelines": true, "has_custom_timelines": true,
"is_translator": false, "is_translator": false,
"listed_count": 2491, "listed_count": 2516,
"location": "127.0.0.1", "location": "127.0.0.1",
"media_count": 815, "media_count": 815,
"name": "Developers", "name": "Developers",
"normal_followers_count": 590993, "normal_followers_count": 596281,
"pinned_tweet_ids_str": [ "pinned_tweet_ids_str": [
"1661790253886177280" "1661790253886177280"
], ],
@ -66,7 +66,7 @@
"profile_image_url_https": "https://pbs.twimg.com/profile_images/1683501992314798080/xl1POYLw_normal.jpg", "profile_image_url_https": "https://pbs.twimg.com/profile_images/1683501992314798080/xl1POYLw_normal.jpg",
"profile_interstitial_type": "", "profile_interstitial_type": "",
"screen_name": "XDevelopers", "screen_name": "XDevelopers",
"statuses_count": 4053, "statuses_count": 4042,
"translator_type": "regular", "translator_type": "regular",
"url": "https://t.co/RUXWsqdGk8", "url": "https://t.co/RUXWsqdGk8",
"verified": false, "verified": false,
@ -108,7 +108,7 @@
"from_index": 63, "from_index": 63,
"to_index": 73, "to_index": 73,
"ref": { "ref": {
"url": "If a Blue Publisher charges readers for content, youll need to subscribe directly to their publication to read stories behind their paywall.s", "url": "https://help.twitter.com/en/rules-and-policies/profile-labels",
"url_type": "ExternalUrl" "url_type": "ExternalUrl"
} }
} }

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -252,6 +252,16 @@ async def test_user_tweets_and_replies():
for doc in tweets: for doc in tweets:
check_tweet(doc) check_tweet(doc)
async def test_list_timeline():
    """Check that every tweet yielded by ``list_timeline`` passes ``check_tweet``.

    The API is set up with the "list_timeline_raw" mock via ``mock_gen``
    (defined elsewhere; presumably it replays the recorded fixture of that
    name instead of performing real requests -- confirm against the helper).
    """
    client = API()
    mock_gen(client, "list_timeline_raw")

    docs = await gather(client.list_timeline(1494877848087187461))

    # An empty result would make the per-tweet loop below pass vacuously.
    assert len(docs) > 0

    for tweet in docs:
        check_tweet(tweet)
async def test_tweet_with_video(): async def test_tweet_with_video():
api = API() api = API()

Просмотреть файл

@ -8,6 +8,8 @@ from .models import parse_tweet, parse_tweets, parse_user, parse_users
from .queue_client import QueueClient from .queue_client import QueueClient
from .utils import encode_params, find_obj, get_by_path from .utils import encode_params, find_obj, get_by_path
# Note: kv holds the variables and ft holds the features taken from the original GQL request
SEARCH_FEATURES = { SEARCH_FEATURES = {
"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": True, "tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": True,
} }

Просмотреть файл

@ -3,17 +3,18 @@ TOKEN = "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Z
GQL_URL = "https://twitter.com/i/api/graphql" GQL_URL = "https://twitter.com/i/api/graphql"
LOGIN_URL = "https://api.twitter.com/1.1/onboarding/task.json" LOGIN_URL = "https://api.twitter.com/1.1/onboarding/task.json"
OP_SearchTimeline = "3Ej-6N7xXONuEp5eJa1TdQ/SearchTimeline"
OP_SearchTimeline = "lZ0GCEojmtQfiUQa5oJSEw/SearchTimeline"
OP_UserByRestId = "QdS5LJDl99iL_KUzckdfNQ/UserByRestId" OP_UserByRestId = "QdS5LJDl99iL_KUzckdfNQ/UserByRestId"
OP_UserByScreenName = "G3KGOASz96M-Qu0nwmGXNg/UserByScreenName" OP_UserByScreenName = "G3KGOASz96M-Qu0nwmGXNg/UserByScreenName"
OP_TweetDetail = "xOhkmRac04YFZmOzU9PJHg/TweetDetail" OP_TweetDetail = "BbmLpxKh8rX8LNe2LhVujA/TweetDetail"
OP_Followers = "rRXFSG5vR6drKr5M37YOTw/Followers" OP_Followers = "9LlZicVr2IBf4u2qW5n4-A/Followers"
OP_Following = "iSicc7LrzWGBgDPL0tM_TQ/Following" OP_Following = "8cyc0OKedV_XD62fBjzxUw/Following"
OP_Retweeters = "9jBdme5U626ATWp01dvgrA/Retweeters" OP_Retweeters = "Y2XHDEKtlJDA_ql2G3OZZQ/Retweeters"
OP_Favoriters = "VIA2_af01oqZqBB6NvWi-Q/Favoriters" OP_Favoriters = "zXD9lMy1-V_N1OcON9JtEQ/Favoriters"
OP_UserTweets = "H8OOoI-5ZE4NxgRr8lfyWg/UserTweets" OP_UserTweets = "VgitpdpNZ-RUIp5D1Z_D-A/UserTweets"
OP_UserTweetsAndReplies = "Q6aAvPw7azXZbqXzuqTALA/UserTweetsAndReplies" OP_UserTweetsAndReplies = "YlkSUg0mRBx7-EkxCvc-bw/UserTweetsAndReplies"
OP_ListLatestTweetsTimeline = "qHgwF5h2HLowIJ6dHmAP_A/ListLatestTweetsTimeline" OP_ListLatestTweetsTimeline = "d1mUZHaqFMxe0xHI3rVc-w/ListLatestTweetsTimeline"
GQL_FEATURES = { GQL_FEATURES = {
"blue_business_profile_image_shape_enabled": True, "blue_business_profile_image_shape_enabled": True,
@ -40,4 +41,6 @@ GQL_FEATURES = {
"responsive_web_media_download_video_enabled": False, "responsive_web_media_download_video_enabled": False,
"rweb_lists_timeline_redesign_enabled": True, "rweb_lists_timeline_redesign_enabled": True,
"responsive_web_twitter_article_tweet_consumption_enabled": False, "responsive_web_twitter_article_tweet_consumption_enabled": False,
"responsive_web_home_pinned_timelines_enabled": True,
"c9s_tweet_anatomy_moderator_badge_enabled": True,
} }