зеркало из
https://github.com/viginum-datalab/twscrape.git
synced 2025-10-29 13:06:13 +02:00
move constants to single file; improve logging
Этот коммит содержится в:
родитель
eadf053f12
Коммит
410a1fb9a4
@ -7,12 +7,10 @@ from fake_useragent import UserAgent
|
||||
from httpx import AsyncClient, HTTPStatusError, Response
|
||||
from loguru import logger
|
||||
|
||||
from .constants import LOGIN_URL, TOKEN
|
||||
from .imap import get_email_code
|
||||
from .utils import raise_for_status
|
||||
|
||||
TOKEN = "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA"
|
||||
TASK_URL = "https://api.twitter.com/1.1/onboarding/task.json"
|
||||
|
||||
|
||||
class Status(str, Enum):
|
||||
NEW = "new"
|
||||
@ -151,7 +149,7 @@ class Account:
|
||||
"subtask_versions": {},
|
||||
}
|
||||
|
||||
rep = await self.client.post(TASK_URL, params={"flow_name": "login"}, json=payload)
|
||||
rep = await self.client.post(LOGIN_URL, params={"flow_name": "login"}, json=payload)
|
||||
raise_for_status(rep, "login_initiate")
|
||||
return rep
|
||||
|
||||
@ -201,7 +199,7 @@ class Account:
|
||||
],
|
||||
}
|
||||
|
||||
rep = await self.client.post(TASK_URL, json=payload)
|
||||
rep = await self.client.post(LOGIN_URL, json=payload)
|
||||
raise_for_status(rep, "login_instrumentation")
|
||||
return rep
|
||||
|
||||
@ -224,7 +222,7 @@ class Account:
|
||||
],
|
||||
}
|
||||
|
||||
rep = await self.client.post(TASK_URL, json=payload)
|
||||
rep = await self.client.post(LOGIN_URL, json=payload)
|
||||
raise_for_status(rep, "login_username")
|
||||
return rep
|
||||
|
||||
@ -239,7 +237,7 @@ class Account:
|
||||
],
|
||||
}
|
||||
|
||||
rep = await self.client.post(TASK_URL, json=payload)
|
||||
rep = await self.client.post(LOGIN_URL, json=payload)
|
||||
raise_for_status(rep, "login_password")
|
||||
return rep
|
||||
|
||||
@ -254,7 +252,7 @@ class Account:
|
||||
],
|
||||
}
|
||||
|
||||
rep = await self.client.post(TASK_URL, json=payload)
|
||||
rep = await self.client.post(LOGIN_URL, json=payload)
|
||||
raise_for_status(rep, "login_duplication_check")
|
||||
return rep
|
||||
|
||||
@ -269,7 +267,7 @@ class Account:
|
||||
],
|
||||
}
|
||||
|
||||
rep = await self.client.post(TASK_URL, json=payload)
|
||||
rep = await self.client.post(LOGIN_URL, json=payload)
|
||||
raise_for_status(rep, "login_confirm_email")
|
||||
return rep
|
||||
|
||||
@ -287,7 +285,7 @@ class Account:
|
||||
],
|
||||
}
|
||||
|
||||
rep = await self.client.post(TASK_URL, json=payload)
|
||||
rep = await self.client.post(LOGIN_URL, json=payload)
|
||||
raise_for_status(rep, "login_confirm_email")
|
||||
return rep
|
||||
|
||||
@ -297,6 +295,6 @@ class Account:
|
||||
"subtask_inputs": [],
|
||||
}
|
||||
|
||||
rep = await self.client.post(TASK_URL, json=payload)
|
||||
rep = await self.client.post(LOGIN_URL, json=payload)
|
||||
raise_for_status(rep, "login_success")
|
||||
return rep
|
||||
@ -2,7 +2,7 @@ import asyncio
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from .client import Account
|
||||
from .account import Account
|
||||
|
||||
|
||||
class AccountsPool:
|
||||
@ -12,6 +12,12 @@ class AccountsPool:
|
||||
def add_account(self, account: Account):
|
||||
self.accounts.append(account)
|
||||
|
||||
def get_login_by_token(self, auth_token: str) -> str:
|
||||
for x in self.accounts:
|
||||
if x.client.cookies.get("auth_token") == auth_token:
|
||||
return x.username
|
||||
return "UNKNOWN"
|
||||
|
||||
def get_account(self, queue: str) -> Account | None:
|
||||
for x in self.accounts:
|
||||
if x.can_use(queue):
|
||||
66
twapi/constants.py
Обычный файл
66
twapi/constants.py
Обычный файл
@ -0,0 +1,66 @@
|
||||
TOKEN = "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA" # noqa: E501
|
||||
|
||||
GQL_URL = "https://twitter.com/i/api/graphql"
|
||||
LOGIN_URL = "https://api.twitter.com/1.1/onboarding/task.json"
|
||||
SEARCH_URL = "https://api.twitter.com/2/search/adaptive.json"
|
||||
|
||||
GQL_FEATURES = {
|
||||
"blue_business_profile_image_shape_enabled": True,
|
||||
"responsive_web_graphql_exclude_directive_enabled": True,
|
||||
"verified_phone_label_enabled": False,
|
||||
"responsive_web_graphql_skip_user_profile_image_extensions_enabled": False,
|
||||
"responsive_web_graphql_timeline_navigation_enabled": True,
|
||||
"tweetypie_unmention_optimization_enabled": True,
|
||||
"vibe_api_enabled": True,
|
||||
"responsive_web_edit_tweet_api_enabled": True,
|
||||
"graphql_is_translatable_rweb_tweet_is_translatable_enabled": True,
|
||||
"view_counts_everywhere_api_enabled": True,
|
||||
"longform_notetweets_consumption_enabled": True,
|
||||
"tweet_awards_web_tipping_enabled": False,
|
||||
"freedom_of_speech_not_reach_fetch_enabled": True,
|
||||
"standardized_nudges_misinfo": True,
|
||||
"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": False,
|
||||
"interactive_text_enabled": True,
|
||||
"responsive_web_text_conversations_enabled": False,
|
||||
"longform_notetweets_rich_text_read_enabled": True,
|
||||
"responsive_web_enhance_cards_enabled": False,
|
||||
}
|
||||
|
||||
SEARCH_PARAMS = {
|
||||
"include_profile_interstitial_type": "1",
|
||||
"include_blocking": "1",
|
||||
"include_blocked_by": "1",
|
||||
"include_followed_by": "1",
|
||||
"include_want_retweets": "1",
|
||||
"include_mute_edge": "1",
|
||||
"include_can_dm": "1",
|
||||
"include_can_media_tag": "1",
|
||||
"include_ext_has_nft_avatar": "1",
|
||||
"include_ext_is_blue_verified": "1",
|
||||
"include_ext_verified_type": "1",
|
||||
"include_ext_profile_image_shape": "1",
|
||||
"skip_status": "1",
|
||||
"cards_platform": "Web-12",
|
||||
"include_cards": "1",
|
||||
"include_ext_alt_text": "true",
|
||||
"include_ext_limited_action_results": "false",
|
||||
"include_quote_count": "true",
|
||||
"include_reply_count": "1",
|
||||
"tweet_mode": "extended",
|
||||
"include_ext_views": "true",
|
||||
"include_entities": "true",
|
||||
"include_user_entities": "true",
|
||||
"include_ext_media_color": "true",
|
||||
"include_ext_media_availability": "true",
|
||||
"include_ext_sensitive_media_warning": "true",
|
||||
"include_ext_trusted_friends_metadata": "true",
|
||||
"send_error_codes": "true",
|
||||
"simple_quoted_tweet": "true",
|
||||
"tweet_search_mode": "live",
|
||||
"query_source": "typed_query",
|
||||
"count": "20",
|
||||
"pc": "1",
|
||||
"spelling_corrections": "1",
|
||||
"include_ext_edit_control": "true",
|
||||
"ext": "mediaStats,highlightedLabel,hasNftAvatar,voiceInfo,birdwatchPivot,enrichments,superFollowMetadata,unmentionInfo,editControl,vibe", # noqa: E501
|
||||
}
|
||||
107
twapi/search.py
107
twapi/search.py
@ -1,106 +1,29 @@
|
||||
import json
|
||||
from time import time
|
||||
import time
|
||||
from typing import Awaitable, Callable
|
||||
|
||||
from httpx import AsyncClient, HTTPStatusError, Response
|
||||
from loguru import logger
|
||||
|
||||
from .accounts_pool import AccountsPool
|
||||
from .constants import GQL_FEATURES, GQL_URL, SEARCH_PARAMS, SEARCH_URL
|
||||
from .models import Tweet, User
|
||||
from .pool import AccountsPool
|
||||
from .utils import encode_params, find_item, to_old_obj, to_search_like
|
||||
|
||||
BASIC_SEARCH_PARAMS = """
|
||||
include_profile_interstitial_type=1
|
||||
include_blocking=1
|
||||
include_blocked_by=1
|
||||
include_followed_by=1
|
||||
include_want_retweets=1
|
||||
include_mute_edge=1
|
||||
include_can_dm=1
|
||||
include_can_media_tag=1
|
||||
include_ext_has_nft_avatar=1
|
||||
include_ext_is_blue_verified=1
|
||||
include_ext_verified_type=1
|
||||
include_ext_profile_image_shape=1
|
||||
skip_status=1
|
||||
cards_platform=Web-12
|
||||
include_cards=1
|
||||
include_ext_alt_text=true
|
||||
include_ext_limited_action_results=false
|
||||
include_quote_count=true
|
||||
include_reply_count=1
|
||||
tweet_mode=extended
|
||||
include_ext_views=true
|
||||
include_entities=true
|
||||
include_user_entities=true
|
||||
include_ext_media_color=true
|
||||
include_ext_media_availability=true
|
||||
include_ext_sensitive_media_warning=true
|
||||
include_ext_trusted_friends_metadata=true
|
||||
send_error_codes=true
|
||||
simple_quoted_tweet=true
|
||||
tweet_search_mode=live
|
||||
query_source=recent_search_click
|
||||
pc=1
|
||||
spelling_corrections=1
|
||||
include_ext_edit_control=true
|
||||
ext=mediaStats%2ChighlightedLabel%2ChasNftAvatar%2CvoiceInfo%2CbirdwatchPivot%2Cenrichments%2CsuperFollowMetadata%2CunmentionInfo%2CeditControl%2Cvibe
|
||||
"""
|
||||
|
||||
BASE_FEATURES = {
|
||||
"blue_business_profile_image_shape_enabled": True,
|
||||
"responsive_web_graphql_exclude_directive_enabled": True,
|
||||
"verified_phone_label_enabled": False,
|
||||
"responsive_web_graphql_skip_user_profile_image_extensions_enabled": False,
|
||||
"responsive_web_graphql_timeline_navigation_enabled": True,
|
||||
#
|
||||
"tweetypie_unmention_optimization_enabled": True,
|
||||
"vibe_api_enabled": True,
|
||||
"responsive_web_edit_tweet_api_enabled": True,
|
||||
"graphql_is_translatable_rweb_tweet_is_translatable_enabled": True,
|
||||
"view_counts_everywhere_api_enabled": True,
|
||||
"longform_notetweets_consumption_enabled": True,
|
||||
"tweet_awards_web_tipping_enabled": False,
|
||||
"freedom_of_speech_not_reach_fetch_enabled": True,
|
||||
"standardized_nudges_misinfo": True,
|
||||
"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": False,
|
||||
"interactive_text_enabled": True,
|
||||
"responsive_web_text_conversations_enabled": False,
|
||||
"longform_notetweets_rich_text_read_enabled": True,
|
||||
"responsive_web_enhance_cards_enabled": False,
|
||||
}
|
||||
|
||||
SEARCH_URL = "https://api.twitter.com/2/search/adaptive.json"
|
||||
SEARCH_PARAMS = dict(x.split("=") for x in BASIC_SEARCH_PARAMS.splitlines() if x)
|
||||
GRAPHQL_URL = "https://twitter.com/i/api/graphql/"
|
||||
|
||||
|
||||
def filter_null(obj: dict):
|
||||
try:
|
||||
return {k: v for k, v in obj.items() if v is not None}
|
||||
except AttributeError:
|
||||
return obj
|
||||
|
||||
|
||||
def json_params(obj: dict):
|
||||
return {k: json.dumps(filter_null(v), separators=(",", ":")) for k, v in obj.items()}
|
||||
|
||||
|
||||
def get_ql_entries(obj: dict) -> list[dict]:
|
||||
entries = find_item(obj, "entries")
|
||||
return entries or []
|
||||
|
||||
|
||||
class Search:
|
||||
def __init__(self, pool: AccountsPool):
|
||||
self.pool = pool
|
||||
|
||||
# http helpers
|
||||
# http helpers
|
||||
|
||||
def _limit_msg(self, rep: Response):
|
||||
lr = rep.headers.get("x-rate-limit-remaining", -1)
|
||||
ll = rep.headers.get("x-rate-limit-limit", -1)
|
||||
return f"{lr}/{ll}"
|
||||
|
||||
auth_token = rep.request.headers["cookie"].split("auth_token=")[1].split(";")[0]
|
||||
username = self.pool.get_login_by_token(auth_token)
|
||||
|
||||
return f"{username} {lr}/{ll}"
|
||||
|
||||
def _is_end(self, rep: Response, q: str, res: list, cur: str | None, cnt: int, lim: int):
|
||||
new_count = len(res)
|
||||
@ -170,12 +93,12 @@ class Search:
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
async def _ql_items(self, op: str, kv: dict, ft: dict = {}, limit=-1):
|
||||
async def _ql_items(self, op: str, kv: dict, limit=-1):
|
||||
queue, cursor, count = op.split("/")[-1], None, 0
|
||||
|
||||
async def _get(client: AsyncClient):
|
||||
params = {"variables": {**kv, "cursor": cursor}, "features": BASE_FEATURES}
|
||||
return await client.get(f"{GRAPHQL_URL}/{op}", params=encode_params(params))
|
||||
params = {"variables": {**kv, "cursor": cursor}, "features": GQL_FEATURES}
|
||||
return await client.get(f"{GQL_URL}/{op}", params=encode_params(params))
|
||||
|
||||
async for rep in self._inf_req(queue, _get):
|
||||
obj = rep.json()
|
||||
@ -197,11 +120,9 @@ class Search:
|
||||
return
|
||||
|
||||
async def _ql_item(self, op: str, kv: dict, ft: dict = {}):
|
||||
variables, features = {**kv}, {**BASE_FEATURES, **ft}
|
||||
params = {"variables": variables, "features": features}
|
||||
|
||||
async def _get(client: AsyncClient):
|
||||
return await client.get(f"{GRAPHQL_URL}/{op}", params=encode_params(params))
|
||||
params = {"variables": {**kv}, "features": {**GQL_FEATURES, **ft}}
|
||||
return await client.get(f"{GQL_URL}/{op}", params=encode_params(params))
|
||||
|
||||
queue = op.split("/")[-1]
|
||||
async for rep in self._inf_req(queue, _get):
|
||||
|
||||
Загрузка…
x
Ссылка в новой задаче
Block a user