added broadcast / audiospace cards #191

Этот коммит содержится в:
Vlad Pronsky 2024-06-29 20:28:06 +03:00
родитель 3c4bbe1d8e
Коммит fe18dd1e17
13 изменённых файлов: 1587 добавлений и 14752 удалений

Просмотреть файл

@ -71,3 +71,7 @@ update-mocks:
twscrape list_timeline --raw --limit 10 1494877848087187461 | jq > ./tests/mocked-data/raw_list_timeline.json twscrape list_timeline --raw --limit 10 1494877848087187461 | jq > ./tests/mocked-data/raw_list_timeline.json
@# twscrape favoriters --raw --limit 10 1649191520250245121 | jq > ./tests/mocked-data/raw_favoriters.json @# twscrape favoriters --raw --limit 10 1649191520250245121 | jq > ./tests/mocked-data/raw_favoriters.json
@# twscrape liked_tweets --raw --limit 10 2244994945 | jq > ./tests/mocked-data/raw_likes.json @# twscrape liked_tweets --raw --limit 10 2244994945 | jq > ./tests/mocked-data/raw_likes.json
x:
twscrape tweet_details --raw 1790441814857826439 | jq > ./tests/mocked-data/card_broadcast.json
twscrape tweet_details --raw 1789054061729173804 | jq > ./tests/mocked-data/card_audiospace.json

Просмотреть файл

@ -2,8 +2,11 @@ import pytest
from twscrape.accounts_pool import AccountsPool from twscrape.accounts_pool import AccountsPool
from twscrape.api import API from twscrape.api import API
from twscrape.logger import set_log_level
from twscrape.queue_client import QueueClient from twscrape.queue_client import QueueClient
set_log_level("ERROR")
@pytest.fixture @pytest.fixture
def pool_mock(tmp_path): def pool_mock(tmp_path):

433
tests/mocked-data/card_audiospace.json Обычный файл
Просмотреть файл

@ -0,0 +1,433 @@
{
"data": {
"threaded_conversation_with_injections_v2": {
"instructions": [
{
"type": "TimelineAddEntries",
"entries": [
{
"entryId": "tweet-1789054061729173804",
"sortIndex": "7434317975125602003",
"content": {
"entryType": "TimelineTimelineItem",
"__typename": "TimelineTimelineItem",
"itemContent": {
"itemType": "TimelineTweet",
"__typename": "TimelineTweet",
"tweet_results": {
"result": {
"__typename": "Tweet",
"rest_id": "1789054061729173804",
"has_birdwatch_notes": false,
"core": {
"user_results": {
"result": {
"__typename": "User",
"id": "VXNlcjoyMjQ0OTk0OTQ1",
"rest_id": "2244994945",
"affiliates_highlighted_label": {
"label": {
"url": {
"url": "https://twitter.com/X",
"urlType": "DeepLink"
},
"badge": {
"url": "https://pbs.twimg.com/profile_images/1683899100922511378/5lY42eHs_bigger.jpg"
},
"description": "X",
"userLabelType": "BusinessLabel",
"userLabelDisplayType": "Badge"
}
},
"has_graduated_access": true,
"is_blue_verified": true,
"profile_image_shape": "Square",
"legacy": {
"can_dm": true,
"can_media_tag": true,
"created_at": "Sat Dec 14 04:35:55 +0000 2013",
"default_profile": false,
"default_profile_image": false,
"description": "The voice of the X Dev team and your official source for updates, news, and events, related to the X API.",
"entities": {
"description": {
"urls": []
},
"url": {
"urls": [
{
"display_url": "developer.x.com",
"expanded_url": "https://developer.x.com/",
"url": "https://t.co/O13IfbuPqq",
"indices": [
0,
23
]
}
]
}
},
"fast_followers_count": 0,
"favourites_count": 2148,
"followers_count": 646225,
"friends_count": 1777,
"has_custom_timelines": true,
"is_translator": false,
"listed_count": 2684,
"location": "127.0.0.1",
"media_count": 820,
"name": "Developers",
"normal_followers_count": 646225,
"pinned_tweet_ids_str": [
"1770153912013615285"
],
"possibly_sensitive": false,
"profile_banner_url": "https://pbs.twimg.com/profile_banners/2244994945/1690213128",
"profile_image_url_https": "https://pbs.twimg.com/profile_images/1683501992314798080/xl1POYLw_normal.jpg",
"profile_interstitial_type": "",
"screen_name": "XDevelopers",
"statuses_count": 4089,
"translator_type": "regular",
"url": "https://t.co/O13IfbuPqq",
"verified": false,
"verified_type": "Business",
"want_retweets": false,
"withheld_in_countries": []
},
"professional": {
"rest_id": "1516891231749517312",
"professional_type": "Business",
"category": [
{
"id": 1009,
"name": "Community",
"icon_name": "IconBriefcaseStroke"
}
]
},
"tipjar_settings": {}
}
}
},
"card": {
"rest_id": "https://t.co/7ajX0RPmaj",
"legacy": {
"binding_values": [
{
"key": "narrow_cast_space_type",
"value": {
"string_value": "0",
"type": "STRING"
}
},
{
"key": "id",
"value": {
"string_value": "1vOxwjaWEbdJB",
"type": "STRING"
}
},
{
"key": "card_url",
"value": {
"scribe_key": "card_url",
"string_value": "https://t.co/7ajX0RPmaj",
"type": "STRING"
}
}
],
"card_platform": {
"platform": {
"audience": {
"name": "production"
},
"device": {
"name": "Swift",
"version": "12"
}
}
},
"name": "3691233323:audiospace",
"url": "https://t.co/7ajX0RPmaj",
"user_refs_results": []
}
},
"unmention_data": {},
"edit_control": {
"edit_tweet_ids": [
"1789054061729173804"
],
"editable_until_msecs": "1715382301990",
"is_edit_eligible": false,
"edits_remaining": "5"
},
"is_translatable": false,
"views": {
"state": "Enabled"
},
"source": "<a href=\"http://twitter.com/download/iphone\" rel=\"nofollow\">Twitter for iPhone</a>",
"legacy": {
"bookmark_count": 0,
"bookmarked": false,
"created_at": "Fri May 10 22:05:01 +0000 2024",
"conversation_id_str": "1789054061729173804",
"display_text_range": [
0,
140
],
"entities": {
"hashtags": [],
"symbols": [],
"timestamps": [],
"urls": [],
"user_mentions": [
{
"id_str": "857699969263964161",
"name": "Suhem Parack",
"screen_name": "suhemparack",
"indices": [
3,
15
]
},
{
"id_str": "2244994945",
"name": "Developers",
"screen_name": "XDevelopers",
"indices": [
109,
121
]
}
]
},
"favorite_count": 0,
"favorited": false,
"full_text": "RT @suhemparack: If you have an app that posts emergency services updates to X using the API, please join us @XDevelopers next week to lear…",
"is_quote_status": false,
"lang": "en",
"quote_count": 0,
"reply_count": 0,
"retweet_count": 26,
"retweeted": false,
"user_id_str": "2244994945",
"id_str": "1789054061729173804",
"retweeted_status_result": {
"result": {
"__typename": "Tweet",
"rest_id": "1789053970587271232",
"has_birdwatch_notes": false,
"core": {
"user_results": {
"result": {
"__typename": "User",
"id": "VXNlcjo4NTc2OTk5NjkyNjM5NjQxNjE=",
"rest_id": "857699969263964161",
"affiliates_highlighted_label": {
"label": {
"url": {
"url": "https://twitter.com/X",
"urlType": "DeepLink"
},
"badge": {
"url": "https://pbs.twimg.com/profile_images/1683899100922511378/5lY42eHs_bigger.jpg"
},
"description": "X",
"userLabelType": "BusinessLabel",
"userLabelDisplayType": "Badge"
}
},
"has_graduated_access": true,
"is_blue_verified": true,
"profile_image_shape": "Circle",
"legacy": {
"can_dm": true,
"can_media_tag": false,
"created_at": "Thu Apr 27 20:56:22 +0000 2017",
"default_profile": true,
"default_profile_image": false,
"description": "Partner Engineering @ 𝕏 Opinions my own. RTs != endorsements etc",
"entities": {
"description": {
"urls": []
},
"url": {
"urls": [
{
"display_url": "developer.x.com",
"expanded_url": "https://developer.x.com",
"url": "https://t.co/Rh0kWC6xS8",
"indices": [
0,
23
]
}
]
}
},
"fast_followers_count": 0,
"favourites_count": 141,
"followers_count": 4788,
"friends_count": 1577,
"has_custom_timelines": true,
"is_translator": false,
"listed_count": 67,
"location": "San Francisco, CA",
"media_count": 134,
"name": "Suhem Parack",
"normal_followers_count": 4788,
"pinned_tweet_ids_str": [
"1789053970587271232"
],
"possibly_sensitive": false,
"profile_banner_url": "https://pbs.twimg.com/profile_banners/857699969263964161/1712432865",
"profile_image_url_https": "https://pbs.twimg.com/profile_images/1795573096063397892/HB6ShS6B_normal.jpg",
"profile_interstitial_type": "",
"screen_name": "suhemparack",
"statuses_count": 2214,
"translator_type": "none",
"url": "https://t.co/Rh0kWC6xS8",
"verified": false,
"want_retweets": false,
"withheld_in_countries": []
},
"professional": {
"rest_id": "1471588288142057479",
"professional_type": "Creator",
"category": []
},
"tipjar_settings": {}
}
}
},
"card": {
"rest_id": "https://t.co/7ajX0RPmaj",
"legacy": {
"binding_values": [
{
"key": "narrow_cast_space_type",
"value": {
"string_value": "0",
"type": "STRING"
}
},
{
"key": "id",
"value": {
"string_value": "1vOxwjaWEbdJB",
"type": "STRING"
}
},
{
"key": "card_url",
"value": {
"scribe_key": "card_url",
"string_value": "https://t.co/7ajX0RPmaj",
"type": "STRING"
}
}
],
"card_platform": {
"platform": {
"audience": {
"name": "production"
},
"device": {
"name": "Swift",
"version": "12"
}
}
},
"name": "3691233323:audiospace",
"url": "https://t.co/7ajX0RPmaj",
"user_refs_results": []
}
},
"unmention_data": {},
"edit_control": {
"edit_tweet_ids": [
"1789053970587271232"
],
"editable_until_msecs": "1715382280000",
"is_edit_eligible": false,
"edits_remaining": "5"
},
"is_translatable": false,
"views": {
"count": "121718",
"state": "EnabledWithCount"
},
"source": "<a href=\"http://twitter.com/download/iphone\" rel=\"nofollow\">Twitter for iPhone</a>",
"legacy": {
"bookmark_count": 9,
"bookmarked": false,
"created_at": "Fri May 10 22:04:40 +0000 2024",
"conversation_id_str": "1789053970587271232",
"display_text_range": [
0,
200
],
"entities": {
"hashtags": [],
"symbols": [],
"timestamps": [],
"urls": [
{
"display_url": "x.com/i/spaces/1voxw…",
"expanded_url": "https://twitter.com/i/spaces/1vOxwjaWEbdJB",
"url": "https://t.co/7ajX0RPmaj",
"indices": [
177,
200
]
}
],
"user_mentions": [
{
"id_str": "2244994945",
"name": "Developers",
"screen_name": "XDevelopers",
"indices": [
92,
104
]
}
]
},
"favorite_count": 108,
"favorited": false,
"full_text": "If you have an app that posts emergency services updates to X using the API, please join us @XDevelopers next week to learn how you can quickly migrate your app to the X API v2 https://t.co/7ajX0RPmaj",
"is_quote_status": false,
"lang": "en",
"possibly_sensitive": false,
"possibly_sensitive_editable": true,
"quote_count": 5,
"reply_count": 16,
"retweet_count": 26,
"retweeted": false,
"user_id_str": "857699969263964161",
"id_str": "1789053970587271232"
}
}
}
},
"quick_promote_eligibility": {
"eligibility": "IneligibleNotProfessional"
}
}
},
"tweetDisplayType": "Tweet",
"hasModeratedReplies": false
}
}
}
]
},
{
"type": "TimelineTerminateTimeline",
"direction": "Top"
}
]
}
}
}

1064
tests/mocked-data/card_broadcast.json Обычный файл

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

Просмотреть файл

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -4,11 +4,8 @@ import pytest
from twscrape.accounts_pool import NoAccountError from twscrape.accounts_pool import NoAccountError
from twscrape.api import API from twscrape.api import API
from twscrape.logger import set_log_level
from twscrape.utils import gather, get_env_bool from twscrape.utils import gather, get_env_bool
set_log_level("DEBUG")
class MockedError(Exception): class MockedError(Exception):
pass pass

Просмотреть файл

@ -3,15 +3,21 @@ import os
from typing import Callable from typing import Callable
from twscrape import API, gather from twscrape import API, gather
from twscrape.logger import set_log_level from twscrape.models import (
from twscrape.models import PollCard, SummaryCard, Tweet, User, UserRef, parse_tweet AudiospaceCard,
BroadcastCard,
PollCard,
SummaryCard,
Tweet,
User,
UserRef,
parse_tweet,
)
BASE_DIR = os.path.dirname(__file__) BASE_DIR = os.path.dirname(__file__)
DATA_DIR = os.path.join(BASE_DIR, "mocked-data") DATA_DIR = os.path.join(BASE_DIR, "mocked-data")
os.makedirs(DATA_DIR, exist_ok=True) os.makedirs(DATA_DIR, exist_ok=True)
set_log_level("DEBUG")
class FakeRep: class FakeRep:
text: str text: str
@ -419,9 +425,13 @@ async def test_issue_56():
assert len(doc.links) == 5 assert len(doc.links) == 5
async def test_issue_72(): async def test_cards():
# Issues:
# - https://github.com/vladkens/twscrape/issues/72
# - https://github.com/vladkens/twscrape/issues/191
# Check SummaryCard # Check SummaryCard
raw = fake_rep("_issue_72").json() raw = fake_rep("card_summary").json()
doc = parse_tweet(raw, 1696922210588410217) doc = parse_tweet(raw, 1696922210588410217)
assert doc is not None assert doc is not None
assert doc.card is not None assert doc.card is not None
@ -431,8 +441,8 @@ async def test_issue_72():
assert doc.card.description is not None assert doc.card.description is not None
assert doc.card.url is not None assert doc.card.url is not None
# Check PoolCard # Check PollCard
raw = fake_rep("_issue_72_poll").json() raw = fake_rep("card_poll").json()
doc = parse_tweet(raw, 1780666831310877100) doc = parse_tweet(raw, 1780666831310877100)
assert doc is not None assert doc is not None
assert doc.card is not None assert doc.card is not None
@ -444,3 +454,21 @@ async def test_issue_72():
for x in doc.card.options: for x in doc.card.options:
assert x.label is not None assert x.label is not None
assert x.votesCount is not None assert x.votesCount is not None
# Check BroadcastCard
raw = fake_rep("card_broadcast").json()
doc = parse_tweet(raw, 1790441814857826439)
assert doc is not None and doc.card is not None
assert doc.card._type == "broadcast"
assert isinstance(doc.card, BroadcastCard)
assert doc.card.title is not None
assert doc.card.url is not None
assert doc.card.photo is not None
# Check AudiospaceCard
raw = fake_rep("card_audiospace").json()
doc = parse_tweet(raw, 1789054061729173804)
assert doc is not None and doc.card is not None
assert doc.card._type == "audiospace"
assert isinstance(doc.card, AudiospaceCard)
assert doc.card.url is not None

Просмотреть файл

@ -4,11 +4,8 @@ import httpx
from pytest_httpx import HTTPXMock from pytest_httpx import HTTPXMock
from twscrape.accounts_pool import AccountsPool from twscrape.accounts_pool import AccountsPool
from twscrape.logger import set_log_level
from twscrape.queue_client import QueueClient from twscrape.queue_client import QueueClient
set_log_level("ERROR")
DB_FILE = "/tmp/twscrape_test_queue_client.db" DB_FILE = "/tmp/twscrape_test_queue_client.db"
URL = "https://example.com/api" URL = "https://example.com/api"
CF = tuple[AccountsPool, QueueClient] CF = tuple[AccountsPool, QueueClient]

Просмотреть файл

@ -12,5 +12,9 @@ def set_log_level(level: _LEVELS):
_LOG_LEVEL = level _LOG_LEVEL = level
def _filter(r):
return r["level"].no >= logger.level(_LOG_LEVEL).no
logger.remove() logger.remove()
logger.add(sys.stderr, filter=lambda r: r["level"].no >= logger.level(_LOG_LEVEL).no) logger.add(sys.stderr, filter=_filter)

Просмотреть файл

@ -4,10 +4,11 @@ import os
import random import random
import re import re
import string import string
import sys
import traceback import traceback
from dataclasses import asdict, dataclass, field from dataclasses import asdict, dataclass, field
from datetime import datetime from datetime import datetime
from typing import Generator, Optional from typing import Generator, Optional, Union
import httpx import httpx
@ -187,7 +188,7 @@ class Tweet(JSONTrait):
sourceUrl: str | None = None sourceUrl: str | None = None
sourceLabel: str | None = None sourceLabel: str | None = None
media: Optional["Media"] = None media: Optional["Media"] = None
card: Optional["SummaryCard"] | Optional["PollCard"] = None card: Union[None, "SummaryCard", "PollCard", "BroadcastCard", "AudiospaceCard"] = None
_type: str = "snscrape.modules.twitter.Tweet" _type: str = "snscrape.modules.twitter.Tweet"
# todo: # todo:
@ -381,6 +382,20 @@ class PollCard(Card):
_type: str = "poll" _type: str = "poll"
@dataclass
class BroadcastCard(Card):
title: str
url: str
photo: MediaPhoto | None = None
_type: str = "broadcast"
@dataclass
class AudiospaceCard(Card):
url: str
_type: str = "audiospace"
def _parse_card_get_bool(values: list[dict], key: str): def _parse_card_get_bool(values: list[dict], key: str):
for x in values: for x in values:
if x["key"] == key: if x["key"] == key:
@ -388,7 +403,7 @@ def _parse_card_get_bool(values: list[dict], key: str):
return False return False
def _parse_card_get_str(values: list[dict], key: str, defaultVal=None): def _parse_card_get_str(values: list[dict], key: str, defaultVal=None) -> str | None:
for x in values: for x in values:
if x["key"] == key: if x["key"] == key:
return x["value"]["string_value"] return x["value"]["string_value"]
@ -501,8 +516,31 @@ def _parse_card(obj: dict, url: str):
# print(json.dumps(val, indent=2)) # print(json.dumps(val, indent=2))
return PollCard(options=options, finished=finished) return PollCard(options=options, finished=finished)
if name == "745291183405076480:broadcast":
val = _parse_card_prepare_values(obj)
card_url = _parse_card_get_str(val, "broadcast_url")
card_title = _parse_card_get_str(val, "broadcast_title")
photo, _ = _parse_card_extract_largest_photo(val)
if card_url is None or card_title is None:
return None
return BroadcastCard(title=card_title, url=card_url, photo=photo)
if name == "3691233323:audiospace":
# no more data in this object; an extra API call may be needed to get the card info
val = _parse_card_prepare_values(obj)
card_url = _parse_card_get_str(val, "card_url")
if card_url is None:
return None
# print(json.dumps(val, indent=2))
return AudiospaceCard(url=card_url)
logger.warning(f"Unknown card type '{name}' on {url}") logger.warning(f"Unknown card type '{name}' on {url}")
# print(json.dumps(obj["card"]["legacy"], indent=2)) if "PYTEST_CURRENT_TEST" in os.environ: # help debugging tests
print(f"Unknown card type '{name}' on {url}", file=sys.stderr)
# print(json.dumps(obj["card"]["legacy"], indent=2))
return None
# internal helpers # internal helpers