added broadcast / audiospace cards #191

Этот коммит содержится в:
Vlad Pronsky 2024-06-29 20:28:06 +03:00
родитель 3c4bbe1d8e
Коммит fe18dd1e17
13 изменённых файлов: 1587 добавлений и 14752 удалений

Просмотреть файл

@ -71,3 +71,7 @@ update-mocks:
twscrape list_timeline --raw --limit 10 1494877848087187461 | jq > ./tests/mocked-data/raw_list_timeline.json
@# twscrape favoriters --raw --limit 10 1649191520250245121 | jq > ./tests/mocked-data/raw_favoriters.json
@# twscrape liked_tweets --raw --limit 10 2244994945 | jq > ./tests/mocked-data/raw_likes.json
x:
twscrape tweet_details --raw 1790441814857826439 | jq > ./tests/mocked-data/card_broadcast.json
twscrape tweet_details --raw 1789054061729173804 | jq > ./tests/mocked-data/card_audiospace.json

Просмотреть файл

@ -2,8 +2,11 @@ import pytest
from twscrape.accounts_pool import AccountsPool
from twscrape.api import API
from twscrape.logger import set_log_level
from twscrape.queue_client import QueueClient
set_log_level("ERROR")
@pytest.fixture
def pool_mock(tmp_path):

433
tests/mocked-data/card_audiospace.json Обычный файл
Просмотреть файл

@ -0,0 +1,433 @@
{
"data": {
"threaded_conversation_with_injections_v2": {
"instructions": [
{
"type": "TimelineAddEntries",
"entries": [
{
"entryId": "tweet-1789054061729173804",
"sortIndex": "7434317975125602003",
"content": {
"entryType": "TimelineTimelineItem",
"__typename": "TimelineTimelineItem",
"itemContent": {
"itemType": "TimelineTweet",
"__typename": "TimelineTweet",
"tweet_results": {
"result": {
"__typename": "Tweet",
"rest_id": "1789054061729173804",
"has_birdwatch_notes": false,
"core": {
"user_results": {
"result": {
"__typename": "User",
"id": "VXNlcjoyMjQ0OTk0OTQ1",
"rest_id": "2244994945",
"affiliates_highlighted_label": {
"label": {
"url": {
"url": "https://twitter.com/X",
"urlType": "DeepLink"
},
"badge": {
"url": "https://pbs.twimg.com/profile_images/1683899100922511378/5lY42eHs_bigger.jpg"
},
"description": "X",
"userLabelType": "BusinessLabel",
"userLabelDisplayType": "Badge"
}
},
"has_graduated_access": true,
"is_blue_verified": true,
"profile_image_shape": "Square",
"legacy": {
"can_dm": true,
"can_media_tag": true,
"created_at": "Sat Dec 14 04:35:55 +0000 2013",
"default_profile": false,
"default_profile_image": false,
"description": "The voice of the X Dev team and your official source for updates, news, and events, related to the X API.",
"entities": {
"description": {
"urls": []
},
"url": {
"urls": [
{
"display_url": "developer.x.com",
"expanded_url": "https://developer.x.com/",
"url": "https://t.co/O13IfbuPqq",
"indices": [
0,
23
]
}
]
}
},
"fast_followers_count": 0,
"favourites_count": 2148,
"followers_count": 646225,
"friends_count": 1777,
"has_custom_timelines": true,
"is_translator": false,
"listed_count": 2684,
"location": "127.0.0.1",
"media_count": 820,
"name": "Developers",
"normal_followers_count": 646225,
"pinned_tweet_ids_str": [
"1770153912013615285"
],
"possibly_sensitive": false,
"profile_banner_url": "https://pbs.twimg.com/profile_banners/2244994945/1690213128",
"profile_image_url_https": "https://pbs.twimg.com/profile_images/1683501992314798080/xl1POYLw_normal.jpg",
"profile_interstitial_type": "",
"screen_name": "XDevelopers",
"statuses_count": 4089,
"translator_type": "regular",
"url": "https://t.co/O13IfbuPqq",
"verified": false,
"verified_type": "Business",
"want_retweets": false,
"withheld_in_countries": []
},
"professional": {
"rest_id": "1516891231749517312",
"professional_type": "Business",
"category": [
{
"id": 1009,
"name": "Community",
"icon_name": "IconBriefcaseStroke"
}
]
},
"tipjar_settings": {}
}
}
},
"card": {
"rest_id": "https://t.co/7ajX0RPmaj",
"legacy": {
"binding_values": [
{
"key": "narrow_cast_space_type",
"value": {
"string_value": "0",
"type": "STRING"
}
},
{
"key": "id",
"value": {
"string_value": "1vOxwjaWEbdJB",
"type": "STRING"
}
},
{
"key": "card_url",
"value": {
"scribe_key": "card_url",
"string_value": "https://t.co/7ajX0RPmaj",
"type": "STRING"
}
}
],
"card_platform": {
"platform": {
"audience": {
"name": "production"
},
"device": {
"name": "Swift",
"version": "12"
}
}
},
"name": "3691233323:audiospace",
"url": "https://t.co/7ajX0RPmaj",
"user_refs_results": []
}
},
"unmention_data": {},
"edit_control": {
"edit_tweet_ids": [
"1789054061729173804"
],
"editable_until_msecs": "1715382301990",
"is_edit_eligible": false,
"edits_remaining": "5"
},
"is_translatable": false,
"views": {
"state": "Enabled"
},
"source": "<a href=\"http://twitter.com/download/iphone\" rel=\"nofollow\">Twitter for iPhone</a>",
"legacy": {
"bookmark_count": 0,
"bookmarked": false,
"created_at": "Fri May 10 22:05:01 +0000 2024",
"conversation_id_str": "1789054061729173804",
"display_text_range": [
0,
140
],
"entities": {
"hashtags": [],
"symbols": [],
"timestamps": [],
"urls": [],
"user_mentions": [
{
"id_str": "857699969263964161",
"name": "Suhem Parack",
"screen_name": "suhemparack",
"indices": [
3,
15
]
},
{
"id_str": "2244994945",
"name": "Developers",
"screen_name": "XDevelopers",
"indices": [
109,
121
]
}
]
},
"favorite_count": 0,
"favorited": false,
"full_text": "RT @suhemparack: If you have an app that posts emergency services updates to X using the API, please join us @XDevelopers next week to lear…",
"is_quote_status": false,
"lang": "en",
"quote_count": 0,
"reply_count": 0,
"retweet_count": 26,
"retweeted": false,
"user_id_str": "2244994945",
"id_str": "1789054061729173804",
"retweeted_status_result": {
"result": {
"__typename": "Tweet",
"rest_id": "1789053970587271232",
"has_birdwatch_notes": false,
"core": {
"user_results": {
"result": {
"__typename": "User",
"id": "VXNlcjo4NTc2OTk5NjkyNjM5NjQxNjE=",
"rest_id": "857699969263964161",
"affiliates_highlighted_label": {
"label": {
"url": {
"url": "https://twitter.com/X",
"urlType": "DeepLink"
},
"badge": {
"url": "https://pbs.twimg.com/profile_images/1683899100922511378/5lY42eHs_bigger.jpg"
},
"description": "X",
"userLabelType": "BusinessLabel",
"userLabelDisplayType": "Badge"
}
},
"has_graduated_access": true,
"is_blue_verified": true,
"profile_image_shape": "Circle",
"legacy": {
"can_dm": true,
"can_media_tag": false,
"created_at": "Thu Apr 27 20:56:22 +0000 2017",
"default_profile": true,
"default_profile_image": false,
"description": "Partner Engineering @ 𝕏 Opinions my own. RTs != endorsements etc",
"entities": {
"description": {
"urls": []
},
"url": {
"urls": [
{
"display_url": "developer.x.com",
"expanded_url": "https://developer.x.com",
"url": "https://t.co/Rh0kWC6xS8",
"indices": [
0,
23
]
}
]
}
},
"fast_followers_count": 0,
"favourites_count": 141,
"followers_count": 4788,
"friends_count": 1577,
"has_custom_timelines": true,
"is_translator": false,
"listed_count": 67,
"location": "San Francisco, CA",
"media_count": 134,
"name": "Suhem Parack",
"normal_followers_count": 4788,
"pinned_tweet_ids_str": [
"1789053970587271232"
],
"possibly_sensitive": false,
"profile_banner_url": "https://pbs.twimg.com/profile_banners/857699969263964161/1712432865",
"profile_image_url_https": "https://pbs.twimg.com/profile_images/1795573096063397892/HB6ShS6B_normal.jpg",
"profile_interstitial_type": "",
"screen_name": "suhemparack",
"statuses_count": 2214,
"translator_type": "none",
"url": "https://t.co/Rh0kWC6xS8",
"verified": false,
"want_retweets": false,
"withheld_in_countries": []
},
"professional": {
"rest_id": "1471588288142057479",
"professional_type": "Creator",
"category": []
},
"tipjar_settings": {}
}
}
},
"card": {
"rest_id": "https://t.co/7ajX0RPmaj",
"legacy": {
"binding_values": [
{
"key": "narrow_cast_space_type",
"value": {
"string_value": "0",
"type": "STRING"
}
},
{
"key": "id",
"value": {
"string_value": "1vOxwjaWEbdJB",
"type": "STRING"
}
},
{
"key": "card_url",
"value": {
"scribe_key": "card_url",
"string_value": "https://t.co/7ajX0RPmaj",
"type": "STRING"
}
}
],
"card_platform": {
"platform": {
"audience": {
"name": "production"
},
"device": {
"name": "Swift",
"version": "12"
}
}
},
"name": "3691233323:audiospace",
"url": "https://t.co/7ajX0RPmaj",
"user_refs_results": []
}
},
"unmention_data": {},
"edit_control": {
"edit_tweet_ids": [
"1789053970587271232"
],
"editable_until_msecs": "1715382280000",
"is_edit_eligible": false,
"edits_remaining": "5"
},
"is_translatable": false,
"views": {
"count": "121718",
"state": "EnabledWithCount"
},
"source": "<a href=\"http://twitter.com/download/iphone\" rel=\"nofollow\">Twitter for iPhone</a>",
"legacy": {
"bookmark_count": 9,
"bookmarked": false,
"created_at": "Fri May 10 22:04:40 +0000 2024",
"conversation_id_str": "1789053970587271232",
"display_text_range": [
0,
200
],
"entities": {
"hashtags": [],
"symbols": [],
"timestamps": [],
"urls": [
{
"display_url": "x.com/i/spaces/1voxw…",
"expanded_url": "https://twitter.com/i/spaces/1vOxwjaWEbdJB",
"url": "https://t.co/7ajX0RPmaj",
"indices": [
177,
200
]
}
],
"user_mentions": [
{
"id_str": "2244994945",
"name": "Developers",
"screen_name": "XDevelopers",
"indices": [
92,
104
]
}
]
},
"favorite_count": 108,
"favorited": false,
"full_text": "If you have an app that posts emergency services updates to X using the API, please join us @XDevelopers next week to learn how you can quickly migrate your app to the X API v2 https://t.co/7ajX0RPmaj",
"is_quote_status": false,
"lang": "en",
"possibly_sensitive": false,
"possibly_sensitive_editable": true,
"quote_count": 5,
"reply_count": 16,
"retweet_count": 26,
"retweeted": false,
"user_id_str": "857699969263964161",
"id_str": "1789053970587271232"
}
}
}
},
"quick_promote_eligibility": {
"eligibility": "IneligibleNotProfessional"
}
}
},
"tweetDisplayType": "Tweet",
"hasModeratedReplies": false
}
}
}
]
},
{
"type": "TimelineTerminateTimeline",
"direction": "Top"
}
]
}
}
}

1064
tests/mocked-data/card_broadcast.json Обычный файл

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

Просмотреть файл

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -4,11 +4,8 @@ import pytest
from twscrape.accounts_pool import NoAccountError
from twscrape.api import API
from twscrape.logger import set_log_level
from twscrape.utils import gather, get_env_bool
set_log_level("DEBUG")
class MockedError(Exception):
pass

Просмотреть файл

@ -3,15 +3,21 @@ import os
from typing import Callable
from twscrape import API, gather
from twscrape.logger import set_log_level
from twscrape.models import PollCard, SummaryCard, Tweet, User, UserRef, parse_tweet
from twscrape.models import (
AudiospaceCard,
BroadcastCard,
PollCard,
SummaryCard,
Tweet,
User,
UserRef,
parse_tweet,
)
BASE_DIR = os.path.dirname(__file__)
DATA_DIR = os.path.join(BASE_DIR, "mocked-data")
os.makedirs(DATA_DIR, exist_ok=True)
set_log_level("DEBUG")
class FakeRep:
text: str
@ -419,9 +425,13 @@ async def test_issue_56():
assert len(doc.links) == 5
async def test_issue_72():
async def test_cards():
# Issues:
# - https://github.com/vladkens/twscrape/issues/72
# - https://github.com/vladkens/twscrape/issues/191
# Check SummaryCard
raw = fake_rep("_issue_72").json()
raw = fake_rep("card_summary").json()
doc = parse_tweet(raw, 1696922210588410217)
assert doc is not None
assert doc.card is not None
@ -431,8 +441,8 @@ async def test_issue_72():
assert doc.card.description is not None
assert doc.card.url is not None
# Check PoolCard
raw = fake_rep("_issue_72_poll").json()
# Check PollCard
raw = fake_rep("card_poll").json()
doc = parse_tweet(raw, 1780666831310877100)
assert doc is not None
assert doc.card is not None
@ -444,3 +454,21 @@ async def test_issue_72():
for x in doc.card.options:
assert x.label is not None
assert x.votesCount is not None
# Check BroadcastCard
raw = fake_rep("card_broadcast").json()
doc = parse_tweet(raw, 1790441814857826439)
assert doc is not None and doc.card is not None
assert doc.card._type == "broadcast"
assert isinstance(doc.card, BroadcastCard)
assert doc.card.title is not None
assert doc.card.url is not None
assert doc.card.photo is not None
# Check AudiospaceCard
raw = fake_rep("card_audiospace").json()
doc = parse_tweet(raw, 1789054061729173804)
assert doc is not None and doc.card is not None
assert doc.card._type == "audiospace"
assert isinstance(doc.card, AudiospaceCard)
assert doc.card.url is not None

Просмотреть файл

@ -4,11 +4,8 @@ import httpx
from pytest_httpx import HTTPXMock
from twscrape.accounts_pool import AccountsPool
from twscrape.logger import set_log_level
from twscrape.queue_client import QueueClient
set_log_level("ERROR")
DB_FILE = "/tmp/twscrape_test_queue_client.db"
URL = "https://example.com/api"
CF = tuple[AccountsPool, QueueClient]

Просмотреть файл

@ -12,5 +12,9 @@ def set_log_level(level: _LEVELS):
_LOG_LEVEL = level
def _filter(r):
return r["level"].no >= logger.level(_LOG_LEVEL).no
logger.remove()
logger.add(sys.stderr, filter=lambda r: r["level"].no >= logger.level(_LOG_LEVEL).no)
logger.add(sys.stderr, filter=_filter)

Просмотреть файл

@ -4,10 +4,11 @@ import os
import random
import re
import string
import sys
import traceback
from dataclasses import asdict, dataclass, field
from datetime import datetime
from typing import Generator, Optional
from typing import Generator, Optional, Union
import httpx
@ -187,7 +188,7 @@ class Tweet(JSONTrait):
sourceUrl: str | None = None
sourceLabel: str | None = None
media: Optional["Media"] = None
card: Optional["SummaryCard"] | Optional["PollCard"] = None
card: Union[None, "SummaryCard", "PollCard", "BroadcastCard", "AudiospaceCard"] = None
_type: str = "snscrape.modules.twitter.Tweet"
# todo:
@ -381,6 +382,20 @@ class PollCard(Card):
_type: str = "poll"
@dataclass
class BroadcastCard(Card):
title: str
url: str
photo: MediaPhoto | None = None
_type: str = "broadcast"
@dataclass
class AudiospaceCard(Card):
url: str
_type: str = "audiospace"
def _parse_card_get_bool(values: list[dict], key: str):
for x in values:
if x["key"] == key:
@ -388,7 +403,7 @@ def _parse_card_get_bool(values: list[dict], key: str):
return False
def _parse_card_get_str(values: list[dict], key: str, defaultVal=None):
def _parse_card_get_str(values: list[dict], key: str, defaultVal=None) -> str | None:
for x in values:
if x["key"] == key:
return x["value"]["string_value"]
@ -501,8 +516,31 @@ def _parse_card(obj: dict, url: str):
# print(json.dumps(val, indent=2))
return PollCard(options=options, finished=finished)
if name == "745291183405076480:broadcast":
val = _parse_card_prepare_values(obj)
card_url = _parse_card_get_str(val, "broadcast_url")
card_title = _parse_card_get_str(val, "broadcast_title")
photo, _ = _parse_card_extract_largest_photo(val)
if card_url is None or card_title is None:
return None
return BroadcastCard(title=card_title, url=card_url, photo=photo)
if name == "3691233323:audiospace":
# no more data is available in this object; an extra API call is possibly needed to get the card info
val = _parse_card_prepare_values(obj)
card_url = _parse_card_get_str(val, "card_url")
if card_url is None:
return None
# print(json.dumps(val, indent=2))
return AudiospaceCard(url=card_url)
logger.warning(f"Unknown card type '{name}' on {url}")
# print(json.dumps(obj["card"]["legacy"], indent=2))
if "PYTEST_CURRENT_TEST" in os.environ: # help debugging tests
print(f"Unknown card type '{name}' on {url}", file=sys.stderr)
# print(json.dumps(obj["card"]["legacy"], indent=2))
return None
# internal helpers