Этот коммит содержится в:
Vlad Pronsky 2023-07-05 00:17:40 +03:00
родитель 65f40c9a5a
Коммит 5e4f5392d8
2 изменённых файлов: 22 добавлений и 12 удалений

Просмотреть файл

@ -4,7 +4,7 @@ build-backend = "hatchling.build"
[project] [project]
name = "twscrape" name = "twscrape"
version = "0.3.0" version = "0.4.0"
authors = [{name = "vladkens", email = "v.pronsky@gmail.com"}] authors = [{name = "vladkens", email = "v.pronsky@gmail.com"}]
description = "Twitter GraphQL and Search API implementation with SNScrape data models" description = "Twitter GraphQL and Search API implementation with SNScrape data models"
readme = "readme.md" readme = "readme.md"

Просмотреть файл

@ -60,17 +60,20 @@ class TextLink(JSONTrait):
url: str url: str
text: str | None text: str | None
tcourl: str | None tcourl: str | None
indices: tuple[int, int]
@staticmethod @staticmethod
def parse(obj: dict): def parse(obj: dict):
return TextLink( tmp = TextLink(
url=obj["expanded_url"], url=obj.get("expanded_url", None),
text=obj["display_url"], text=obj.get("display_url", None),
tcourl=obj["url"], tcourl=obj.get("url", None),
indices=tuple(obj["indices"]),
) )
if tmp.url is None or tmp.tcourl is None:
return None
return tmp
@dataclass @dataclass
class UserRef(JSONTrait): class UserRef(JSONTrait):
@ -113,9 +116,6 @@ class User(JSONTrait):
@staticmethod @staticmethod
def parse(obj: dict): def parse(obj: dict):
links = get_or(obj, "entities.description.urls", []) + get_or(obj, "entities.url.urls", [])
links = [TextLink.parse(x) for x in links]
return User( return User(
id=int(obj["id_str"]), id=int(obj["id_str"]),
id_str=obj["id_str"], id_str=obj["id_str"],
@ -135,7 +135,7 @@ class User(JSONTrait):
profileBannerUrl=obj.get("profile_banner_url"), profileBannerUrl=obj.get("profile_banner_url"),
verified=obj.get("verified"), verified=obj.get("verified"),
protected=obj.get("protected"), protected=obj.get("protected"),
descriptionLinks=links, descriptionLinks=_parse_links(obj, ["entities.description.urls", "entities.url.urls"]),
) )
@ -197,7 +197,7 @@ class Tweet(JSONTrait):
hashtags=[x["text"] for x in get_or(obj, "entities.hashtags", [])], hashtags=[x["text"] for x in get_or(obj, "entities.hashtags", [])],
cashtags=[x["text"] for x in get_or(obj, "entities.symbols", [])], cashtags=[x["text"] for x in get_or(obj, "entities.symbols", [])],
mentionedUsers=[UserRef.parse(x) for x in get_or(obj, "entities.user_mentions", [])], mentionedUsers=[UserRef.parse(x) for x in get_or(obj, "entities.user_mentions", [])],
links=[TextLink.parse(x) for x in get_or(obj, "entities.urls", [])], links=_parse_links(obj, ["entities.urls"]),
viewCount=int_or_none(obj, "ext_views.count"), viewCount=int_or_none(obj, "ext_views.count"),
retweetedTweet=Tweet.parse(rt_obj, res) if rt_obj else None, retweetedTweet=Tweet.parse(rt_obj, res) if rt_obj else None,
quotedTweet=Tweet.parse(qt_obj, res) if qt_obj else None, quotedTweet=Tweet.parse(qt_obj, res) if qt_obj else None,
@ -335,3 +335,13 @@ def _get_source_label(tw_obj: dict):
if source and (match := re.search(r">([^<]*)<", source)): if source and (match := re.search(r">([^<]*)<", source)):
return str(match.group(1)) return str(match.group(1))
return None return None
def _parse_links(obj: dict, paths: list[str]):
    """Gather parsed links found under several lookup paths of ``obj``.

    Every entry of ``paths`` is handed to ``get_or`` together with ``obj``
    and ``[]`` as the third argument (presumably the fallback when the path
    is absent -- confirm against ``get_or``). All items obtained this way
    are then passed, in order, to ``TextLink.parse``; results that are
    ``None`` are discarded.

    Returns a list of the non-``None`` parse results, ordered by path first
    and by position under each path second.
    """
    # Collect every raw entry up front so that all lookups happen before
    # any parsing starts.
    raw_items = [item for path in paths for item in get_or(obj, path, [])]
    parsed = (TextLink.parse(item) for item in raw_items)
    return [link for link in parsed if link is not None]