зеркало из
https://github.com/viginum-datalab/twscrape.git
synced 2025-11-03 23:46:20 +02:00
fix: urls in user profile
Этот коммит содержится в:
родитель
65f40c9a5a
Коммит
5e4f5392d8
@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "twscrape"
|
name = "twscrape"
|
||||||
version = "0.3.0"
|
version = "0.4.0"
|
||||||
authors = [{name = "vladkens", email = "v.pronsky@gmail.com"}]
|
authors = [{name = "vladkens", email = "v.pronsky@gmail.com"}]
|
||||||
description = "Twitter GraphQL and Search API implementation with SNScrape data models"
|
description = "Twitter GraphQL and Search API implementation with SNScrape data models"
|
||||||
readme = "readme.md"
|
readme = "readme.md"
|
||||||
|
|||||||
@ -60,17 +60,20 @@ class TextLink(JSONTrait):
|
|||||||
url: str
|
url: str
|
||||||
text: str | None
|
text: str | None
|
||||||
tcourl: str | None
|
tcourl: str | None
|
||||||
indices: tuple[int, int]
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def parse(obj: dict):
|
def parse(obj: dict):
|
||||||
return TextLink(
|
tmp = TextLink(
|
||||||
url=obj["expanded_url"],
|
url=obj.get("expanded_url", None),
|
||||||
text=obj["display_url"],
|
text=obj.get("display_url", None),
|
||||||
tcourl=obj["url"],
|
tcourl=obj.get("url", None),
|
||||||
indices=tuple(obj["indices"]),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if tmp.url is None or tmp.tcourl is None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
return tmp
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class UserRef(JSONTrait):
|
class UserRef(JSONTrait):
|
||||||
@ -113,9 +116,6 @@ class User(JSONTrait):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def parse(obj: dict):
|
def parse(obj: dict):
|
||||||
links = get_or(obj, "entities.description.urls", []) + get_or(obj, "entities.url.urls", [])
|
|
||||||
links = [TextLink.parse(x) for x in links]
|
|
||||||
|
|
||||||
return User(
|
return User(
|
||||||
id=int(obj["id_str"]),
|
id=int(obj["id_str"]),
|
||||||
id_str=obj["id_str"],
|
id_str=obj["id_str"],
|
||||||
@ -135,7 +135,7 @@ class User(JSONTrait):
|
|||||||
profileBannerUrl=obj.get("profile_banner_url"),
|
profileBannerUrl=obj.get("profile_banner_url"),
|
||||||
verified=obj.get("verified"),
|
verified=obj.get("verified"),
|
||||||
protected=obj.get("protected"),
|
protected=obj.get("protected"),
|
||||||
descriptionLinks=links,
|
descriptionLinks=_parse_links(obj, ["entities.description.urls", "entities.url.urls"]),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -197,7 +197,7 @@ class Tweet(JSONTrait):
|
|||||||
hashtags=[x["text"] for x in get_or(obj, "entities.hashtags", [])],
|
hashtags=[x["text"] for x in get_or(obj, "entities.hashtags", [])],
|
||||||
cashtags=[x["text"] for x in get_or(obj, "entities.symbols", [])],
|
cashtags=[x["text"] for x in get_or(obj, "entities.symbols", [])],
|
||||||
mentionedUsers=[UserRef.parse(x) for x in get_or(obj, "entities.user_mentions", [])],
|
mentionedUsers=[UserRef.parse(x) for x in get_or(obj, "entities.user_mentions", [])],
|
||||||
links=[TextLink.parse(x) for x in get_or(obj, "entities.urls", [])],
|
links=_parse_links(obj, ["entities.urls"]),
|
||||||
viewCount=int_or_none(obj, "ext_views.count"),
|
viewCount=int_or_none(obj, "ext_views.count"),
|
||||||
retweetedTweet=Tweet.parse(rt_obj, res) if rt_obj else None,
|
retweetedTweet=Tweet.parse(rt_obj, res) if rt_obj else None,
|
||||||
quotedTweet=Tweet.parse(qt_obj, res) if qt_obj else None,
|
quotedTweet=Tweet.parse(qt_obj, res) if qt_obj else None,
|
||||||
@ -335,3 +335,13 @@ def _get_source_label(tw_obj: dict):
|
|||||||
if source and (match := re.search(r">([^<]*)<", source)):
|
if source and (match := re.search(r">([^<]*)<", source)):
|
||||||
return str(match.group(1))
|
return str(match.group(1))
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_links(obj: dict, paths: list[str]):
    """Gather and parse the link entries found under several paths of ``obj``.

    Each entry of ``paths`` is looked up in ``obj`` via ``get_or`` with an
    empty list as the fallback. The raw entries are concatenated in path
    order, then each one is handed to ``TextLink.parse``.

    Returns a list of the parsed results, omitting every entry for which
    ``TextLink.parse`` returned ``None``.
    """
    # Collect all raw entries first, so that every lookup happens before any parsing.
    raw_entries = [entry for path in paths for entry in get_or(obj, path, [])]
    parsed = [TextLink.parse(entry) for entry in raw_entries]
    # Keep only the entries the parser accepted.
    return [link for link in parsed if link is not None]
|
||||||
|
|||||||
Загрузка…
x
Ссылка в новой задаче
Block a user