зеркало из
https://github.com/viginum-datalab/twscrape.git
synced 2025-10-29 13:06:13 +02:00
TweetWithVisibilityResults #53
Этот коммит содержится в:
родитель
99bf64028e
Коммит
4d8c91a211
4
Makefile
4
Makefile
@ -10,9 +10,13 @@ build:
|
||||
@python -m build
|
||||
|
||||
ci:
|
||||
@make format
|
||||
@make lint
|
||||
@make test
|
||||
|
||||
format:
|
||||
@black .
|
||||
|
||||
lint:
|
||||
@ruff check twscrape
|
||||
@ruff check tests
|
||||
|
||||
@ -93,11 +93,11 @@ def check_tweet(doc: Tweet | None):
|
||||
try:
|
||||
assert doc.rawContent.endswith(doc.retweetedTweet.rawContent), "content should be full"
|
||||
except AssertionError as e:
|
||||
print('\n' + '-' * 60)
|
||||
print("\n" + "-" * 60)
|
||||
print(doc.url)
|
||||
print('1:', doc.rawContent)
|
||||
print('2:', doc.retweetedTweet.rawContent)
|
||||
print('-' * 60)
|
||||
print("1:", doc.rawContent)
|
||||
print("2:", doc.retweetedTweet.rawContent)
|
||||
print("-" * 60)
|
||||
raise e
|
||||
|
||||
check_user(doc.user)
|
||||
|
||||
@ -211,7 +211,9 @@ class Tweet(JSONTrait):
|
||||
hashtags=[x["text"] for x in get_or(obj, "entities.hashtags", [])],
|
||||
cashtags=[x["text"] for x in get_or(obj, "entities.symbols", [])],
|
||||
mentionedUsers=[UserRef.parse(x) for x in get_or(obj, "entities.user_mentions", [])],
|
||||
links=_parse_links(obj, ["entities.urls", "note_tweet.note_tweet_results.result.entity_set.urls"]),
|
||||
links=_parse_links(
|
||||
obj, ["entities.urls", "note_tweet.note_tweet_results.result.entity_set.urls"]
|
||||
),
|
||||
viewCount=_get_views(obj, rt_obj or {}),
|
||||
retweetedTweet=Tweet.parse(rt_obj, res) if rt_obj else None,
|
||||
quotedTweet=Tweet.parse(qt_obj, res) if qt_obj else None,
|
||||
@ -230,8 +232,8 @@ class Tweet(JSONTrait):
|
||||
if rt is not None and rt.user is not None and doc.rawContent.endswith("…"):
|
||||
# prefix = f"RT @{rt.user.username}: "
|
||||
# if login changed, old login can be cached in rawContent, so use less strict check
|
||||
prefix = f"RT @"
|
||||
|
||||
prefix = "RT @"
|
||||
|
||||
rt_msg = f"{prefix}{rt.rawContent}"
|
||||
if doc.rawContent != rt_msg and doc.rawContent.startswith(prefix):
|
||||
doc.rawContent = rt_msg
|
||||
|
||||
@ -134,13 +134,17 @@ def to_old_obj(obj: dict):
|
||||
def to_old_rep(obj: dict) -> dict[str, dict]:
|
||||
tmp = get_typed_object(obj, defaultdict(list))
|
||||
|
||||
tweets = [x for x in tmp.get("Tweet", []) if "legacy" in x]
|
||||
tweets = {str(x["rest_id"]): to_old_obj(x) for x in tweets}
|
||||
tw1 = [x for x in tmp.get("Tweet", []) if "legacy" in x]
|
||||
tw1 = {str(x["rest_id"]): to_old_obj(x) for x in tw1}
|
||||
|
||||
# https://github.com/vladkens/twscrape/issues/53
|
||||
tw2 = [x["tweet"] for x in tmp.get("TweetWithVisibilityResults", []) if "legacy" in x["tweet"]]
|
||||
tw2 = {str(x["rest_id"]): to_old_obj(x) for x in tw2}
|
||||
|
||||
users = [x for x in tmp.get("User", []) if "legacy" in x and "id" in x]
|
||||
users = {str(x["rest_id"]): to_old_obj(x) for x in users}
|
||||
|
||||
return {"tweets": tweets, "users": users}
|
||||
return {"tweets": {**tw1, **tw2}, "users": users}
|
||||
|
||||
|
||||
def utc_ts() -> int:
|
||||
|
||||
Загрузка…
x
Ссылка в новой задаче
Block a user