зеркало из
https://github.com/viginum-datalab/twscrape.git
synced 2025-10-29 21:16:25 +02:00
TweetWithVisibilityResults #53
Этот коммит содержится в:
родитель
99bf64028e
Коммит
4d8c91a211
4
Makefile
4
Makefile
@ -10,9 +10,13 @@ build:
|
|||||||
@python -m build
|
@python -m build
|
||||||
|
|
||||||
ci:
|
ci:
|
||||||
|
@make format
|
||||||
@make lint
|
@make lint
|
||||||
@make test
|
@make test
|
||||||
|
|
||||||
|
format:
|
||||||
|
@black .
|
||||||
|
|
||||||
lint:
|
lint:
|
||||||
@ruff check twscrape
|
@ruff check twscrape
|
||||||
@ruff check tests
|
@ruff check tests
|
||||||
|
|||||||
@ -93,11 +93,11 @@ def check_tweet(doc: Tweet | None):
|
|||||||
try:
|
try:
|
||||||
assert doc.rawContent.endswith(doc.retweetedTweet.rawContent), "content should be full"
|
assert doc.rawContent.endswith(doc.retweetedTweet.rawContent), "content should be full"
|
||||||
except AssertionError as e:
|
except AssertionError as e:
|
||||||
print('\n' + '-' * 60)
|
print("\n" + "-" * 60)
|
||||||
print(doc.url)
|
print(doc.url)
|
||||||
print('1:', doc.rawContent)
|
print("1:", doc.rawContent)
|
||||||
print('2:', doc.retweetedTweet.rawContent)
|
print("2:", doc.retweetedTweet.rawContent)
|
||||||
print('-' * 60)
|
print("-" * 60)
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
check_user(doc.user)
|
check_user(doc.user)
|
||||||
|
|||||||
@ -211,7 +211,9 @@ class Tweet(JSONTrait):
|
|||||||
hashtags=[x["text"] for x in get_or(obj, "entities.hashtags", [])],
|
hashtags=[x["text"] for x in get_or(obj, "entities.hashtags", [])],
|
||||||
cashtags=[x["text"] for x in get_or(obj, "entities.symbols", [])],
|
cashtags=[x["text"] for x in get_or(obj, "entities.symbols", [])],
|
||||||
mentionedUsers=[UserRef.parse(x) for x in get_or(obj, "entities.user_mentions", [])],
|
mentionedUsers=[UserRef.parse(x) for x in get_or(obj, "entities.user_mentions", [])],
|
||||||
links=_parse_links(obj, ["entities.urls", "note_tweet.note_tweet_results.result.entity_set.urls"]),
|
links=_parse_links(
|
||||||
|
obj, ["entities.urls", "note_tweet.note_tweet_results.result.entity_set.urls"]
|
||||||
|
),
|
||||||
viewCount=_get_views(obj, rt_obj or {}),
|
viewCount=_get_views(obj, rt_obj or {}),
|
||||||
retweetedTweet=Tweet.parse(rt_obj, res) if rt_obj else None,
|
retweetedTweet=Tweet.parse(rt_obj, res) if rt_obj else None,
|
||||||
quotedTweet=Tweet.parse(qt_obj, res) if qt_obj else None,
|
quotedTweet=Tweet.parse(qt_obj, res) if qt_obj else None,
|
||||||
@ -230,8 +232,8 @@ class Tweet(JSONTrait):
|
|||||||
if rt is not None and rt.user is not None and doc.rawContent.endswith("…"):
|
if rt is not None and rt.user is not None and doc.rawContent.endswith("…"):
|
||||||
# prefix = f"RT @{rt.user.username}: "
|
# prefix = f"RT @{rt.user.username}: "
|
||||||
# if login changed, old login can be cached in rawContent, so use less strict check
|
# if login changed, old login can be cached in rawContent, so use less strict check
|
||||||
prefix = f"RT @"
|
prefix = "RT @"
|
||||||
|
|
||||||
rt_msg = f"{prefix}{rt.rawContent}"
|
rt_msg = f"{prefix}{rt.rawContent}"
|
||||||
if doc.rawContent != rt_msg and doc.rawContent.startswith(prefix):
|
if doc.rawContent != rt_msg and doc.rawContent.startswith(prefix):
|
||||||
doc.rawContent = rt_msg
|
doc.rawContent = rt_msg
|
||||||
|
|||||||
@ -134,13 +134,17 @@ def to_old_obj(obj: dict):
|
|||||||
def to_old_rep(obj: dict) -> dict[str, dict]:
|
def to_old_rep(obj: dict) -> dict[str, dict]:
|
||||||
tmp = get_typed_object(obj, defaultdict(list))
|
tmp = get_typed_object(obj, defaultdict(list))
|
||||||
|
|
||||||
tweets = [x for x in tmp.get("Tweet", []) if "legacy" in x]
|
tw1 = [x for x in tmp.get("Tweet", []) if "legacy" in x]
|
||||||
tweets = {str(x["rest_id"]): to_old_obj(x) for x in tweets}
|
tw1 = {str(x["rest_id"]): to_old_obj(x) for x in tw1}
|
||||||
|
|
||||||
|
# https://github.com/vladkens/twscrape/issues/53
|
||||||
|
tw2 = [x["tweet"] for x in tmp.get("TweetWithVisibilityResults", []) if "legacy" in x["tweet"]]
|
||||||
|
tw2 = {str(x["rest_id"]): to_old_obj(x) for x in tw2}
|
||||||
|
|
||||||
users = [x for x in tmp.get("User", []) if "legacy" in x and "id" in x]
|
users = [x for x in tmp.get("User", []) if "legacy" in x and "id" in x]
|
||||||
users = {str(x["rest_id"]): to_old_obj(x) for x in users}
|
users = {str(x["rest_id"]): to_old_obj(x) for x in users}
|
||||||
|
|
||||||
return {"tweets": tweets, "users": users}
|
return {"tweets": {**tw1, **tw2}, "users": users}
|
||||||
|
|
||||||
|
|
||||||
def utc_ts() -> int:
|
def utc_ts() -> int:
|
||||||
|
|||||||
Загрузка…
x
Ссылка в новой задаче
Block a user