Этот коммит содержится в:
Vlad Pronsky 2023-09-08 18:04:25 +03:00
родитель 99bf64028e
Коммит 4d8c91a211
4 изменённых файла: 20 добавлений и 10 удалений

Просмотреть файл

@@ -10,9 +10,13 @@ build:
@python -m build
ci:
@make format
@make lint
@make test
format:
@black .
lint:
@ruff check twscrape
@ruff check tests

Просмотреть файл

@@ -93,11 +93,11 @@ def check_tweet(doc: Tweet | None):
try:
assert doc.rawContent.endswith(doc.retweetedTweet.rawContent), "content should be full"
except AssertionError as e:
print('\n' + '-' * 60)
print("\n" + "-" * 60)
print(doc.url)
print('1:', doc.rawContent)
print('2:', doc.retweetedTweet.rawContent)
print('-' * 60)
print("1:", doc.rawContent)
print("2:", doc.retweetedTweet.rawContent)
print("-" * 60)
raise e
check_user(doc.user)

Просмотреть файл

@@ -211,7 +211,9 @@ class Tweet(JSONTrait):
hashtags=[x["text"] for x in get_or(obj, "entities.hashtags", [])],
cashtags=[x["text"] for x in get_or(obj, "entities.symbols", [])],
mentionedUsers=[UserRef.parse(x) for x in get_or(obj, "entities.user_mentions", [])],
links=_parse_links(obj, ["entities.urls", "note_tweet.note_tweet_results.result.entity_set.urls"]),
links=_parse_links(
obj, ["entities.urls", "note_tweet.note_tweet_results.result.entity_set.urls"]
),
viewCount=_get_views(obj, rt_obj or {}),
retweetedTweet=Tweet.parse(rt_obj, res) if rt_obj else None,
quotedTweet=Tweet.parse(qt_obj, res) if qt_obj else None,
@@ -230,8 +232,8 @@ class Tweet(JSONTrait):
if rt is not None and rt.user is not None and doc.rawContent.endswith(""):
# prefix = f"RT @{rt.user.username}: "
# if login changed, old login can be cached in rawContent, so use less strict check
prefix = f"RT @"
prefix = "RT @"
rt_msg = f"{prefix}{rt.rawContent}"
if doc.rawContent != rt_msg and doc.rawContent.startswith(prefix):
doc.rawContent = rt_msg

Просмотреть файл

@@ -134,13 +134,17 @@ def to_old_obj(obj: dict):
def to_old_rep(obj: dict) -> dict[str, dict]:
tmp = get_typed_object(obj, defaultdict(list))
tweets = [x for x in tmp.get("Tweet", []) if "legacy" in x]
tweets = {str(x["rest_id"]): to_old_obj(x) for x in tweets}
tw1 = [x for x in tmp.get("Tweet", []) if "legacy" in x]
tw1 = {str(x["rest_id"]): to_old_obj(x) for x in tw1}
# https://github.com/vladkens/twscrape/issues/53
tw2 = [x["tweet"] for x in tmp.get("TweetWithVisibilityResults", []) if "legacy" in x["tweet"]]
tw2 = {str(x["rest_id"]): to_old_obj(x) for x in tw2}
users = [x for x in tmp.get("User", []) if "legacy" in x and "id" in x]
users = {str(x["rest_id"]): to_old_obj(x) for x in users}
return {"tweets": tweets, "users": users}
return {"tweets": {**tw1, **tw2}, "users": users}
def utc_ts() -> int: