# Summary of this patch (text before the first "diff --git" header is ignored by git apply / patch):
#   * Makefile: add a `format` target that runs `black .` and run it first in `ci`.
#   * tests/test_parser.py: switch the debug prints in check_tweet to double-quoted strings.
#   * twscrape/models.py: wrap the long `_parse_links(...)` call, drop the needless `f` prefix
#     on "RT @", and replace a whitespace-only line with an empty one.
#   * twscrape/utils.py: to_old_rep also collects entries typed "TweetWithVisibilityResults"
#     (taking their "tweet" value) and merges them with the plain "Tweet" entries
#     (see https://github.com/vladkens/twscrape/issues/53).
# NOTE(review): line structure was restored from a collapsed copy; indentation of context
#   lines is reconstructed from the surrounding syntax - verify against the target tree.
# NOTE(review): `x["tweet"]` is indexed unconditionally for every TweetWithVisibilityResults
#   entry; an entry lacking that key would raise KeyError - confirm the payload guarantees it.
diff --git a/Makefile b/Makefile
index b19b6c1..ed54638 100644
--- a/Makefile
+++ b/Makefile
@@ -10,9 +10,13 @@ build:
 	@python -m build
 
 ci:
+	@make format
 	@make lint
 	@make test
 
+format:
+	@black .
+
 lint:
 	@ruff check twscrape
 	@ruff check tests
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 58fa935..377db24 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -93,11 +93,11 @@ def check_tweet(doc: Tweet | None):
         try:
             assert doc.rawContent.endswith(doc.retweetedTweet.rawContent), "content should be full"
         except AssertionError as e:
-            print('\n' + '-' * 60)
+            print("\n" + "-" * 60)
             print(doc.url)
-            print('1:', doc.rawContent)
-            print('2:', doc.retweetedTweet.rawContent)
-            print('-' * 60)
+            print("1:", doc.rawContent)
+            print("2:", doc.retweetedTweet.rawContent)
+            print("-" * 60)
             raise e
 
     check_user(doc.user)
diff --git a/twscrape/models.py b/twscrape/models.py
index 4a1218e..31f5852 100644
--- a/twscrape/models.py
+++ b/twscrape/models.py
@@ -211,7 +211,9 @@ class Tweet(JSONTrait):
             hashtags=[x["text"] for x in get_or(obj, "entities.hashtags", [])],
             cashtags=[x["text"] for x in get_or(obj, "entities.symbols", [])],
             mentionedUsers=[UserRef.parse(x) for x in get_or(obj, "entities.user_mentions", [])],
-            links=_parse_links(obj, ["entities.urls", "note_tweet.note_tweet_results.result.entity_set.urls"]),
+            links=_parse_links(
+                obj, ["entities.urls", "note_tweet.note_tweet_results.result.entity_set.urls"]
+            ),
             viewCount=_get_views(obj, rt_obj or {}),
             retweetedTweet=Tweet.parse(rt_obj, res) if rt_obj else None,
             quotedTweet=Tweet.parse(qt_obj, res) if qt_obj else None,
@@ -230,8 +232,8 @@ class Tweet(JSONTrait):
         if rt is not None and rt.user is not None and doc.rawContent.endswith("…"):
             # prefix = f"RT @{rt.user.username}: "
             # if login changed, old login can be cached in rawContent, so use less strict check
-            prefix = f"RT @"
-            
+            prefix = "RT @"
+
             rt_msg = f"{prefix}{rt.rawContent}"
             if doc.rawContent != rt_msg and doc.rawContent.startswith(prefix):
                 doc.rawContent = rt_msg
diff --git a/twscrape/utils.py b/twscrape/utils.py
index 61bb21f..5e1fd09 100644
--- a/twscrape/utils.py
+++ b/twscrape/utils.py
@@ -134,13 +134,17 @@ def to_old_obj(obj: dict):
 
 def to_old_rep(obj: dict) -> dict[str, dict]:
     tmp = get_typed_object(obj, defaultdict(list))
-    tweets = [x for x in tmp.get("Tweet", []) if "legacy" in x]
-    tweets = {str(x["rest_id"]): to_old_obj(x) for x in tweets}
+    tw1 = [x for x in tmp.get("Tweet", []) if "legacy" in x]
+    tw1 = {str(x["rest_id"]): to_old_obj(x) for x in tw1}
+
+    # https://github.com/vladkens/twscrape/issues/53
+    tw2 = [x["tweet"] for x in tmp.get("TweetWithVisibilityResults", []) if "legacy" in x["tweet"]]
+    tw2 = {str(x["rest_id"]): to_old_obj(x) for x in tw2}
 
     users = [x for x in tmp.get("User", []) if "legacy" in x and "id" in x]
     users = {str(x["rest_id"]): to_old_obj(x) for x in users}
 
-    return {"tweets": tweets, "users": users}
+    return {"tweets": {**tw1, **tw2}, "users": users}
 
 
 def utc_ts() -> int: