Error reading in jsonl file in sparklyr

I am attempting to read a number of JSONL (JSON Lines) files, not regular JSON, with sparklyr using the following code:

sample <- spark_read_json(sc, "sample.jsonl")

Because the files are very large, here is a sample of two lines from sample.jsonl:

{"created_at": "Thu Jun 18 21:00:08 +0000 2020", "id": 1273722186369585155, "id_str": "1273722186369585155", "text": "RT @spunbeam: Breonna Taylor\u2019s name is no longer trending, and the police that murdered her are still free. If you see this please reply/qu\u2026", "source": "<a href=\"http://twitter.com/download/iphone\" rel=\"nofollow\">Twitter for iPhone</a>", "truncated": false, "in_reply_to_status_id": null, "in_reply_to_status_id_str": null, "in_reply_to_user_id": null, "in_reply_to_user_id_str": null, "in_reply_to_screen_name": null, "user": {"id": 789469454660956160, "id_str": "789469454660956160", "name": "Princess Daddy \ud83d\udc78\ud83c\udffe\ud83d\udc95", "screen_name": "jadaciti", "location": "Lafayette, LA", "url": "https://youtu.be/TaEw0P8qSeo", "description": "a goddess \ud83e\uddde\u200d\u2640\ufe0f & a cancer \u264b\ufe0f #goddessgang \u2728 #BlackLivesMatter \u270a\ud83c\udffc\u270a\ud83c\udffd\u270a\ud83c\udffe\u270a\ud83c\udfff\ud83d\udda4", "translator_type": "none", "protected": false, "verified": false, "followers_count": 229, "friends_count": 152, "listed_count": 0, "favourites_count": 7665, "statuses_count": 7702, "created_at": "Fri Oct 21 14:12:40 +0000 2016", "utc_offset": null, "time_zone": null, "geo_enabled": false, "lang": null, "contributors_enabled": false, "is_translator": false, "profile_background_color": "F5F8FA", "profile_background_image_url": "", "profile_background_image_url_https": "", "profile_background_tile": false, "profile_link_color": "1DA1F2", "profile_sidebar_border_color": "C0DEED", "profile_sidebar_fill_color": "DDEEF6", "profile_text_color": "333333", "profile_use_background_image": true, "profile_image_url": "http://pbs.twimg.com/profile_images/1270931500297605121/tK6ICOLj_normal.jpg", "profile_image_url_https": "https://pbs.twimg.com/profile_images/1270931500297605121/tK6ICOLj_normal.jpg", "profile_banner_url": "https://pbs.twimg.com/profile_banners/789469454660956160/1591421951", "default_profile": 
true, "default_profile_image": false, "following": null, "follow_request_sent": null, "notifications": null}, "geo": null, "coordinates": null, "place": null, "contributors": null, "retweeted_status": {"created_at": "Wed Jun 17 15:21:31 +0000 2020", "id": 1273274580833050624, "id_str": "1273274580833050624", "text": "Breonna Taylor\u2019s name is no longer trending, and the police that murdered her are still free. If you see this pleas\u2026 https://t.co/f88oqSdhla", "source": "<a href=\"http://twitter.com/download/android\" rel=\"nofollow\">Twitter for Android</a>", "truncated": true, "in_reply_to_status_id": null, "in_reply_to_status_id_str": null, "in_reply_to_user_id": null, "in_reply_to_user_id_str": null, "in_reply_to_screen_name": null, "user": {"id": 1199772686865616909, "id_str": "1199772686865616909", "name": "BLM Green Man \ud83d\udd1c TipDaddy", "screen_name": "spunbeam", "location": "BPD TOWN USA", "url": null, "description": "yer local greenbean\ud83e\udd74wobbling heavily since 2016\ud83c\udf44(*\u02d8\ufe36\u02d8*).\uff61*\ud83d\udc9a\ud83d\udd1c TipDaddy n Friends. 
\n21 - \u2653\u2600\ufe0f\u264a\ud83c\udf17\u264a\u2b06\ufe0f -Toxic for men, Wholesome for women \ud83c\udff3\ufe0f\u200d\ud83c\udf08\ud83c\udde8\ud83c\uddfa\ud83c\udf44\ud83d\ude3b\ud83c\udf32\ud83c\udfb6&LSD", "translator_type": "none", "protected": false, "verified": false, "followers_count": 651, "friends_count": 1224, "listed_count": 0, "favourites_count": 23214, "statuses_count": 4142, "created_at": "Wed Nov 27 19:31:20 +0000 2019", "utc_offset": null, "time_zone": null, "geo_enabled": false, "lang": null, "contributors_enabled": false, "is_translator": false, "profile_background_color": "F5F8FA", "profile_background_image_url": "", "profile_background_image_url_https": "", "profile_background_tile": false, "profile_link_color": "1DA1F2", "profile_sidebar_border_color": "C0DEED", "profile_sidebar_fill_color": "DDEEF6", "profile_text_color": "333333", "profile_use_background_image": true, "profile_image_url": "http://pbs.twimg.com/profile_images/1248737938885746695/5lgHu_18_normal.jpg", "profile_image_url_https": "https://pbs.twimg.com/profile_images/1248737938885746695/5lgHu_18_normal.jpg", "profile_banner_url": "https://pbs.twimg.com/profile_banners/1199772686865616909/1583024393", "default_profile": true, "default_profile_image": false, "following": null, "follow_request_sent": null, "notifications": null}, "geo": null, "coordinates": null, "place": null, "contributors": null, "is_quote_status": false, "extended_tweet": {"full_text": "Breonna Taylor\u2019s name is no longer trending, and the police that murdered her are still free. If you see this please reply/quote/tweet on ur own. 
ARREST THE COPS WHO KILLED #BREONNATAYLOR", "display_text_range": [0, 187], "entities": {"hashtags": [{"text": "BREONNATAYLOR", "indices": [173, 187]}], "urls": [], "user_mentions": [], "symbols": []}}, "quote_count": 780, "reply_count": 213, "retweet_count": 15755, "favorite_count": 12153, "entities": {"hashtags": [], "urls": [{"url": "https://t.co/f88oqSdhla", "expanded_url": "https://twitter.com/i/web/status/1273274580833050624", "display_url": "twitter.com/i/web/status/1\u2026", "indices": [117, 140]}], "user_mentions": [], "symbols": []}, "favorited": false, "retweeted": false, "filter_level": "low", "lang": "en"}, "is_quote_status": false, "quote_count": 0, "reply_count": 0, "retweet_count": 0, "favorite_count": 0, "entities": {"hashtags": [], "urls": [], "user_mentions": [{"screen_name": "spunbeam", "name": "BLM Green Man \ud83d\udd1c TipDaddy", "id": 1199772686865616909, "id_str": "1199772686865616909", "indices": [3, 12]}], "symbols": []}, "favorited": false, "retweeted": false, "filter_level": "low", "lang": "en", "timestamp_ms": "1592514008501"}
{"created_at": "Thu Jun 18 21:00:08 +0000 2020", "id": 1273722186436665346, "id_str": "1273722186436665346", "text": "RT @GreenEyeRaven1: If we have to remind you our bodies are ours ...we will...stop killing us\n#GBVmustfall #day84oflockdown #COVID19 #Prote\u2026", "source": "<a href=\"http://twitter.com/download/android\" rel=\"nofollow\">Twitter for Android</a>", "truncated": false, "in_reply_to_status_id": null, "in_reply_to_status_id_str": null, "in_reply_to_user_id": null, "in_reply_to_user_id_str": null, "in_reply_to_screen_name": null, "user": {"id": 504191291, "id_str": "504191291", "name": "GoldenMdee", "screen_name": "GoldenMdee", "location": "South Africa", "url": null, "description": "\u201cBe kind to the person you are becoming\u201d \u2665\ufe0f\n\n            \nhttps://www.instagram.com/goldenmdee/", "translator_type": "none", "protected": false, "verified": false, "followers_count": 5827, "friends_count": 5430, "listed_count": 0, "favourites_count": 22118, "statuses_count": 10078, "created_at": "Sun Feb 26 08:16:57 +0000 2012", "utc_offset": null, "time_zone": null, "geo_enabled": true, "lang": null, "contributors_enabled": false, "is_translator": false, "profile_background_color": "C0DEED", "profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png", "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png", "profile_background_tile": false, "profile_link_color": "1DA1F2", "profile_sidebar_border_color": "C0DEED", "profile_sidebar_fill_color": "DDEEF6", "profile_text_color": "333333", "profile_use_background_image": true, "profile_image_url": "http://pbs.twimg.com/profile_images/1225124177075736577/xU452O6s_normal.jpg", "profile_image_url_https": "https://pbs.twimg.com/profile_images/1225124177075736577/xU452O6s_normal.jpg", "profile_banner_url": "https://pbs.twimg.com/profile_banners/504191291/1586165128", "default_profile": true, "default_profile_image": false, "following": null, 
"follow_request_sent": null, "notifications": null}, "geo": null, "coordinates": null, "place": null, "contributors": null, "retweeted_status": {"created_at": "Thu Jun 18 11:14:49 +0000 2020", "id": 1273574885122605061, "id_str": "1273574885122605061", "text": "If we have to remind you our bodies are ours ...we will...stop killing us\n#GBVmustfall #day84oflockdown #COVID19\u2026 https://t.co/UUk2uGJKui", "display_text_range": [0, 140], "source": "<a href=\"http://twitter.com/download/android\" rel=\"nofollow\">Twitter for Android</a>", "truncated": true, "in_reply_to_status_id": null, "in_reply_to_status_id_str": null, "in_reply_to_user_id": null, "in_reply_to_user_id_str": null, "in_reply_to_screen_name": null, "user": {"id": 1249208580579954689, "id_str": "1249208580579954689", "name": "Green Eyed Raven", "screen_name": "GreenEyeRaven1", "location": null, "url": "http://onlyfans.com/greeneyedraven", "description": "Only Respond to DMs and private requests on ONLYFANS\ud83e\udd2d\nold account deleted at 30k\ud83d\udc94\nNude Model \u25cb\nContent Creator \u25cf", "translator_type": "none", "protected": false, "verified": false, "followers_count": 5688, "friends_count": 81, "listed_count": 11, "favourites_count": 189, "statuses_count": 249, "created_at": "Sun Apr 12 05:32:10 +0000 2020", "utc_offset": null, "time_zone": null, "geo_enabled": false, "lang": null, "contributors_enabled": false, "is_translator": false, "profile_background_color": "F5F8FA", "profile_background_image_url": "", "profile_background_image_url_https": "", "profile_background_tile": false, "profile_link_color": "1DA1F2", "profile_sidebar_border_color": "C0DEED", "profile_sidebar_fill_color": "DDEEF6", "profile_text_color": "333333", "profile_use_background_image": true, "profile_image_url": "http://pbs.twimg.com/profile_images/1273699570770419714/uM9rUKWw_normal.jpg", "profile_image_url_https": "https://pbs.twimg.com/profile_images/1273699570770419714/uM9rUKWw_normal.jpg", "default_profile": 
true, "default_profile_image": false, "following": null, "follow_request_sent": null, "notifications": null}, "geo": null, "coordinates": null, "place": null, "contributors": null, "is_quote_status": false, "extended_tweet": {"full_text": "If we have to remind you our bodies are ours ...we will...stop killing us\n#GBVmustfall #day84oflockdown #COVID19 #ProtectBlackWomen #BlackLivesMatter https://t.co/hRWdSYCmkN", "display_text_range": [0, 149], "entities": {"hashtags": [{"text": "GBVmustfall", "indices": [74, 86]}, {"text": "day84oflockdown", "indices": [87, 103]}, {"text": "COVID19", "indices": [104, 112]}, {"text": "ProtectBlackWomen", "indices": [113, 131]}, {"text": "BlackLivesMatter", "indices": [132, 149]}], "urls": [], "user_mentions": [], "symbols": [], "media": [{"id": 1273574859147292673, "id_str": "1273574859147292673", "indices": [150, 173], "media_url": "http://pbs.twimg.com/media/EaylljJXsAEWa_9.jpg", "media_url_https": "https://pbs.twimg.com/media/EaylljJXsAEWa_9.jpg", "url": "https://t.co/hRWdSYCmkN", "display_url": "pic.twitter.com/hRWdSYCmkN", "expanded_url": "https://twitter.com/GreenEyeRaven1/status/1273574885122605061/photo/1", "type": "photo", "sizes": {"thumb": {"w": 150, "h": 150, "resize": "crop"}, "medium": {"w": 1200, "h": 871, "resize": "fit"}, "large": {"w": 1600, "h": 1161, "resize": "fit"}, "small": {"w": 680, "h": 493, "resize": "fit"}}}, {"id": 1273574873634398208, "id_str": "1273574873634398208", "indices": [150, 173], "media_url": "http://pbs.twimg.com/media/EaylmZHXYAA__2Z.jpg", "media_url_https": "https://pbs.twimg.com/media/EaylmZHXYAA__2Z.jpg", "url": "https://t.co/hRWdSYCmkN", "display_url": "pic.twitter.com/hRWdSYCmkN", "expanded_url": "https://twitter.com/GreenEyeRaven1/status/1273574885122605061/photo/1", "type": "photo", "sizes": {"large": {"w": 864, "h": 1296, "resize": "fit"}, "thumb": {"w": 150, "h": 150, "resize": "crop"}, "medium": {"w": 800, "h": 1200, "resize": "fit"}, "small": {"w": 453, "h": 680, "resize": 
"fit"}}}]}, "extended_entities": {"media": [{"id": 1273574859147292673, "id_str": "1273574859147292673", "indices": [150, 173], "media_url": "http://pbs.twimg.com/media/EaylljJXsAEWa_9.jpg", "media_url_https": "https://pbs.twimg.com/media/EaylljJXsAEWa_9.jpg", "url": "https://t.co/hRWdSYCmkN", "display_url": "pic.twitter.com/hRWdSYCmkN", "expanded_url": "https://twitter.com/GreenEyeRaven1/status/1273574885122605061/photo/1", "type": "photo", "sizes": {"thumb": {"w": 150, "h": 150, "resize": "crop"}, "medium": {"w": 1200, "h": 871, "resize": "fit"}, "large": {"w": 1600, "h": 1161, "resize": "fit"}, "small": {"w": 680, "h": 493, "resize": "fit"}}}, {"id": 1273574873634398208, "id_str": "1273574873634398208", "indices": [150, 173], "media_url": "http://pbs.twimg.com/media/EaylmZHXYAA__2Z.jpg", "media_url_https": "https://pbs.twimg.com/media/EaylmZHXYAA__2Z.jpg", "url": "https://t.co/hRWdSYCmkN", "display_url": "pic.twitter.com/hRWdSYCmkN", "expanded_url": "https://twitter.com/GreenEyeRaven1/status/1273574885122605061/photo/1", "type": "photo", "sizes": {"large": {"w": 864, "h": 1296, "resize": "fit"}, "thumb": {"w": 150, "h": 150, "resize": "crop"}, "medium": {"w": 800, "h": 1200, "resize": "fit"}, "small": {"w": 453, "h": 680, "resize": "fit"}}}]}}, "quote_count": 538, "reply_count": 1290, "retweet_count": 2466, "favorite_count": 10241, "entities": {"hashtags": [{"text": "GBVmustfall", "indices": [74, 86]}, {"text": "day84oflockdown", "indices": [87, 103]}, {"text": "COVID19", "indices": [104, 112]}], "urls": [{"url": "https://t.co/UUk2uGJKui", "expanded_url": "https://twitter.com/i/web/status/1273574885122605061", "display_url": "twitter.com/i/web/status/1\u2026", "indices": [114, 137]}], "user_mentions": [], "symbols": []}, "favorited": false, "retweeted": false, "possibly_sensitive": true, "filter_level": "low", "lang": "en"}, "is_quote_status": false, "quote_count": 0, "reply_count": 0, "retweet_count": 0, "favorite_count": 0, "entities": {"hashtags": [{"text": 
"GBVmustfall", "indices": [94, 106]}, {"text": "day84oflockdown", "indices": [107, 123]}, {"text": "COVID19", "indices": [124, 132]}], "urls": [], "user_mentions": [{"screen_name": "GreenEyeRaven1", "name": "Green Eyed Raven", "id": 1249208580579954689, "id_str": "1249208580579954689", "indices": [3, 18]}], "symbols": []}, "favorited": false, "retweeted": false, "filter_level": "low", "lang": "en", "timestamp_ms": "1592514008517"}

I'm getting the following error:

Error: org.apache.spark.sql.AnalysisException: Since Spark 2.3, the queries from raw JSON/CSV files are disallowed when the
referenced columns only include the internal corrupt record column
(named _corrupt_record by default). For example:
spark.read.schema(schema).json(file).filter("_corrupt_record".isNotNull).count() and spark.read.schema(schema).json(file).select("_corrupt_record").show(). Instead, you can cache or save the parsed results and then send the same query. For example, val df = spark.read.schema(schema).json(file).cache() and then df.filter("_corrupt_record".isNotNull).count().;

Looking through this error message, it seems that spark.read.schema(schema).json(file) and the other commands it suggests are Spark (Scala) API calls rather than sparklyr commands. Has anyone experienced this issue before and found a way to deal with it? Thanks!

Can you try to share a reproducible example to help investigate? If I copy the sample JSONL lines into repro-1.json, I can run the following code successfully in Spark 2.0, 2.3, 2.4 and 3.0.

library(sparklyr)
sc <- spark_connect(master = "local")
spark_read_json(sc, "repro-1.json")
# Source: spark<repro1> [?? x 28]
  contributors coordinates created_at entities favorite_count favorited filter_level geo        id id_str in_reply_to_scr…
  <chr>        <chr>       <chr>      <list>            <dbl> <lgl>     <chr>        <chr>   <dbl> <chr>  <chr>           
1 NA           NA          Thu Jun 1… <named …              0 FALSE     low          NA    1.27e18 12737… NA              
2 NA           NA          Thu Jun 1… <named …              0 FALSE     low          NA    1.27e18 12737… NA              
# … with 17 more variables: in_reply_to_status_id <chr>, in_reply_to_status_id_str <chr>, in_reply_to_user_id <chr>,
#   in_reply_to_user_id_str <chr>, is_quote_status <lgl>, lang <chr>, place <chr>, quote_count <dbl>, reply_count <dbl>,
#   retweet_count <dbl>, retweeted <lgl>, retweeted_status <list>, source <chr>, text <chr>, timestamp_ms <chr>,
#   truncated <lgl>, user <list>

Have you tried updating sparklyr to the latest version by running install.packages("sparklyr")?

After looking into how to share reproducible data when no dataset is available, I have used deparse() as recommended, but I am also still including some sample lines below.

head -n 1 june1-aa.jsonl
{"created_at": "Thu May 28 15:13:14 +0000 2020", "id": 1266024741648977920, "id_str": "1266024741648977920", "text": "#minneapolisriots #GeorgeFloyd #AhmaudArbery \nhttps://t.co/Pyrgfk9d1r", "source": "<a href=\"https://mobile.twitter.com\" rel=\"nofollow\">Twitter Web App</a>", "truncated": false, "in_reply_to_status_id": null, "in_reply_to_status_id_str": null, "in_reply_to_user_id": null, "in_reply_to_user_id_str": null, "in_reply_to_screen_name": null, "user": {"id": 32942312, "id_str": "32942312", "name": "Saptech", "screen_name": "saptech", "location": "STLMO", "url": null, "description": "I'd love to change the world but I don't know what to do, so I'll leave it up to you!\nI'm a big #linux user, currently using #Debian10 and #Mageia7.", "translator_type": "none", "protected": false, "verified": false, "followers_count": 142, "friends_count": 215, "listed_count": 22, "favourites_count": 14638, "statuses_count": 11443, "created_at": "Sat Apr 18 17:02:17 +0000 2009", "utc_offset": null, "time_zone": null, "geo_enabled": false, "lang": null, "contributors_enabled": false, "is_translator": false, "profile_background_color": "C0DEED", "profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png", "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png", "profile_background_tile": false, "profile_link_color": "1DA1F2", "profile_sidebar_border_color": "C0DEED", "profile_sidebar_fill_color": "DDEEF6", "profile_text_color": "333333", "profile_use_background_image": true, "profile_image_url": "http://pbs.twimg.com/profile_images/1251322240538488832/WonOh0Jh_normal.jpg", "profile_image_url_https": "https://pbs.twimg.com/profile_images/1251322240538488832/WonOh0Jh_normal.jpg", "default_profile": true, "default_profile_image": false, "following": null, "follow_request_sent": null, "notifications": null}, "geo": null, "coordinates": null, "place": null, "contributors": null, "is_quote_status": false, "quote_count": 0, 
"reply_count": 0, "retweet_count": 0, "favorite_count": 0, "entities": {"hashtags": [{"text": "minneapolisriots", "indices": [0, 17]}, {"text": "GeorgeFloyd", "indices": [18, 30]}, {"text": "AhmaudArbery", "indices": [31, 44]}], "urls": [{"url": "https://t.co/Pyrgfk9d1r", "expanded_url": "https://www.youtube.com/watch?v=za01QWLXisQ", "display_url": "youtube.com/watch?v=za01QW\u2026", "indices": [46, 69]}], "user_mentions": [], "symbols": []}, "favorited": false, "retweeted": false, "possibly_sensitive": false, "filter_level": "low", "lang": "und", "timestamp_ms": "1590678794668"}
{"created_at": "Thu May 28 15:13:15 +0000 2020", "id": 1266024745553846272, "id_str": "1266024745553846272", "text": "RT @nrawomen: Here\u2019s how to hide your home-defense #gun where you need it the most. https://t.co/zC18SW9ES0\n#2a #NRAWomen #prepared https:/\u2026", "source": "<a href=\"http://twitter.com/download/android\" rel=\"nofollow\">Twitter for Android</a>", "truncated": false, "in_reply_to_status_id": null, "in_reply_to_status_id_str": null, "in_reply_to_user_id": null, "in_reply_to_user_id_str": null, "in_reply_to_screen_name": null, "user": {"id": 915229609683243008, "id_str": "915229609683243008", "name": "mike ruger", "screen_name": "RugerBncurtis", "location": "Michigan, USA", "url": null, "description": "Proud NRA member Hunter, fisherman Concrete finisher, Guitarist #MAGA NO porn  Fox photo taken on property, Will never surrender my #2A rights", "translator_type": "none", "protected": false, "verified": false, "followers_count": 1742, "friends_count": 1553, "listed_count": 2, "favourites_count": 145661, "statuses_count": 32690, "created_at": "Tue Oct 03 14:58:37 +0000 2017", "utc_offset": null, "time_zone": null, "geo_enabled": false, "lang": null, "contributors_enabled": false, "is_translator": false, "profile_background_color": "F5F8FA", "profile_background_image_url": "", "profile_background_image_url_https": "", "profile_background_tile": false, "profile_link_color": "1DA1F2", "profile_sidebar_border_color": "C0DEED", "profile_sidebar_fill_color": "DDEEF6", "profile_text_color": "333333", "profile_use_background_image": true, "profile_image_url": "http://pbs.twimg.com/profile_images/1113450847722201089/8Dit5TrX_normal.jpg", "profile_image_url_https": "https://pbs.twimg.com/profile_images/1113450847722201089/8Dit5TrX_normal.jpg", "profile_banner_url": "https://pbs.twimg.com/profile_banners/915229609683243008/1532276695", "default_profile": true, "default_profile_image": false, "following": null, "follow_request_sent": null, "notifications": 
null}, "geo": null, "coordinates": null, "place": null, "contributors": null, "retweeted_status": {"created_at": "Thu May 28 13:32:58 +0000 2020", "id": 1265999506807705601, "id_str": "1265999506807705601", "text": "Here\u2019s how to hide your home-defense #gun where you need it the most. https://t.co/zC18SW9ES0\n#2a #NRAWomen\u2026 https://t.co/YRdb9tt3dp", "display_text_range": [0, 140], "source": "<a href=\"https://mobile.twitter.com\" rel=\"nofollow\">Twitter Web App</a>", "truncated": true, "in_reply_to_status_id": null, "in_reply_to_status_id_str": null, "in_reply_to_user_id": null, "in_reply_to_user_id_str": null, "in_reply_to_screen_name": null, "user": {"id": 495132092, "id_str": "495132092", "name": "NRA Women", "screen_name": "nrawomen", "location": null, "url": "http://nrawomen.tv", "description": "NRA Women is an educational resource for female gun enthusiasts. Come celebrate and unite with the women of the NRA. #NRAWomen", "translator_type": "none", "protected": false, "verified": false, "followers_count": 26440, "friends_count": 432, "listed_count": 205, "favourites_count": 547, "statuses_count": 3328, "created_at": "Fri Feb 17 16:46:53 +0000 2012", "utc_offset": null, "time_zone": null, "geo_enabled": false, "lang": null, "contributors_enabled": false, "is_translator": false, "profile_background_color": "123029", "profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png", "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png", "profile_background_tile": false, "profile_link_color": "E6276C", "profile_sidebar_border_color": "FFFFFF", "profile_sidebar_fill_color": "DDEEF6", "profile_text_color": "333333", "profile_use_background_image": true, "profile_image_url": "http://pbs.twimg.com/profile_images/734838910396694528/rxFKRz9-_normal.jpg", "profile_image_url_https": "https://pbs.twimg.com/profile_images/734838910396694528/rxFKRz9-_normal.jpg", "profile_banner_url": 
"https://pbs.twimg.com/profile_banners/495132092/1555689499", "default_profile": false, "default_profile_image": false, "following": null, "follow_request_sent": null, "notifications": null}, "geo": null, "coordinates": null, "place": null, "contributors": null, "is_quote_status": false, "extended_tweet": {"full_text": "Here\u2019s how to hide your home-defense #gun where you need it the most. https://t.co/zC18SW9ES0\n#2a #NRAWomen #prepared https://t.co/1EeMGA5iDm", "display_text_range": [0, 117], "entities": {"hashtags": [{"text": "gun", "indices": [37, 41]}, {"text": "2a", "indices": [94, 97]}, {"text": "NRAWomen", "indices": [98, 107]}, {"text": "prepared", "indices": [108, 117]}], "urls": [{"url": "https://t.co/zC18SW9ES0", "expanded_url": "https://bit.ly/5storag35", "display_url": "bit.ly/5storag35", "indices": [70, 93]}], "user_mentions": [], "symbols": [], "media": [{"id": 1265999431754813440, "id_str": "1265999431754813440", "indices": [118, 141], "media_url": "http://pbs.twimg.com/media/EZG7xo-XkAACZ1K.jpg", "media_url_https": "https://pbs.twimg.com/media/EZG7xo-XkAACZ1K.jpg", "url": "https://t.co/1EeMGA5iDm", "display_url": "pic.twitter.com/1EeMGA5iDm", "expanded_url": "https://twitter.com/nrawomen/status/1265999506807705601/photo/1", "type": "photo", "sizes": {"large": {"w": 507, "h": 376, "resize": "fit"}, "thumb": {"w": 150, "h": 150, "resize": "crop"}, "medium": {"w": 507, "h": 376, "resize": "fit"}, "small": {"w": 507, "h": 376, "resize": "fit"}}}]}, "extended_entities": {"media": [{"id": 1265999431754813440, "id_str": "1265999431754813440", "indices": [118, 141], "media_url": "http://pbs.twimg.com/media/EZG7xo-XkAACZ1K.jpg", "media_url_https": "https://pbs.twimg.com/media/EZG7xo-XkAACZ1K.jpg", "url": "https://t.co/1EeMGA5iDm", "display_url": "pic.twitter.com/1EeMGA5iDm", "expanded_url": "https://twitter.com/nrawomen/status/1265999506807705601/photo/1", "type": "photo", "sizes": {"large": {"w": 507, "h": 376, "resize": "fit"}, "thumb": {"w": 150, 
"h": 150, "resize": "crop"}, "medium": {"w": 507, "h": 376, "resize": "fit"}, "small": {"w": 507, "h": 376, "resize": "fit"}}}]}}, "quote_count": 1, "reply_count": 6, "retweet_count": 13, "favorite_count": 61, "entities": {"hashtags": [{"text": "gun", "indices": [37, 41]}, {"text": "2a", "indices": [94, 97]}, {"text": "NRAWomen", "indices": [98, 107]}], "urls": [{"url": "https://t.co/zC18SW9ES0", "expanded_url": "https://bit.ly/5storag35", "display_url": "bit.ly/5storag35", "indices": [70, 93]}, {"url": "https://t.co/YRdb9tt3dp", "expanded_url": "https://twitter.com/i/web/status/1265999506807705601", "display_url": "twitter.com/i/web/status/1\u2026", "indices": [109, 132]}], "user_mentions": [], "symbols": []}, "favorited": false, "retweeted": false, "possibly_sensitive": false, "filter_level": "low", "lang": "en"}, "is_quote_status": false, "quote_count": 0, "reply_count": 0, "retweet_count": 0, "favorite_count": 0, "entities": {"hashtags": [{"text": "gun", "indices": [51, 55]}, {"text": "2a", "indices": [108, 111]}, {"text": "NRAWomen", "indices": [112, 121]}, {"text": "prepared", "indices": [122, 131]}], "urls": [{"url": "https://t.co/zC18SW9ES0", "expanded_url": "https://bit.ly/5storag35", "display_url": "bit.ly/5storag35", "indices": [84, 107]}], "user_mentions": [{"screen_name": "nrawomen", "name": "NRA Women", "id": 495132092, "id_str": "495132092", "indices": [3, 12]}], "symbols": []}, "favorited": false, "retweeted": false, "possibly_sensitive": false, "filter_level": "low", "lang": "en", "timestamp_ms": "1590678795599"}

And

head -n 2 janetweets_june24.jsonl
??{"created_at": "Tue Jun 23 13:34:34 +0000 2020", "id": 1275421995878866945, "id_str": "1275421995878866945", "text": "RT @daniels_roc: #Toakers #stonerdiaries\n#BenifitsofMarijuana\n#Medicalmarijuana\n#BlackLivesMatter\n#NativeAmericans\n@dalewilliams867\n@Hippie\u2026", "source": "<a href=\"http://twitter.com/download/iphone\" rel=\"nofollow\">Twitter for iPhone</a>", "truncated": false, "in_reply_to_status_id": null, "in_reply_to_status_id_str": null, "in_reply_to_user_id": null, "in_reply_to_user_id_str": null, "in_reply_to_screen_name": null, "user": {"id": 1195538863118139393, "id_str": "1195538863118139393", "name": "One Chance Initiative", "screen_name": "1Chance2020", "location": "Swing States, USA", "url": "https://www.youtube.com/channel/UC7lhK3B0J214PxeRmri56TQ", "description": "We have one chance to beat Donald Trump. @JoeBiden is that chance! Make it count! #Biden2020", "translator_type": "none", "protected": false, "verified": false, "followers_count": 13856, "friends_count": 15168, "listed_count": 10, "favourites_count": 11131, "statuses_count": 8440, "created_at": "Sat Nov 16 03:07:42 +0000 2019", "utc_offset": null, "time_zone": null, "geo_enabled": true, "lang": null, "contributors_enabled": false, "is_translator": false, "profile_background_color": "F5F8FA", "profile_background_image_url": "", "profile_background_image_url_https": "", "profile_background_tile": false, "profile_link_color": "1DA1F2", "profile_sidebar_border_color": "C0DEED", "profile_sidebar_fill_color": "DDEEF6", "profile_text_color": "333333", "profile_use_background_image": true, "profile_image_url": "http://pbs.twimg.com/profile_images/1199052061557456896/7wngRTGs_normal.jpg", "profile_image_url_https": "https://pbs.twimg.com/profile_images/1199052061557456896/7wngRTGs_normal.jpg", "profile_banner_url": "https://pbs.twimg.com/profile_banners/1195538863118139393/1574709536", "default_profile": true, "default_profile_image": false, "following": null, "follow_request_sent": 
null, "notifications": null}, "geo": null, "coordinates": null, "place": null, "contributors": null, "retweeted_status": {"created_at": "Tue Jun 23 13:04:12 +0000 2020", "id": 1275414355069329409, "id_str": "1275414355069329409", "text": "#Toakers #stonerdiaries\n#BenifitsofMarijuana\n#Medicalmarijuana\n#BlackLivesMatter\n#NativeAmericans\n@dalewilliams867\u2026 https://t.co/blszMPsjlc", "display_text_range": [0, 140], "source": "<a href=\"http://twitter.com/download/android\" rel=\"nofollow\">Twitter for Android</a>", "truncated": true, "in_reply_to_status_id": null, "in_reply_to_status_id_str": null, "in_reply_to_user_id": null, "in_reply_to_user_id_str": null, "in_reply_to_screen_name": null, "user": {"id": 1271971892912685063, "id_str": "1271971892912685063", "name": "\ud83c\udf34\ud83d\ude0e\ud83d\ude09\ud83d\ude37\ud83c\udf34DanielsRocNY", "screen_name": "daniels_roc", "location": null, "url": null, "description": "\ud83c\udf0aAdvocate for the legalization of medicinal & recreational marijuana. Have Muscular Dystrophy. 
Animal Lover & Writer with Muscular Dystrophy.", "translator_type": "none", "protected": false, "verified": false, "followers_count": 126, "friends_count": 260, "listed_count": 0, "favourites_count": 7, "statuses_count": 167, "created_at": "Sun Jun 14 01:07:13 +0000 2020", "utc_offset": null, "time_zone": null, "geo_enabled": false, "lang": null, "contributors_enabled": false, "is_translator": false, "profile_background_color": "F5F8FA", "profile_background_image_url": "", "profile_background_image_url_https": "", "profile_background_tile": false, "profile_link_color": "1DA1F2", "profile_sidebar_border_color": "C0DEED", "profile_sidebar_fill_color": "DDEEF6", "profile_text_color": "333333", "profile_use_background_image": true, "profile_image_url": "http://pbs.twimg.com/profile_images/1273354640277606400/RhFvYdPW_normal.jpg", "profile_image_url_https": "https://pbs.twimg.com/profile_images/1273354640277606400/RhFvYdPW_normal.jpg", "profile_banner_url": "https://pbs.twimg.com/profile_banners/1271971892912685063/1592097459", "default_profile": true, "default_profile_image": false, "following": null, "follow_request_sent": null, "notifications": null}, "geo": null, "coordinates": null, "place": null, "contributors": null, "is_quote_status": false, "extended_tweet": {"full_text": "#Toakers #stonerdiaries\n#BenifitsofMarijuana\n#Medicalmarijuana\n#BlackLivesMatter\n#NativeAmericans\n@dalewilliams867\n@Hippie_of_Love\n@1Chance2020\n@TammyAMH\n@Missin_Florida\n@JeffBirkenhaue2\n@AZsunshine_mama\n@I_Am_Concerned\n@The420Reminder\n@_jena4n\n@Jennife14860185\n@ACannabisQueen https://t.co/NukEWhSO6D", "display_text_range": [0, 277], "entities": {"hashtags": [{"text": "Toakers", "indices": [0, 8]}, {"text": "stonerdiaries", "indices": [9, 23]}, {"text": "BenifitsofMarijuana", "indices": [24, 44]}, {"text": "Medicalmarijuana", "indices": [45, 62]}, {"text": "BlackLivesMatter", "indices": [63, 80]}, {"text": "NativeAmericans", "indices": [81, 97]}], "urls": [], "user_mentions": 
[{"screen_name": "dalewilliams867", "name": "killing time\ud83c\udf0a\ud83c\udf0a\ud83d\udc4a\ud83d\udc4aveteran", "id": 2510643518, "id_str": "2510643518", "indices": [98, 114]}, {"screen_name": "Hippie_of_Love", "name": "Becca\ud83d\udca8\ud83d\udca8", "id": 512287398, "id_str": "512287398", "indices": [115, 130]}, {"screen_name": "1Chance2020", "name": "One Chance Initiative", "id": 1195538863118139393, "id_str": "1195538863118139393", "indices": [131, 143]}, {"screen_name": "TammyAMH", "name": "Tammy H", "id": 968539484, "id_str": "968539484", "indices": [144, 153]}, {"screen_name": "Missin_Florida", "name": "\ud83e\udd8e\ud83c\udf3a\ud83c\udf34Janice\ud83c\udf34\ud83d\ude37\ud83d\udc1a\ud83e\udd88\ud83d\udc0a", "id": 1080406804545327104, "id_str": "1080406804545327104", "indices": [154, 169]}, {"screen_name": "JeffBirkenhaue2", "name": "Jeff Birkenhauer", "id": 1114573637858807809, "id_str": "1114573637858807809", "indices": [170, 186]}, {"screen_name": "AZsunshine_mama", "name": "sunshine", "id": 1093216882046300160, "id_str": "1093216882046300160", "indices": [187, 203]}, {"screen_name": "I_Am_Concerned", "name": "Register NOW & VOTE \ud83c\udf0a #Biden", "id": 458767709, "id_str": "458767709", "indices": [204, 219]}, {"screen_name": "the420reminder", "name": "420 O' Clock", "id": 1254232066683809795, "id_str": "1254232066683809795", "indices": [220, 235]}, {"screen_name": "_jena4n", "name": "JenaSativa420 \ud83d\ude19\ud83d\udca8", "id": 2543557041, "id_str": "2543557041", "indices": [236, 244]}, {"screen_name": "Jennife14860185", "name": "Loca689", "id": 1050479302226132992, "id_str": "1050479302226132992", "indices": [245, 261]}, {"screen_name": "ACannabisQueen", "name": "\ud835\udc00\ud835\udc26\ud835\udc1e\ud835\udc25\ud835\udc22\ud835\udc1a \ud83d\udd25\ud83d\udca8", "id": 861137888838393856, "id_str": "861137888838393856", "indices": [262, 277]}], "symbols": [], "media": [{"id": 1275414347699949569, "id_str": "1275414347699949569", "indices": [278, 
301], "media_url": "http://pbs.twimg.com/media/EbMul4AXkAE_8Dy.jpg", "media_url_https": "https://pbs.twimg.com/media/EbMul4AXkAE_8Dy.jpg", "url": "https://t.co/NukEWhSO6D", "display_url": "pic.twitter.com/NukEWhSO6D", "expanded_url": "https://twitter.com/daniels_roc/status/1275414355069329409/photo/1", "type": "photo", "sizes": {"thumb": {"w": 150, "h": 150, "resize": "crop"}, "large": {"w": 1200, "h": 1200, "resize": "fit"}, "small": {"w": 680, "h": 680, "resize": "fit"}, "medium": {"w": 1200, "h": 1200, "resize": "fit"}}}]}, "extended_entities": {"media": [{"id": 1275414347699949569, "id_str": "1275414347699949569", "indices": [278, 301], "media_url": "http://pbs.twimg.com/media/EbMul4AXkAE_8Dy.jpg", "media_url_https": "https://pbs.twimg.com/media/EbMul4AXkAE_8Dy.jpg", "url": "https://t.co/NukEWhSO6D", "display_url": "pic.twitter.com/NukEWhSO6D", "expanded_url": "https://twitter.com/daniels_roc/status/1275414355069329409/photo/1", "type": "photo", "sizes": {"thumb": {"w": 150, "h": 150, "resize": "crop"}, "large": {"w": 1200, "h": 1200, "resize": "fit"}, "small": {"w": 680, "h": 680, "resize": "fit"}, "medium": {"w": 1200, "h": 1200, "resize": "fit"}}}]}}, "quote_count": 0, "reply_count": 0, "retweet_count": 1, "favorite_count": 1, "entities": {"hashtags": [{"text": "Toakers", "indices": [0, 8]}, {"text": "stonerdiaries", "indices": [9, 23]}, {"text": "BenifitsofMarijuana", "indices": [24, 44]}, {"text": "Medicalmarijuana", "indices": [45, 62]}, {"text": "BlackLivesMatter", "indices": [63, 80]}, {"text": "NativeAmericans", "indices": [81, 97]}], "urls": [{"url": "https://t.co/blszMPsjlc", "expanded_url": "https://twitter.com/i/web/status/1275414355069329409", "display_url": "twitter.com/i/web/status/1\u2026", "indices": [116, 139]}], "user_mentions": [{"screen_name": "dalewilliams867", "name": "killing time\ud83c\udf0a\ud83c\udf0a\ud83d\udc4a\ud83d\udc4aveteran", "id": 2510643518, "id_str": "2510643518", "indices": [98, 114]}], "symbols": []}, "favorited": false, 
"retweeted": false, "possibly_sensitive": false, "filter_level": "low", "lang": "und"}, "is_quote_status": false, "quote_count": 0, "reply_count": 0, "retweet_count": 0, "favorite_count": 0, "entities": {"hashtags": [{"text": "Toakers", "indices": [17, 25]}, {"text": "stonerdiaries", "indices": [26, 40]}, {"text": "BenifitsofMarijuana", "indices": [41, 61]}, {"text": "Medicalmarijuana", "indices": [62, 79]}, {"text": "BlackLivesMatter", "indices": [80, 97]}, {"text": "NativeAmericans", "indices": [98, 114]}], "urls": [], "user_mentions": [{"screen_name": "daniels_roc", "name": "\ud83c\udf34\ud83d\ude0e\ud83d\ude09\ud83d\ude37\ud83c\udf34DanielsRocNY", "id": 1271971892912685063, "id_str": "1271971892912685063", "indices": [3, 15]}, {"screen_name": "dalewilliams867", "name": "killing time\ud83c\udf0a\ud83c\udf0a\ud83d\udc4a\ud83d\udc4aveteran", "id": 2510643518, "id_str": "2510643518", "indices": [115, 131]}], "symbols": []}, "favorited": false, "retweeted": false, "filter_level": "low", "lang": "und", "timestamp_ms": "1592919274669"}
{"created_at": "Tue Jun 23 13:34:34 +0000 2020", "id": 1275421996348735489, "id_str": "1275421996348735489", "text": "RT @cyliazuoy: i can\u2019t believe i just ■■■■ing read these words", "source": "<a href=\"http://twitter.com/download/iphone\" rel=\"nofollow\">Twitter for iPhone</a>", "truncated": false, "in_reply_to_status_id": null, "in_reply_to_status_id_str": null, "in_reply_to_user_id": null, "in_reply_to_user_id_str": null, "in_reply_to_screen_name": null, "user": {"id": 2574444245, "id_str": "2574444245", "name": "honcho", "screen_name": "rjh1ll", "location": "NJ", "url": "https://www.youtube.com/channel/UC0QJ7p5ha82mLgO3MYG30TQ", "description": "Man In The Mirror.", "translator_type": "none", "protected": false, "verified": false, "followers_count": 480, "friends_count": 532, "listed_count": 3, "favourites_count": 38792, "statuses_count": 31194, "created_at": "Sat May 31 21:35:13 +0000 2014", "utc_offset": null, "time_zone": null, "geo_enabled": true, "lang": null, "contributors_enabled": false, "is_translator": false, "profile_background_color": "000000", "profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png", "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png", "profile_background_tile": false, "profile_link_color": "DD2E44", "profile_sidebar_border_color": "000000", "profile_sidebar_fill_color": "000000", "profile_text_color": "000000", "profile_use_background_image": false, "profile_image_url": "http://pbs.twimg.com/profile_images/1274139262137634816/a7LBv6vc_normal.jpg", "profile_image_url_https": "https://pbs.twimg.com/profile_images/1274139262137634816/a7LBv6vc_normal.jpg", "profile_banner_url": "https://pbs.twimg.com/profile_banners/2574444245/1592224396", "default_profile": false, "default_profile_image": false, "following": null, "follow_request_sent": null, "notifications": null}, "geo": null, "coordinates": null, "place": null, "contributors": null, "retweeted_status": 
{"created_at": "Mon Jun 22 06:09:15 +0000 2020", "id": 1274947540660572160, "id_str": "1274947540660572160", "text": "i can\u2019t believe i just ■■■■ing read these words", "source": "<a href=\"http://twitter.com/download/iphone\" rel=\"nofollow\">Twitter for iPhone</a>", "truncated": false, "in_reply_to_status_id": null, "in_reply_to_status_id_str": null, "in_reply_to_user_id": null, "in_reply_to_user_id_str": null, "in_reply_to_screen_name": null, "user": {"id": 3799575852, "id_str": "3799575852", "name": "\u2d63", "screen_name": "cyliazuoy", "location": null, "url": null, "description": "she/her \u2022 \ud83c\udde9\ud83c\uddff \u2022 tahia falasteen", "translator_type": "none", "protected": false, "verified": false, "followers_count": 744, "friends_count": 537, "listed_count": 2, "favourites_count": 19118, "statuses_count": 15718, "created_at": "Tue Oct 06 04:55:52 +0000 2015", "utc_offset": null, "time_zone": null, "geo_enabled": true, "lang": null, "contributors_enabled": false, "is_translator": false, "profile_background_color": "C0DEED", "profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png", "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png", "profile_background_tile": false, "profile_link_color": "1DA1F2", "profile_sidebar_border_color": "C0DEED", "profile_sidebar_fill_color": "DDEEF6", "profile_text_color": "333333", "profile_use_background_image": true, "profile_image_url": "http://pbs.twimg.com/profile_images/1266567168218427392/pL9j7jWp_normal.jpg", "profile_image_url_https": "https://pbs.twimg.com/profile_images/1266567168218427392/pL9j7jWp_normal.jpg", "profile_banner_url": "https://pbs.twimg.com/profile_banners/3799575852/1592368798", "default_profile": true, "default_profile_image": false, "following": null, "follow_request_sent": null, "notifications": null}, "geo": null, "coordinates": null, "place": null, "contributors": null, "quoted_status_id": 1274897643320348674, 
"quoted_status_id_str": "1274897643320348674", "quoted_status": {"created_at": "Mon Jun 22 02:50:59 +0000 2020", "id": 1274897643320348674, "id_str": "1274897643320348674", "text": "CPD pepper spray a double amputee &amp; steal his prosthetics. Cops steal his prosthetics while man uses his arms to ge\u2026 https://t.co/6Jtf0eAS8F", "display_text_range": [0, 140], "source": "<a href=\"http://twitter.com/download/iphone\" rel=\"nofollow\">Twitter for iPhone</a>", "truncated": true, "in_reply_to_status_id": null, "in_reply_to_status_id_str": null, "in_reply_to_user_id": null, "in_reply_to_user_id_str": null, "in_reply_to_screen_name": null, "user": {"id": 36030521, "id_str": "36030521", "name": "Kevin", "screen_name": "kevi180", "location": "Chicago, IL", "url": null, "description": "Human person being. Opinions are my own. Retweets are not endorsements, usually something I find interesting. #Bernie2020", "translator_type": "none", "protected": false, "verified": false, "followers_count": 699, "friends_count": 5002, "listed_count": 22, "favourites_count": 54494, "statuses_count": 58228, "created_at": "Tue Apr 28 09:52:58 +0000 2009", "utc_offset": null, "time_zone": null, "geo_enabled": true, "lang": null, "contributors_enabled": false, "is_translator": false, "profile_background_color": "000000", "profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png", "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png", "profile_background_tile": false, "profile_link_color": "19CF86", "profile_sidebar_border_color": "000000", "profile_sidebar_fill_color": "000000", "profile_text_color": "000000", "profile_use_background_image": false, "profile_image_url": "http://pbs.twimg.com/profile_images/691413719406915584/RCBTn2_0_normal.jpg", "profile_image_url_https": "https://pbs.twimg.com/profile_images/691413719406915584/RCBTn2_0_normal.jpg", "profile_banner_url": "https://pbs.twimg.com/profile_banners/36030521/1410844399", 
"default_profile": false, "default_profile_image": false, "following": null, "follow_request_sent": null, "notifications": null}, "geo": null, "coordinates": null, "place": null, "contributors": null, "is_quote_status": false, "extended_tweet": {"full_text": "CPD pepper spray a double amputee &amp; steal his prosthetics. Cops steal his prosthetics while man uses his arms to get medical help. Crowd rushed police to get legs back. Police are rioting. They have no honor. This is America. #BlackLivesMatter\u00a0 #columbusprotest via #reddit https://t.co/p6CyHjh08S", "display_text_range": [0, 277], "entities": {"hashtags": [{"text": "BlackLivesMatter", "indices": [230, 247]}, {"text": "columbusprotest", "indices": [249, 265]}, {"text": "reddit", "indices": [270, 277]}], "urls": [], "user_mentions": [], "symbols": [], "media": [{"id": 1274897587921997825, "id_str": "1274897587921997825", "indices": [278, 301], "additional_media_info": {"monetizable": false}, "media_url": "http://pbs.twimg.com/ext_tw_video_thumb/1274897587921997825/pu/img/pwbsdZVi4hxe-OEe.jpg", "media_url_https": "https://pbs.twimg.com/ext_tw_video_thumb/1274897587921997825/pu/img/pwbsdZVi4hxe-OEe.jpg", "url": "https://t.co/p6CyHjh08S", "display_url": "pic.twitter.com/p6CyHjh08S", "expanded_url": "https://twitter.com/kevi180/status/1274897643320348674/video/1", "type": "video", "video_info": {"aspect_ratio": [16, 9], "duration_millis": 23733, "variants": [{"bitrate": 256000, "content_type": "video/mp4", "url": "https://video.twimg.com/ext_tw_video/1274897587921997825/pu/vid/480x270/h0YPtzthSyrSDRtu.mp4?tag=10"}, {"bitrate": 832000, "content_type": "video/mp4", "url": "https://video.twimg.com/ext_tw_video/1274897587921997825/pu/vid/640x360/Oo_2X0yA8mEyBUJM.mp4?tag=10"}, {"content_type": "application/x-mpegURL", "url": "https://video.twimg.com/ext_tw_video/1274897587921997825/pu/pl/gxAWWgFVb0-quRaH.m3u8?tag=10"}, {"bitrate": 2176000, "content_type": "video/mp4", "url": 
"https://video.twimg.com/ext_tw_video/1274897587921997825/pu/vid/1280x720/YBOm8L1ecV9F76SB.mp4?tag=10"}]}, "sizes": {"thumb": {"w": 150, "h": 150, "resize": "crop"}, "medium": {"w": 1200, "h": 675, "resize": "fit"}, "small": {"w": 680, "h": 383, "resize": "fit"}, "large": {"w": 1280, "h": 720, "resize": "fit"}}}]}, "extended_entities": {"media": [{"id": 1274897587921997825, "id_str": "1274897587921997825", "indices": [278, 301], "additional_media_info": {"monetizable": false}, "media_url": "http://pbs.twimg.com/ext_tw_video_thumb/1274897587921997825/pu/img/pwbsdZVi4hxe-OEe.jpg", "media_url_https": "https://pbs.twimg.com/ext_tw_video_thumb/1274897587921997825/pu/img/pwbsdZVi4hxe-OEe.jpg", "url": "https://t.co/p6CyHjh08S", "display_url": "pic.twitter.com/p6CyHjh08S", "expanded_url": "https://twitter.com/kevi180/status/1274897643320348674/video/1", "type": "video", "video_info": {"aspect_ratio": [16, 9], "duration_millis": 23733, "variants": [{"bitrate": 256000, "content_type": "video/mp4", "url": "https://video.twimg.com/ext_tw_video/1274897587921997825/pu/vid/480x270/h0YPtzthSyrSDRtu.mp4?tag=10"}, {"bitrate": 832000, "content_type": "video/mp4", "url": "https://video.twimg.com/ext_tw_video/1274897587921997825/pu/vid/640x360/Oo_2X0yA8mEyBUJM.mp4?tag=10"}, {"content_type": "application/x-mpegURL", "url": "https://video.twimg.com/ext_tw_video/1274897587921997825/pu/pl/gxAWWgFVb0-quRaH.m3u8?tag=10"}, {"bitrate": 2176000, "content_type": "video/mp4", "url": "https://video.twimg.com/ext_tw_video/1274897587921997825/pu/vid/1280x720/YBOm8L1ecV9F76SB.mp4?tag=10"}]}, "sizes": {"thumb": {"w": 150, "h": 150, "resize": "crop"}, "medium": {"w": 1200, "h": 675, "resize": "fit"}, "small": {"w": 680, "h": 383, "resize": "fit"}, "large": {"w": 1280, "h": 720, "resize": "fit"}}}]}}, "quote_count": 645, "reply_count": 65, "retweet_count": 5446, "favorite_count": 7174, "entities": {"hashtags": [], "urls": [{"url": "https://t.co/6Jtf0eAS8F", "expanded_url": 
"https://twitter.com/i/web/status/1274897643320348674", "display_url": "twitter.com/i/web/status/1\u2026", "indices": [121, 144]}], "user_mentions": [], "symbols": []}, "favorited": false, "retweeted": false, "possibly_sensitive": false, "filter_level": "low", "lang": "en"}, "quoted_status_permalink": {"url": "https://t.co/TlkOET2qvP", "expanded": "https://twitter.com/kevi180/status/1274897643320348674", "display": "twitter.com/kevi180/status\u2026"}, "is_quote_status": true, "quote_count": 101, "reply_count": 47, "retweet_count": 29055, "favorite_count": 51610, "entities": {"hashtags": [], "urls": [], "user_mentions": [], "symbols": []}, "favorited": false, "retweeted": false, "filter_level": "low", "lang": "en"}, "quoted_status_id": 1274897643320348674, "quoted_status_id_str": "1274897643320348674", "quoted_status": {"created_at": "Mon Jun 22 02:50:59 +0000 2020", "id": 1274897643320348674, "id_str": "1274897643320348674", "text": "CPD pepper spray a double amputee &amp; steal his prosthetics. Cops steal his prosthetics while man uses his arms to ge\u2026 https://t.co/6Jtf0eAS8F", "display_text_range": [0, 140], "source": "<a href=\"http://twitter.com/download/iphone\" rel=\"nofollow\">Twitter for iPhone</a>", "truncated": true, "in_reply_to_status_id": null, "in_reply_to_status_id_str": null, "in_reply_to_user_id": null, "in_reply_to_user_id_str": null, "in_reply_to_screen_name": null, "user": {"id": 36030521, "id_str": "36030521", "name": "Kevin", "screen_name": "kevi180", "location": "Chicago, IL", "url": null, "description": "Human person being. Opinions are my own. Retweets are not endorsements, usually something I find interesting. 
#Bernie2020", "translator_type": "none", "protected": false, "verified": false, "followers_count": 699, "friends_count": 5002, "listed_count": 22, "favourites_count": 54494, "statuses_count": 58228, "created_at": "Tue Apr 28 09:52:58 +0000 2009", "utc_offset": null, "time_zone": null, "geo_enabled": true, "lang": null, "contributors_enabled": false, "is_translator": false, "profile_background_color": "000000", "profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png", "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png", "profile_background_tile": false, "profile_link_color": "19CF86", "profile_sidebar_border_color": "000000", "profile_sidebar_fill_color": "000000", "profile_text_color": "000000", "profile_use_background_image": false, "profile_image_url": "http://pbs.twimg.com/profile_images/691413719406915584/RCBTn2_0_normal.jpg", "profile_image_url_https": "https://pbs.twimg.com/profile_images/691413719406915584/RCBTn2_0_normal.jpg", "profile_banner_url": "https://pbs.twimg.com/profile_banners/36030521/1410844399", "default_profile": false, "default_profile_image": false, "following": null, "follow_request_sent": null, "notifications": null}, "geo": null, "coordinates": null, "place": null, "contributors": null, "is_quote_status": false, "extended_tweet": {"full_text": "CPD pepper spray a double amputee &amp; steal his prosthetics. Cops steal his prosthetics while man uses his arms to get medical help. Crowd rushed police to get legs back. Police are rioting. They have no honor. This is America. 
#BlackLivesMatter\u00a0 #columbusprotest via #reddit https://t.co/p6CyHjh08S", "display_text_range": [0, 277], "entities": {"hashtags": [{"text": "BlackLivesMatter", "indices": [230, 247]}, {"text": "columbusprotest", "indices": [249, 265]}, {"text": "reddit", "indices": [270, 277]}], "urls": [], "user_mentions": [], "symbols": [], "media": [{"id": 1274897587921997825, "id_str": "1274897587921997825", "indices": [278, 301], "additional_media_info": {"monetizable": false}, "media_url": "http://pbs.twimg.com/ext_tw_video_thumb/1274897587921997825/pu/img/pwbsdZVi4hxe-OEe.jpg", "media_url_https": "https://pbs.twimg.com/ext_tw_video_thumb/1274897587921997825/pu/img/pwbsdZVi4hxe-OEe.jpg", "url": "https://t.co/p6CyHjh08S", "display_url": "pic.twitter.com/p6CyHjh08S", "expanded_url": "https://twitter.com/kevi180/status/1274897643320348674/video/1", "type": "video", "video_info": {"aspect_ratio": [16, 9], "duration_millis": 23733, "variants": [{"bitrate": 256000, "content_type": "video/mp4", "url": "https://video.twimg.com/ext_tw_video/1274897587921997825/pu/vid/480x270/h0YPtzthSyrSDRtu.mp4?tag=10"}, {"bitrate": 832000, "content_type": "video/mp4", "url": "https://video.twimg.com/ext_tw_video/1274897587921997825/pu/vid/640x360/Oo_2X0yA8mEyBUJM.mp4?tag=10"}, {"content_type": "application/x-mpegURL", "url": "https://video.twimg.com/ext_tw_video/1274897587921997825/pu/pl/gxAWWgFVb0-quRaH.m3u8?tag=10"}, {"bitrate": 2176000, "content_type": "video/mp4", "url": "https://video.twimg.com/ext_tw_video/1274897587921997825/pu/vid/1280x720/YBOm8L1ecV9F76SB.mp4?tag=10"}]}, "sizes": {"thumb": {"w": 150, "h": 150, "resize": "crop"}, "medium": {"w": 1200, "h": 675, "resize": "fit"}, "small": {"w": 680, "h": 383, "resize": "fit"}, "large": {"w": 1280, "h": 720, "resize": "fit"}}}]}, "extended_entities": {"media": [{"id": 1274897587921997825, "id_str": "1274897587921997825", "indices": [278, 301], "additional_media_info": {"monetizable": false}, "media_url": 
"http://pbs.twimg.com/ext_tw_video_thumb/1274897587921997825/pu/img/pwbsdZVi4hxe-OEe.jpg", "media_url_https": "https://pbs.twimg.com/ext_tw_video_thumb/1274897587921997825/pu/img/pwbsdZVi4hxe-OEe.jpg", "url": "https://t.co/p6CyHjh08S", "display_url": "pic.twitter.com/p6CyHjh08S", "expanded_url": "https://twitter.com/kevi180/status/1274897643320348674/video/1", "type": "video", "video_info": {"aspect_ratio": [16, 9], "duration_millis": 23733, "variants": [{"bitrate": 256000, "content_type": "video/mp4", "url": "https://video.twimg.com/ext_tw_video/1274897587921997825/pu/vid/480x270/h0YPtzthSyrSDRtu.mp4?tag=10"}, {"bitrate": 832000, "content_type": "video/mp4", "url": "https://video.twimg.com/ext_tw_video/1274897587921997825/pu/vid/640x360/Oo_2X0yA8mEyBUJM.mp4?tag=10"}, {"content_type": "application/x-mpegURL", "url": "https://video.twimg.com/ext_tw_video/1274897587921997825/pu/pl/gxAWWgFVb0-quRaH.m3u8?tag=10"}, {"bitrate": 2176000, "content_type": "video/mp4", "url": "https://video.twimg.com/ext_tw_video/1274897587921997825/pu/vid/1280x720/YBOm8L1ecV9F76SB.mp4?tag=10"}]}, "sizes": {"thumb": {"w": 150, "h": 150, "resize": "crop"}, "medium": {"w": 1200, "h": 675, "resize": "fit"}, "small": {"w": 680, "h": 383, "resize": "fit"}, "large": {"w": 1280, "h": 720, "resize": "fit"}}}]}}, "quote_count": 645, "reply_count": 65, "retweet_count": 5446, "favorite_count": 7174, "entities": {"hashtags": [], "urls": [{"url": "https://t.co/6Jtf0eAS8F", "expanded_url": "https://twitter.com/i/web/status/1274897643320348674", "display_url": "twitter.com/i/web/status/1\u2026", "indices": [121, 144]}], "user_mentions": [], "symbols": []}, "favorited": false, "retweeted": false, "possibly_sensitive": false, "filter_level": "low", "lang": "en"}, "quoted_status_permalink": {"url": "https://t.co/TlkOET2qvP", "expanded": "https://twitter.com/kevi180/status/1274897643320348674", "display": "twitter.com/kevi180/status\u2026"}, "is_quote_status": true, "quote_count": 0, "reply_count": 0, 
"retweet_count": 0, "favorite_count": 0, "entities": {"hashtags": [], "urls": [], "user_mentions": [{"screen_name": "cyliazuoy", "name": "\u2d63", "id": 3799575852, "id_str": "3799575852", "indices": [3, 13]}], "symbols": []}, "favorited": false, "retweeted": false, "filter_level": "low", "lang": "en", "timestamp_ms": "1592919274781"}

These are both taken from the full jsonl files via the command line. Thank you for your help.

Edit: the post with the full code etc. is currently awaiting moderation. Sorry for any confusion.

Thanks @javierluraschi. I've included a reproducible example below, following some of the guidelines. I updated sparklyr but that didn't solve the issue, and I'm currently using version 1.3.1 with Spark 2.3.3.

library(sparklyr)
library(sparklyr.nested)
library(dplyr)

sc <- spark_connect(master = "local")

june <- spark_read_json(sc, "janetweets_june24.jsonl")

head("janetweets_june24.jsonl", 2) %>% deparse()
# [1] "\"janetweets_june24.jsonl\""

june1 <- spark_read_json(sc, "june1-aa.jsonl")
head(june1, 2) %>% deparse()
"structure(list(src = structure(list(con = structure(list(master = \"local[16]\", "                                                                                                                                                 
"    method = \"shell\", app_name = \"sparklyr\", config = structure(list("                                                                                                                                                         
"        spark.env.SPARK_LOCAL_IP.local = \"127.0.0.1\", sparklyr.connect.csv.embedded = \"^1.*\", "                                                                                                                                
"        spark.sql.legacy.utcTimestampFunc.enabled = TRUE, sparklyr.connect.cores.local = 16L, "                                                                                                                                    
"        spark.sql.shuffle.partitions.local = 16L, \"sparklyr.shell.driver-memory\" = \"2g\"), config = \"default\", file = \"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/sparklyr/conf/config-template.yml\"), "
"    state = <environment>, extensions = list(jars = character(0), "                                                                                                                                                                
"        packages = character(0), initializers = list(), catalog_jars = character(0), "                                                                                                                                             
"        repositories = character(0)), spark_home = \"/Users/janecronin/spark/spark-2.3.3-bin-hadoop2.7\", "                                                                                                                        
"    backend = structure(4L, class = c(\"sockconn\", \"connection\", "                                                                                                                                                              
"    \"shell_backend\"), conn_id = <pointer: 0x240>), monitoring = structure(5L, class = c(\"sockconn\", "                                                                                                                          
"    \"connection\", \"shell_backend\"), conn_id = <pointer: 0x241>), "                                                                                                                                                             
"    gateway = structure(3L, class = c(\"sockconn\", \"connection\""                                                                                                                                                                
"    ), conn_id = <pointer: 0x23f>), output_file = \"/var/folders/2_/4bxblxq142gc4stc28nfcwmw0000gn/T//RtmpszilKL/file4c55108ce0e9_spark.log\", "                                                                                   
"    sessionId = 28753, home_version = \"2.3.3\"), class = c(\"spark_connection\", "                                                                                                                                                
"\"spark_shell_connection\", \"DBIConnection\"))), class = c(\"src_spark\", "                                                                                                                                                       
"\"src_sql\", \"src\")), ops = structure(list(name = \"head\", x = structure(list("                                                                                                                                                 
"    x = structure(\"june1aa\", class = c(\"ident\", \"character\")), "                                                                                                                                                             
"    vars = c(\"contributors\", \"coordinates\", \"created_at\", \"display_text_range\", "                                                                                                                                          
"    \"entities\", \"extended_entities\", \"extended_tweet\", \"favorite_count\", "                                                                                                                                                 
"    \"favorited\", \"filter_level\", \"geo\", \"id\", \"id_str\", \"in_reply_to_screen_name\", "                                                                                                                                   
"    \"in_reply_to_status_id\", \"in_reply_to_status_id_str\", \"in_reply_to_user_id\", "                                                                                                                                           
"    \"in_reply_to_user_id_str\", \"is_quote_status\", \"lang\", \"place\", "                                                                                                                                                       
"    \"possibly_sensitive\", \"quote_count\", \"quoted_status\", \"quoted_status_id\", "                                                                                                                                            
"    \"quoted_status_id_str\", \"quoted_status_permalink\", \"reply_count\", "                                                                                                                                                      
"    \"retweet_count\", \"retweeted\", \"retweeted_status\", \"source\", "                                                                                                                                                          
"    \"text\", \"timestamp_ms\", \"truncated\", \"user\")), class = c(\"op_base_remote\", "                                                                                                                                         
"\"op_base\", \"op\")), dots = list(), args = list(n = 2)), class = c(\"op_head\", "                                                                                                                                                
"\"op_single\", \"op\"))), class = c(\"tbl_spark\", \"tbl_sql\", \"tbl_lazy\", "                                                                                                                                                    
"\"tbl\"))" 
glimpse(june1)

The following errors are returned:

glimpse(june1)
Rows: ??
Error: java.lang.NullPointerException
	at sparklyr.Collectors$.collectLongArr(collectors.scala:87)
	at sparklyr.Collectors$$anonfun$mkColumnCtx$17.apply(collectors.scala:224)
	at sparklyr.Collectors$$anonfun$mkColumnCtx$17.apply(collectors.scala:224)
	at sparklyr.Collectors$ColumnCtx.collect(collectors.scala:183)
	at sparklyr.Utils$.sparklyr$Utils$$collectRows(utils.scala:90)
	at sparklyr.Utils$.collect(utils.scala:114)
	at sparklyr.Utils.collect(utils.scala)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at sparklyr.Invoke.invoke(invoke.scala:147)
	at sparklyr.StreamHandler.handleMethodCall(stream.scala:136)
	at sparklyr.StreamHandler.read(stream.scala:61)
	at sparklyr.BackendHandler$$anonfun$channelRead0$1.apply$mcV$sp(handler.scala:58)
	at scala.util.control.Breaks.breakable(Breaks.scala:38)
	at sparklyr.BackendHandler.channelRead0(handler.scala:38)
	at sparklyr.BackendHandler.channelRead0(handler.scala:14)
	at io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:105)
	at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362)
	at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348)
	at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:340)
	at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:102)
	at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362)
	at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348)
	at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:340)
	at io.netty.handler.codec.ByteToMessageDecoder.fireChannelRead(ByteToMessageDecoder.java:310)
	at io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:284)
	at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362)
	at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348)
	at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:340)
	at io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1359)
	at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362)
	at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348)
	at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:935)
	at io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:138)
	at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:645)
	at io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:580)
	at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:497)
	at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:459)
	at io.netty.util.concurrent.SingleThreadEventExecutor$5.run(SingleThreadEventExecutor.java:858)
	at io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:138)
	at java.lang.Thread.run(Thread.java:748)

And a second error, when reading a different file:

june <- spark_read_json(sc, "janetweets_june24.jsonl")
Error: org.apache.spark.sql.AnalysisException: Since Spark 2.3, the queries from raw JSON/CSV files are disallowed when the
referenced columns only include the internal corrupt record column
(named _corrupt_record by default). For example:
spark.read.schema(schema).json(file).filter($"_corrupt_record".isNotNull).count()
and spark.read.schema(schema).json(file).select("_corrupt_record").show().
Instead, you can cache or save the parsed results and then send the same query.
For example, val df = spark.read.schema(schema).json(file).cache() and then
df.filter($"_corrupt_record".isNotNull).count().;
	at org.apache.spark.sql.execution.datasources.json.JsonFileFormat.buildReader(JsonFileFormat.scala:118)
	at org.apache.spark.sql.execution.datasources.FileFormat$class.buildReaderWithPartitionValues(FileFormat.scala:129)
	at org.apache.spark.sql.execution.datasources.TextBasedFileFormat.buildReaderWithPartitionValues(FileFormat.scala:160)
	at org.apache.spark.sql.execution.FileSourceScanExec.inputRDD$lzycompute(DataSourceScanExec.scala:294)
	at org.apache.spark.sql.execution.FileSourceScanExec.inputRDD(DataSourceScanExec.scala:290)
	at org.apache.spark.sql.execution.FileSourceScanExec.inputRDDs(DataSourceScanExec.scala:312)
	at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:610)
	at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
	at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127)
	at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
	at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
	at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
	at org.apache.spark.sql.execution.columnar.InMemoryRelation.buildBuffers(InMemoryRelation.scala:107)
	at org.apache.spark.sql.execution.columnar.InMemoryRelation.<init>(InMemoryRelation.scala:102)
	at org.apache.spark.sql.execution.columnar.InMemoryRelation$.apply(InMemoryRelation.scala:43)
	at org.apache.spark.sql.execution.CacheManager$$anonfun$cacheQuery$1.apply(CacheManager.scala:97)
	at org.apache.spark.sql.execution.CacheManager.writeLock(CacheManager.scala:67)
	at org.apache.spark.sql.execution.CacheManager.cacheQuery(CacheManager.scala:91)
	at org.apache.spark.sql.internal.CatalogImpl.cacheTable(CatalogImpl.scala:420)
	at org.apache.spark.sql.execution.command.CacheTableCommand.run(cache.scala:39)
	at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
	at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
	at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79)
	at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:190)
	at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:190)
	at org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3259)
	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:77)
	at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3258)
	at org.apache.spark.sql.Dataset.<init>(Dataset.scala:190)
	at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:75)
	at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:642)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at sparklyr.Invoke.invoke(invoke.scala:147)
	at sparklyr.StreamHandler.handleMethodCall(stream.scala:136)
	at sparklyr.StreamHandler.read(stream.scala:61)
	at sparklyr.BackendHandler$$anonfun$channelRead0$1.apply$mcV$sp(handler.scala:58)
	at scala.util.control.Breaks.breakable(Breaks.scala:38)
	at sparklyr.BackendHandler.channelRead0(handler.scala:38)
	at sparklyr.BackendHandler.channelRead0(handler.scala:14)
	at io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:105)
	at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362)
	at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348)
	at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:340)
	at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:102)
	at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362)
	at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348)
	at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:340)
	at io.netty.handler.codec.ByteToMessageDecoder.fireChannelRead(ByteToMessageDecoder.java:310)
	at io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:284)
	at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362)
	at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348)
	at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:340)
	at io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1359)
	at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362)
	at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348)
	at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:935)
	at io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:138)
	at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:645)
	at io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:580)
	at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:497)
	at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:459)
	at io.netty.util.concurrent.SingleThreadEventExecutor$5.run(SingleThreadEventExecutor.java:858)
	at io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:138)
	at java.lang.Thread.run(Thread.java:748)

@javierluraschi Would you be able to share the code you used to read those in?

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.