Format date: returns NAs

Hello,

I tried to format my date in the dataset. All I had as a result was NA.


library(tidyverse) 
library(plyr)
#> -------------------------------------------------------------------------
#> You have loaded plyr after dplyr - this is likely to cause problems.
#> If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
#> library(plyr); library(dplyr)
#> -------------------------------------------------------------------------
#> 
#> Attaching package: 'plyr'
#> The following objects are masked from 'package:dplyr':
#> 
#>     arrange, count, desc, failwith, id, mutate, rename, summarise,
#>     summarize
#> The following object is masked from 'package:purrr':
#> 
#>     compact
library(dplyr)
library(ggplot2)
library(data.table)
#> 
#> Attaching package: 'data.table'
#> The following objects are masked from 'package:dplyr':
#> 
#>     between, first, last
#> The following object is masked from 'package:purrr':
#> 
#>     transpose
library(reprex)
library(lubridate)
#> 
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:data.table':
#> 
#>     hour, isoweek, mday, minute, month, quarter, second, wday,
#>     week, yday, year
#> The following object is masked from 'package:plyr':
#> 
#>     here
#> The following object is masked from 'package:base':
#> 
#>     date
library(arules)
#> Loading required package: Matrix
#> 
#> Attaching package: 'Matrix'
#> The following object is masked from 'package:tidyr':
#> 
#>     expand
#> 
#> Attaching package: 'arules'
#> The following object is masked from 'package:dplyr':
#> 
#>     recode
#> The following objects are masked from 'package:base':
#> 
#>     abbreviate, write
library(arulesViz)
#> Loading required package: grid

datt1 = structure(list(CUSTOMER_NUMBER = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 
                                           0L, 0L, 0L), Accounting_Date = c("2017-03-15", "2017-03-15", 
                                                                            "2018-01-23", "2018-01-23", "2018-01-05", "2018-03-15", "2018-01-23", 
                                                                            "2017-09-13", "2017-04-03", "2017-09-18"), PRODUCT_SUB_LINE_DESCR = c("SUNDRY", 
                                                                                                                                                  "SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY", 
                                                                                                                                                  "SUNDRY", "SUNDRY"), ITEM_CATEGORY_DESCR = c("BURS CARBIDE SURGICAL", 
                                                                                                                                                                                               "X-RAY HOLDING DEVICES - REUSABLE", "DENTAL FLOSS", "X-RAY FILM INTRAORAL", 
                                                                                                                                                                                               "TOOTHPASTE PRESCRIPTION AND SPECIALTY", "ENDO AND IRRIGATION SYRINGE NEEDLES", 
                                                                                                                                                                                               "STICKY WAX", "CROWN AND BRIDGE REMOVERS", "CURING LIGHT LED", 
                                                                                                                                                                                               "ORAL CUP LINERS")), class = c("data.table", "data.frame"), row.names = c(NA, 
                                                                                                                                                                                                                                                                         -10L))


View(datt1)
str(datt1)
#> Classes 'data.table' and 'data.frame':   10 obs. of  4 variables:
#>  $ CUSTOMER_NUMBER       : int  0 0 0 0 0 0 0 0 0 0
#>  $ Accounting_Date       : chr  "2017-03-15" "2017-03-15" "2018-01-23" "2018-01-23" ...
#>  $ PRODUCT_SUB_LINE_DESCR: chr  "SUNDRY" "SUNDRY" "SUNDRY" "SUNDRY" ...
#>  $ ITEM_CATEGORY_DESCR   : chr  "BURS CARBIDE SURGICAL" "X-RAY HOLDING DEVICES - REUSABLE" "DENTAL FLOSS" "X-RAY FILM INTRAORAL" ...
New_Date <- as.Date(format(as.Date(datt1$Accounting_Date, "%y-%m-%d"), "%m/%d/%y"))

New_Date
#>  [1] NA NA NA NA NA NA NA NA NA NA

Created on 2018-07-28 by the reprex
package
(v0.2.0).

A good way to debug problems like this is to strip off one layer of nested operations at a time, until you find where things go wrong:

Accounting_Date <- c("2017-03-15", "2017-03-15", "2018-01-23", "2018-01-23", 
                     "2018-01-05", "2018-03-15", "2018-01-23", "2017-09-13", 
                     "2017-04-03", "2017-09-18")

as.Date(format(as.Date(Accounting_Date, "%y-%m-%d"), "%m/%d/%y"))
#>  [1] NA NA NA NA NA NA NA NA NA NA

format(as.Date(Accounting_Date, "%y-%m-%d"), "%m/%d/%y")
#>  [1] NA NA NA NA NA NA NA NA NA NA

as.Date(Accounting_Date, "%y-%m-%d")
#>  [1] NA NA NA NA NA NA NA NA NA NA

The problem here is that "%y-%m-%d" is the wrong format string — %y is a two-digit year. You want "%Y-%m-%d". See: https://www.rdocumentation.org/packages/base/versions/3.5.1/topics/strptime

as.Date(Accounting_Date, "%Y-%m-%d")
#>  [1] "2017-03-15" "2017-03-15" "2018-01-23" "2018-01-23" "2018-01-05"
#>  [6] "2018-03-15" "2018-01-23" "2017-09-13" "2017-04-03" "2017-09-18"

format(as.Date(Accounting_Date, "%Y-%m-%d"), "%m/%d/%y")
#>  [1] "03/15/17" "03/15/17" "01/23/18" "01/23/18" "01/05/18" "03/15/18"
#>  [7] "01/23/18" "09/13/17" "04/03/17" "09/18/17"

# Fixing the first error reveals a new problem!
as.Date(format(as.Date(Accounting_Date, "%Y-%m-%d"), "%m/%d/%y"))
#> Error in charToDate(x): character string is not in a standard unambiguous format

as.Date(format(as.Date(Accounting_Date, "%Y-%m-%d"), "%m/%d/%y"), format = "%m/%d/%y")
#>  [1] "2017-03-15" "2017-03-15" "2018-01-23" "2018-01-23" "2018-01-05"
#>  [6] "2018-03-15" "2018-01-23" "2017-09-13" "2017-04-03" "2017-09-18"

But this sequence of operations doesn't make much sense — as you might suspect from the output of the last line, it's not actually accomplishing anything:

Accounting_Date_YMD <- as.Date(Accounting_Date, "%Y-%m-%d")
Accounting_Date_MDY <- as.Date(format(as.Date(Accounting_Date, "%Y-%m-%d"), "%m/%d/%y"), format = "%m/%d/%y")

str(Accounting_Date_MDY)
#>  Date[1:10], format: "2017-03-15" "2017-03-15" "2018-01-23" "2018-01-23" "2018-01-05" ...
str(Accounting_Date_YMD)
#>  Date[1:10], format: "2017-03-15" "2017-03-15" "2018-01-23" "2018-01-23" "2018-01-05" ...

Storing a date as a date in a different presentation format is pretty much meaningless. That's because internally, a Date is just a number with a Date class attribute:

unclass(Accounting_Date_MDY)
#>  [1] 17240 17240 17554 17554 17536 17605 17554 17422 17259 17427
unclass(Accounting_Date_YMD)
#>  [1] 17240 17240 17554 17554 17536 17605 17554 17422 17259 17427

The number represents the number of days since a specific date — for R, that origin date happens to be 1970-01-01 (see: https://www.rdocumentation.org/packages/base/versions/3.5.1/topics/Dates)

Basically, you have two choices:

  • Store the dates in Date format. R will display the values in YYYY-MM-DD format, but they can be converted when needed (e.g., for printing in a table or in plotting code) into a different presentation format. The presentation format will always be a character string, so you only make this conversion at the last minute when analysis is done.
  • Store the dates as character strings in your preferred presentation format. This is almost never what you want to do, since the dates won't work properly in analyses or plots.
4 Likes