Convert Currency String to int, and int to date

How do I correctly convert a string (currency) to int, and an int to a date?
In the case of int to date,
I followed the post and I can't get the result.

library(data.table)
y=fread('"Award Date" "Award Type" "Grant Amount"
           20200511      grant       $6,000
           20200429      grant      $10,000
           20200422      grant      $10,000
           20200504      grant      $10,000
           20200417       loan           $0
           20200409      grant     $575,202
           20200409      grant   $4,003,664
           20200409      grant     $350,736
           20200409      grant     $790,589
           20200409      grant   $2,356,884
           20200409      grant  $11,175,698
           20200409      grant   $1,560,413
 ',colClasses=list(factor=2:3))

str(y)
y[ ,(1):=as.Date(as.character(.SD[,1]),format="%Y%m%d"),]
y[ ,(3):=as.numeric(gsub("[$,]","",.SD[,3]))

>Warning message:
In eval(jsub, SDenv, parent.frame()) : NAs introduced by coercion

I am interested in referring to the columns by their position (number) and not by name.
Have I used the syntax correctly?

Working with the original data

y<-fread("https://data.covidstimuluswatch.org/prog.php?&detail=export_csv",select=c(3:5));
 Downloaded 3324898 bytes...
> y[1:10,gsub("[$,]","",.SD[,3]),]
[1] "c(\"6000\" \"10000\" \"10000\" \"10000\" \"0\" \"575202\" \"4003664\" \"350736\" \"790589\" \"2356884\")"

And in this way everything works.

x <- read.csv("https://data.covidstimuluswatch.org/prog.php?&detail=export_csv")[,3:5];
> x[,3] <- as.numeric(gsub("[$,]","",x[,3]));
> x <- x[(x[,1]>20200400)&x[,3]>0,];
> x[,1] <- as.Date(as.character(x[,1]),"%Y%m%d");
> 
> x
    Award.Date Award.Type Grant.Amount
1   2020-05-11      grant         6000
2   2020-04-29      grant        10000
3   2020-04-22      grant        10000
4   2020-05-04      grant        10000
......
772 2020-05-08      grant      9598320
773 2020-05-08      grant     60843431
774 2020-05-08      grant     10117948
 [ reached 'max' / getOption("max.print") -- omitted 6288 rows ]
> 

But the intention is to reproduce it with the data.table package

I follow the guide:
https://cran.r-project.org/doc/FAQ/R-FAQ.html#How-do-I-convert-factors-to-numeric_003f
I have tried, without results:

Here it works:

x <- read.csv("https://data.covidstimuluswatch.org/prog.php?&detail=export_csv")[,3:5];
> str(x)
'data.frame':	10540 obs. of  3 variables:
 $ Award.Date  : int  20200514 20200514 20200514 20200514 20200514 20200514 20200514 20200514 20200514 20200514 ...
 $ Award.Type  : Factor w/ 2 levels "grant","loan": 1 1 1 1 1 1 1 1 1 1 ...
 $ Grant.Amount: Factor w/ 9948 levels "$0","$1,000,090",..: 9157 6388 1280 6501 2141 4730 563 4999 6934 3373 ...
x[,3] <- as.numeric(gsub("[$,]","",x[,3]));
x <- x[(x[,1]>20200400)&x[,3]>0,];
x[,1] <- as.Date(as.character(x[,1]),"%Y%m%d");
x

And here it doesn't work:

str(x) is equal to str(y) when using fread(..., stringsAsFactors = TRUE)

y<-fread("https://data.covidstimuluswatch.org/prog.php?&detail=export_csv",stringsAsFactors=TRUE)[,3:5];
str(y)
Classes ‘data.table’ and 'data.frame':	10540 obs. of  3 variables:
 $ Award Date  : int  20200514 20200514 20200514 20200514 20200514 20200514 20200514 20200514 20200514 20200514 ...
 $ Award Type  : Factor w/ 2 levels "grant","loan": 1 1 1 1 1 1 1 1 1 1 ...
 $ Grant Amount: Factor w/ 9948 levels "$0","$1,000,090",..: 9157 6388 1280 6501 2141 4730 563 4999 6934 3373 ...
 - attr(*, ".internal.selfref")=<externalptr> 
> y[,gsub("[$,]","",.SD[1,3])]
[1] "9157"
> y[,factor(gsub("[$,]","",.SD[1,3]))]
[1] 9157
Levels: 9157
> y[,class(gsub("[$,]","",.SD[1,3]))]
[1] "character"
> y[,as.numeric(gsub("[$,]","",.SD[1,3]))]
[1] 9157
> y[1]
   Award Date Award Type Grant Amount
1:   20200514      grant     $852,780
> y[1,3]
   Grant Amount
1:     $852,780
> y[,as.numeric(paste(.SD[1,3])),]
[1] 9157
> y [,as.numeric(as.character(.SD[1,3])),]
[1] 9157
> y[,levels(gsub("[$,]","",.SD[1,3]))]
NULL

With the above result I can't apply the approach the FAQ describes as
"More efficient, but harder to remember":

as.numeric(levels(f))[as.integer(f)]

I can't translate that syntax to data.table.
Any comments, help, or guidance are welcome.

After looking into categorical data (factors), I managed to work out a solution:

> rm(list = ls())
> x <- read.csv("https://data.covidstimuluswatch.org/prog.php?&detail=export_csv")[,3:5];
> str(x)
'data.frame':	10673 obs. of  3 variables:
 $ Award.Date  : int  20200514 20200514 20200514 20200514 20200514 20200514 20200514 20200514 20200514 20200514 ...
 $ Award.Type  : Factor w/ 2 levels "grant","loan": 1 1 1 1 1 1 1 1 1 1 ...
 $ Grant.Amount: Factor w/ 10071 levels "$0","$1,000,090",..: 9276 6482 1292 6595 2163 4814 567 5084 7031 3425 ...
> y<-fread("https://data.covidstimuluswatch.org/prog.php?&detail=export_csv",select=c(3:5),stringsAsFactors=FALSE,
+          colClasses=list(factor=4:5,integer=3) ,check.names=TRUE)
 Downloaded 5122619 bytes...> str(y)
Classes ‘data.table’ and 'data.frame':	10673 obs. of  3 variables:
 $ Award.Date  : int  20200514 20200514 20200514 20200514 20200514 20200514 20200514 20200514 20200514 20200514 ...
 $ Award.Type  : Factor w/ 2 levels "grant","loan": 1 1 1 1 1 1 1 1 1 1 ...
 $ Grant.Amount: Factor w/ 10071 levels "$0","$1,000,090",..: 9276 6482 1292 6595 2163 4814 567 5084 7031 3425 ...
 - attr(*, ".internal.selfref")=<externalptr> 
> all(mapply(all.equal, x, y))
[1] TRUE
> x[,3] <- as.numeric(gsub("[$,]","",x[,3]));
> x <- x[(x[,1]>20200400)&x[,3]>0,];
> x[,1] <- as.Date(as.character(x[,1]),"%Y%m%d");
> head(x,6)
  Award.Date Award.Type Grant.Amount
1 2020-05-14      grant       852780
2 2020-05-14      grant       573727
3 2020-05-14      grant      1446721
4 2020-05-14      grant       583099
5 2020-05-14      grant     10028068
6 2020-05-14      grant      4265781
> tail(x,6)
      Award.Date Award.Type Grant.Amount
10667 2020-05-08      grant      4179700
10668 2020-05-11      grant     26595000
10669 2020-04-10      grant       913000
10670 2020-04-10      grant       700000
10671 2020-05-14      grant      6903307
10672 2020-05-13      grant      9858000
> y[,setattr(Grant.Amount,"levels",gsub("[$,]","",levels(Grant.Amount))),]
NULL
> y<-y[Award.Date>20200400 & as.numeric(levels(Grant.Amount))[as.integer(Grant.Amount)]>0,]
> y[,Award.Date:=as.Date(as.character(Award.Date),"%Y%m%d")]
> print(y,topn = 6)
       Award.Date Award.Type Grant.Amount
    1: 2020-05-14      grant       852780
    2: 2020-05-14      grant       573727
    3: 2020-05-14      grant      1446721
    4: 2020-05-14      grant       583099
    5: 2020-05-14      grant     10028068
    6: 2020-05-14      grant      4265781
   ---                                   
10088: 2020-05-08      grant      4179700
10089: 2020-05-11      grant     26595000
10090: 2020-04-10      grant       913000
10091: 2020-04-10      grant       700000
10092: 2020-05-14      grant      6903307
10093: 2020-05-13      grant      9858000
> 

I would like to ask you some questions:

  1. Which assignment operator should I use when working with the data.table package: "=" or "<-"?

  2. Could the following code be improved to make it more "friendly"?

y<-y[Award.Date>20200400 & as.numeric(levels(Grant.Amount))[as.integer(Grant.Amount)]>0,]

As for working with factors, I am quite confused! Where could I find examples and documentation on working with categorical data in R (factors and strings) using the data.table package?

  3. In current versions of R, can the same things be done with character variables as with factors?

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.