Importing csv does not recognize negative numbers

I have a csv file that has negative numbers in parenthesis, ie. (100) = -100. When I try to bring in the data it converts the column to character so I change it to numeric which converts all negatives to 0s. Any ideas how to fix this?
Thank you.

I don't know of a way to do this other than replacing characters and then using as.numeric()

library(dplyr)
library(stringr)
df <- read.table("/home/fjcc/R/Play/Test", stringsAsFactors = FALSE, header = TRUE)
df
#>    id     x      y     z      w
#> 1  10 35.25 120.25 1e-04   10.1
#> 2  11 35.25 120.75 2e-03 (11.2)
#> 3  12 35.25 121.25 5e-04    9.5
#> 4  13 35.25 121.75 3e-04    8.2
#> 5  14 35.25 122.25 2e-04   11.5
#> 6  15 35.25 122.75 2e-04 (20.2)
#> 7  16 35.25 123.25 2e-04  (2.1)
#> 8  17 35.25 123.75 2e-04   25.1
#> 9  18 35.75 120.25 2e-04   24.0
#> 10 19 35.75 120.75 2e-04   10.8
#> 11 20 35.75 121.25 4e-04 (10.9)
#> 12 21 35.75 121.75 4e-02   16.5
#> 13 22 35.75 122.25 4e-04   18.3
#> 14 23 35.75 122.75 3e-02   12.0
#> 15 24 35.75 123.25 5e-02   20.3
#> 16 25 35.75 123.75 6e-01   22.4
df <- df %>% mutate(w = str_replace(w, "\\(", "-")) %>% 
  mutate(w = str_replace(w, "\\)", "")) %>%
  mutate(w = as.numeric(w))
df           
#>    id     x      y     z     w
#> 1  10 35.25 120.25 1e-04  10.1
#> 2  11 35.25 120.75 2e-03 -11.2
#> 3  12 35.25 121.25 5e-04   9.5
#> 4  13 35.25 121.75 3e-04   8.2
#> 5  14 35.25 122.25 2e-04  11.5
#> 6  15 35.25 122.75 2e-04 -20.2
#> 7  16 35.25 123.25 2e-04  -2.1
#> 8  17 35.25 123.75 2e-04  25.1
#> 9  18 35.75 120.25 2e-04  24.0
#> 10 19 35.75 120.75 2e-04  10.8
#> 11 20 35.75 121.25 4e-04 -10.9
#> 12 21 35.75 121.75 4e-02  16.5
#> 13 22 35.75 122.25 4e-04  18.3
#> 14 23 35.75 122.75 3e-02  12.0
#> 15 24 35.75 123.25 5e-02  20.3
#> 16 25 35.75 123.75 6e-01  22.4
summary(df)
#>        id              x               y               z          
#>  Min.   :10.00   Min.   :35.25   Min.   :120.2   Min.   :0.00010  
#>  1st Qu.:13.75   1st Qu.:35.25   1st Qu.:121.1   1st Qu.:0.00020  
#>  Median :17.50   Median :35.50   Median :122.0   Median :0.00035  
#>  Mean   :17.50   Mean   :35.50   Mean   :122.0   Mean   :0.04531  
#>  3rd Qu.:21.25   3rd Qu.:35.75   3rd Qu.:122.9   3rd Qu.:0.00900  
#>  Max.   :25.00   Max.   :35.75   Max.   :123.8   Max.   :0.60000  
#>        w          
#>  Min.   :-20.200  
#>  1st Qu.:  5.625  
#>  Median : 11.150  
#>  Mean   :  9.019  
#>  3rd Qu.: 18.800  
#>  Max.   : 25.100

Created on 2019-08-15 by the reprex package (v0.2.1)

3 Likes

Another possibility:

y <- c("0", "(1)", "1.2", "(3.5)", "8.13", "(21.34)")

z <- as.numeric(x = gsub(pattern = "\\((\\d*(\\.\\d+)?)\\)",
                         replacement = "-\\1",
                         x = y))

data.frame(y, z, stringsAsFactors = FALSE)
#>         y      z
#> 1       0   0.00
#> 2     (1)  -1.00
#> 3     1.2   1.20
#> 4   (3.5)  -3.50
#> 5    8.13   8.13
#> 6 (21.34) -21.34

Created on 2019-08-16 by the reprex package (v0.3.0)

1 Like