Splitting Data Errors

I need to build a prediction model as a final project. However, I am having problems splitting the data. Any help you can give me is greatly appreciated. Thanks!

Here is my code:
# Fix the RNG seed so the random split is reproducible.
set.seed(123)
# NOTE(review): the whole `Trends` data frame is passed here. The error below
# reports a subscript of size 11 (the number of columns in the example data)
# against 2960 rows, so `split` apparently holds one flag per column rather
# than one per row -- presumably sample.split() expects a single outcome
# column/vector; confirm against its documentation.
split <- sample.split(Trends,SplitRatio =0.70)
# Rows flagged TRUE are meant for the training set, the remainder for testing.
train <- subset(Trends,split==TRUE)
test <- subset(Trends,split==FALSE)

Here is the error:
Error: Must subset rows with a valid subscript vector.
i Logical subscripts must match the size of the indexed input.
x Input has size 2960 but subscript r has size 11.

Here is the output of rlang::last_error() (I do not understand how to fix this):
<error/vctrs_error_subscript_size>
Must subset rows with a valid subscript vector.
i Logical subscripts must match the size of the indexed input.
x Input has size 2960 but subscript r has size 11.

It continues:
Backtrace
x

  1. +-base::subset(Trends, split == TRUE)
  2. -base::subset.data.frame(Trends, split == TRUE)
  3. +-x[r, vars, drop = drop]
  4. -tibble:::`[.tbl_df`(x, r, vars, drop = drop)
  5. \-tibble:::tbl_subset_row(xo, i = i, i_arg)
    
  6.   \-tibble:::vectbl_as_row_index(i, x, i_arg)
    
  7.     \-tibble:::vectbl_as_row_location(i, nr, i_arg, assign)
    
  8.       +-tibble:::subclass_row_index_errors(...)
    
  9.       | \-base::withCallingHandlers(...)
    
  10.       \-vctrs::vec_as_location(i, n)
    
  11.         \-(function () ...
    
  12.           \-vctrs:::stop_indicator_size(...)
    

Example of My Dataset:

PeriodEnd HomesSold Inventory MedianDom MedianListPpsf MedianListPrice MedianPpsf MedianSalePrice NewListings PermitBldgs PermitValue
1/31/2017 62 458 99 178 369900 180 367912 76 73 18924074
1/31/2018 60 432 55 191 339900 189 398040 94 65 18275062
10/31/2017 84 425 73 177 334100 191 389118 112 110 26795999
12/31/2017 97 422 53 179 354400 191 411088 83 90 24514262
6/30/2019 51 444 118 185 400520 193 390000 83 85 23513520
7/31/2018 85 472 37 188 372355 185 379900 77 121 34700456
11/30/2019 55 394 110 202 379000 195 409900 67 108 30880359
7/31/2017 74 438 155 191 372800 179 348710 69 112 29117986
11/30/2017 76 418 89 175 369700 183 359752 96 103 27644803
3/31/2017 62 490 43 175 358943 178 384591 98 78 21168124
9/30/2017 67 428 58 180 379500 178 348069 110 141 35000701
8/31/2017 78 431 95 186 356300 178 364315 118 132 32577586
5/31/2018 76 487 62 185 353615 190 438845 111 170 43139495
10/31/2018 75 492 76 195 373645 185 375819 124 139 36139075
# following https://rpubs.com/ID_Tech/S1

# Reproducible train/test split of a data frame using base R only: draw a
# random set of ROW indices for training and keep the remaining rows for
# testing.

# Fix the RNG seed so the same rows are drawn on every run.
set.seed(137)

# Sample rows transcribed from the example dataset in the question.
dat <- data.frame(
  PeriodEnd = c(
    "1/31/2017", "1/31/2018", "10/31/2017", "12/31/2017", "6/30/2019",
    "7/31/2018", "11/30/2019", "7/31/2017", "11/30/2017", "3/31/2017",
    "9/30/2017", "8/31/2017", "5/31/2018", "10/31/2018"
  ),
  HomesSold = c(62, 60, 84, 97, 51, 85, 55, 74, 76, 62, 67, 78, 76, 75),
  Inventory = c(
    458, 432, 425, 422, 444, 472, 394, 438, 418, 490, 428, 431, 487, 492
  ),
  MedianDom = c(99, 55, 73, 53, 118, 37, 110, 155, 89, 43, 58, 95, 62, 76),
  MedianListPpsf = c(
    178, 191, 177, 179, 185, 188, 202, 191, 175, 175, 180, 186, 185, 195
  ),
  MedianListPrice = c(
    369900, 339900, 334100, 354400, 400520, 372355, 379000,
    372800, 369700, 358943, 379500, 356300, 353615, 373645
  ),
  MedianPpsf = c(
    180, 189, 191, 191, 193, 185, 195, 179, 183, 178, 178, 178, 190, 185
  ),
  MedianSalePrice = c(
    367912, 398040, 389118, 411088, 390000, 379900, 409900,
    348710, 359752, 384591, 348069, 364315, 438845, 375819
  ),
  NewListings = c(
    76, 94, 112, 83, 83, 77, 67, 69, 96, 98, 110, 118, 111, 124
  ),
  PermitBldgs = c(
    73, 65, 110, 90, 85, 121, 108, 112, 103, 78, 141, 132, 170, 139
  ),
  PermitValue = c(
    18924074, 18275062, 26795999, 24514262, 23513520, 34700456, 30880359,
    29117986, 27644803, 21168124, 35000701, 32577586, 43139495, 36139075
  )
)

# Size of the training set: 75% of the rows, rounded down.
# (Replace 0.75 with 0.70 to reproduce the SplitRatio used in the question.)
to_take <- floor(0.75 * nrow(dat))
to_take
#> [1] 10


# Sample row positions without replacement; the index vector has one entry
# per selected row, so it always matches the data frame's row dimension.
train_idx <- sample(seq_len(nrow(dat)), size = to_take)
train <- dat[train_idx, ]
# Select the complement explicitly: negative indexing with an empty
# `train_idx` would return zero rows instead of all of them.
test <- dat[setdiff(seq_len(nrow(dat)), train_idx), ]

train
#>     PeriodEnd HomesSold Inventory MedianDom MedianListPpsf MedianListPrice
#> 11  9/30/2017        67       428        58            180          379500
#> 2   1/31/2018        60       432        55            191          339900
#> 8   7/31/2017        74       438       155            191          372800
#> 6   7/31/2018        85       472        37            188          372355
#> 7  11/30/2019        55       394       110            202          379000
#> 3  10/31/2017        84       425        73            177          334100
#> 4  12/31/2017        97       422        53            179          354400
#> 13  5/31/2018        76       487        62            185          353615
#> 1   1/31/2017        62       458        99            178          369900
#> 12  8/31/2017        78       431        95            186          356300
#>    MedianPpsf MedianSalePrice NewListings PermitBldgs PermitValue
#> 11        178          348069         110         141    35000701
#> 2         189          398040          94          65    18275062
#> 8         179          348710          69         112    29117986
#> 6         185          379900          77         121    34700456
#> 7         195          409900          67         108    30880359
#> 3         191          389118         112         110    26795999
#> 4         191          411088          83          90    24514262
#> 13        190          438845         111         170    43139495
#> 1         180          367912          76          73    18924074
#> 12        178          364315         118         132    32577586

test
#>     PeriodEnd HomesSold Inventory MedianDom MedianListPpsf MedianListPrice
#> 5   6/30/2019        51       444       118            185          400520
#> 9  11/30/2017        76       418        89            175          369700
#> 10  3/31/2017        62       490        43            175          358943
#> 14 10/31/2018        75       492        76            195          373645
#>    MedianPpsf MedianSalePrice NewListings PermitBldgs PermitValue
#> 5         193          390000          83          85    23513520
#> 9         183          359752          96         103    27644803
#> 10        178          384591          98          78    21168124
#> 14        185          375819         124         139    36139075
1 Like

Thank you! You are my hero! :superhero:

1 Like

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.