Upload thousands of csv and merge them together

Hi,

I need to upload about 21 000 csv files from a directory on my computer.
The files are named like so: 2005_4.csv, where 2005 is the year and 4 is the day of the year. I have those files for more than 20 years. Obviously I can't load them all one by one, and I have no idea how to do that operation.
They are all structured the same way, though, like so:

# dput() of one daily file as the poster read it into R.
# The lone column name looks like the file's first record
# ("A08B08_ -79.4945 48.4148 296. 2.68") after name mangling, so the file was
# apparently read with a header row assumed -- TODO confirm with the poster.
# Each of the 50 rows is a single unsplit string of space-separated fields.
structure(list(A08B08_...79.4945...48.4148..296.....2.68 = c("A08E31_ -79.4679 48.4254 278. 2.77",
"A09A03_ -79.4848 48.4178 301. 2.66", "A12B01_ -79.4944 48.4146 296. 2.69",
"A12B01_ -79.4944 48.4146 296. 2.69", "A12B01_ -79.4944 48.4146 296. 2.69",
"B12RJ52 -78.6642 49.4051 275. 1.72", "B12YE02 -78.8871 49.2079 335. 1.87",
"ESB14M3 -79.1944 48.4529 277. 2.83", "F04F51_ -79.2888 48.4951 299. 2.71",
"F09B02_ -79.3472 48.4472 273. 2.81", "F09J11_ -79.3461 48.4964 273. 2.80",
"F12I01_ -79.1944 48.4496 280. 2.82", "F18P01_ -79.2628 48.4395 273. 2.83",
"H05Z02_ -79.3870 48.5190 273. 2.78", "H08Z03_ -79.3712 48.5175 273. 2.79",
"H08Z09_ -79.2122 48.4836 272. 2.85", "H13Z07_ -79.4915 48.5238 302. 2.62",
"H13Z08_ -79.4914 48.5239 302. 2.62", "H13Z25_ -79.2107 48.4825 272. 2.85",
"H18Z27_ -79.4014 48.5175 284. 2.73", "SC14F10 -79.4684 48.4269 278. 2.77",
"A12B01_ -79.4944 48.4146 296. 2.69", "F04F51_ -79.2888 48.4951 299. 2.71",
"F09Y06_ -79.3442 48.4690 276. 2.79", "H05Z02_ -79.3870 48.5190 273. 2.78",
"J20A14_ -79.3595 48.5011 275. 2.79", "J20F05_ -79.3220 48.5038 272. 2.81",
"J21A01_ -79.4791 48.4201 290. 2.71", "J21A06_ -79.3511 48.5180 273. 2.79",
"J21F16_ -79.3619 48.4413 274. 2.80", "F03K08_ -79.2996 48.4407 305. 2.68",
"J22A02_ -79.4816 48.4229 290. 2.71", "J22A12_ -79.3709 48.5176 273. 2.79",
"J22A13_ -79.3526 48.4922 274. 2.80", "J22A36_ -79.3838 48.4229 298. 2.70",
"J22A43_ -79.4866 48.4452 291. 2.70", "J22A50_ -79.4303 48.4779 356. 2.42",
"J22A66_ -79.3386 48.3582 303. 2.68", "J22F05_ -79.3455 48.4663 272. 2.81",
"J22F10_ -79.2696 48.4380 272. 2.83", "J22F11_ -79.3005 48.4410 300. 2.70",
"J20F05_ -79.3222 48.5037 272. 2.81", "S22SD01 -73.8309 45.5396 37. 3.00",
"S22BP01 -73.6831 45.6040 39. 3.07", "S22PP01 -73.5205 45.6910 16. 2.97",
"S22SE01 -73.6260 46.2881 303. 0.54", "S22SE02 -73.6192 46.3146 346. 0.43",
"S22SC01 -73.7259 46.2531 259. 0.55", "S22NO01 -75.0557 46.4020 287. -0.12",
"O22HM33 -79.9951 46.1177 212. 1.35")), class = "data.frame", row.names = c(NA,
-50L))

In an ideal world, I would have a single file for each year, in which every ID has its year and date attached to it!

Hope I was clear. Hope someone can help me!

If the entries in this data frame were droids, you would miss them at the Cantina.

  1. There's no header, and if these files are not brought in with the appropriate setting (header = FALSE in read.csv() or fread(), col_names = FALSE in read_csv()), the first record will be used up as the column name

"A08B08_ -79.4945 48.4148 296. 2.68"

which leads to the second problem

  2. You probably want more than one variable, rather than the single jumbled string
"A08E31_ -79.4679 48.4254 278. 2.77"

So, job one will be to specify the structure of the data frame to be created. Here's one possible view

# One possible target layout for a single daily file: the raw one-string
# records split into typed columns, plus a date column shared by every row.
# The columns are assembled inside local() so that `d` is the only object
# left in the calling environment.
d <- local({
  # Identifier field (first token of each raw record).
  sta <- c(
    "A08B08_",
    "A08E31_", "A09A03_", "A12B01_", "B12RJ52", "B12YE02", "ESB14M3",
    "F04F51_", "F09B02_", "F09J11_", "F12I01_", "F18P01_", "H05Z02_",
    "H08Z03_", "H08Z09_", "H13Z07_", "H13Z08_", "H13Z25_", "H18Z27_",
    "SC14F10", "F09Y06_", "J20A14_", "J20F05_", "J21A01_", "J21A06_",
    "J21F16_", "F03K08_", "J22A02_", "J22A12_", "J22A13_", "J22A36_",
    "J22A43_", "J22A50_", "J22A66_", "J22F05_", "J22F10_", "J22F11_",
    "J20F05_", "S22SD01", "S22BP01", "S22PP01", "S22SE01", "S22SE02",
    "S22SC01", "S22NO01", "O22HM33"
  )

  # Second and third numeric tokens of each record; presumably longitude and
  # latitude, going by the column names chosen here.
  lon <- c(
    -79.4945, -79.4679,
    -79.4848, -79.4944, -78.6642, -78.8871, -79.1944, -79.2888, -79.3472,
    -79.3461, -79.1944, -79.2628, -79.387, -79.3712, -79.2122, -79.4915,
    -79.4914, -79.2107, -79.4014, -79.4684, -79.3442, -79.3595, -79.322,
    -79.4791, -79.3511, -79.3619, -79.2996, -79.4816, -79.3709, -79.3526,
    -79.3838, -79.4866, -79.4303, -79.3386, -79.3455, -79.2696, -79.3005,
    -79.3222, -73.8309, -73.6831, -73.5205, -73.626, -73.6192, -73.7259,
    -75.0557, -79.9951
  )
  lat <- c(
    48.4148, 48.4254, 48.4178, 48.4146,
    49.4051, 49.2079, 48.4529, 48.4951, 48.4472, 48.4964, 48.4496,
    48.4395, 48.519, 48.5175, 48.4836, 48.5238, 48.5239, 48.4825,
    48.5175, 48.4269, 48.469, 48.5011, 48.5038, 48.4201, 48.518,
    48.4413, 48.4407, 48.4229, 48.5176, 48.4922, 48.4229, 48.4452,
    48.4779, 48.3582, 48.4663, 48.438, 48.441, 48.5037, 45.5396,
    45.604, 45.691, 46.2881, 46.3146, 46.2531, 46.402, 46.1177
  )

  # Remaining two numeric tokens; the names `census` and `idx` are
  # placeholders, their real meaning is not stated in the question.
  census <- c(
    296, 278, 301, 296, 275, 335, 277, 299, 273, 273,
    280, 273, 273, 273, 272, 302, 302, 272, 284, 278, 276, 275,
    272, 290, 273, 274, 305, 290, 273, 274, 298, 291, 356, 303,
    272, 272, 300, 272, 37, 39, 16, 303, 346, 259, 287, 212
  )
  idx <- c(
    2.68, 2.77, 2.66, 2.69, 1.72, 1.87, 2.83, 2.71, 2.81,
    2.8, 2.82, 2.83, 2.78, 2.79, 2.85, 2.62, 2.62, 2.85, 2.73,
    2.77, 2.79, 2.79, 2.81, 2.71, 2.79, 2.8, 2.68, 2.71, 2.79,
    2.8, 2.7, 2.7, 2.42, 2.68, 2.81, 2.83, 2.7, 2.81, 3, 3.07,
    2.97, 0.54, 0.43, 0.55, -0.12, 1.35
  )

  # 12787 days since 1970-01-01 is 2005-01-04 (year 2005, day 4); the same
  # date is repeated once per record.
  dte <- structure(rep(12787, length(sta)), class = "Date")

  data.frame(
    dte = dte,
    sta = sta,
    lon = lon,
    lat = lat,
    census = census,
    idx = idx
  )
})
head(d)
#>          dte     sta      lon     lat census  idx
#> 1 2005-01-04 A08B08_ -79.4945 48.4148    296 2.68
#> 2 2005-01-04 A08E31_ -79.4679 48.4254    278 2.77
#> 3 2005-01-04 A09A03_ -79.4848 48.4178    301 2.66
#> 4 2005-01-04 A12B01_ -79.4944 48.4146    296 2.69
#> 5 2005-01-04 B12RJ52 -78.6642 49.4051    275 1.72
#> 6 2005-01-04 B12YE02 -78.8871 49.2079    335 1.87

Created on 2023-06-29 with reprex v2.0.2

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.