Error in `[.data.frame`(Data, , 4) : undefined columns selected

Hi There,

I have read in data, and the data is in column 3. But the error message is: Error in `[.data.frame`(Data, , 4) : undefined columns selected
Elin

Can you supply us with the code you are using and some sample data?

Hi, Thank you!

Here is the script:
# Convert the Berkeley Earth monthly station files listed in the inventory
# into SEF files with write_sef(), then rename each output file.
#library(SEF)
library(dataresqc)
# NOTE(review): outpath is assigned here but is not passed to write_sef() in
# the call further down -- confirm whether it has to be supplied there.
outpath = "C:/Users/elinl/Documents"

# Station inventory: no header row, every column read as character.
inventory <- read.table("C:/Users/elinl/Documents/UniBe/Instrumentelle data/Inventories/Inventory_Berkeley-Earth.txt", colClasses="character", header = FALSE)

colnames(inventory) <- c("Other_ID", "City", "Modern_Country", "Lat.degN", "Lon.degE", "Station_Elevation.m", "Start_Year", "End_Year" )

# Full paths of all station data files; the loop runs once per inventory city.
allfiles = list.files("C:/Users/elinl/Documents/Unibe/Instrumentelle data/Berkeley_Earth/Stations", full.names=TRUE)
for (currentfile in inventory$City) {

# NOTE(review): the next two unprefixed lines are Norwegian notes that have
# presumably lost their leading "#"; as written they are not valid R.
# English: "currentfile now holds the name of the file to be read in" and
# "here you fill in the commands to be run for each file".
currentfile innholder nå navnet på filen som skal leses inn

her fyller du på med kommandoene som skal kjøres for hver fil

# Find the data file whose path matches the city name (regular-expression match).
# NOTE(review): grep() can return zero or several matches; allfiles[index] is
# then not a single path and read.table() stops with
# "invalid 'description' argument". Confirm exactly one match per city.
index <- grep(currentfile, allfiles)
# Read the tab-separated station file into x; "-9999" is treated as missing.
x <- read.table(allfiles[index], header = FALSE, fill=TRUE, sep="\t",stringsAsFactors=FALSE, na.strings="-9999")

colnames(x) <- c("Year", "Month", "value_tavg")
year<- x[,1]
months<- as.integer(x[, 2])
value_tavg <- x[, 3]

# index is reused from here on: it now holds the inventory row(s) of this city.
#inventory <- read.csv("Inventory_Canada.txt", sep=";", stringsAsFactors=F)
index <- which(inventory$City==currentfile)
lat <- inventory$Lat.degN[index]
lon <- inventory$Lon.degE[index]
alt <- inventory$Station_Elevation.m[index]
station_name <- inventory$City[index]
# Three alternative ways of building df follow; if all are left in, each
# assignment overwrites the previous one, so only the last takes effect.
#one of these:

# Variant 1: three columns only (y, m, value_tavg).
# NOTE(review): this frame has no 4th column, which would be consistent with
# the reported "`[.data.frame`(Data, , 4) : undefined columns selected" error
# if it is the one handed to write_sef() -- confirm.
df <- data.frame(y=year, m=months, value_tavg, stringsAsFactors=FALSE)

# Variant 2: six columns; d is the literal string "NA", hh and mm are empty strings.
df <- data.frame(y=year, m=months, d="NA",
hh=rep("",nrow(x)), mm=rep("",nrow(x)), value_tavg, stringsAsFactors=FALSE)

# Variant 3: NOTE(review): `days` is not defined anywhere in this script, and
# length(x[1]) is the column count of a one-column data frame (1), not the
# number of rows.
df <- data.frame(y=year, m=months, d=days,hh=rep("",length(x[1])), mm=rep("",length(x[1])), value=x[,3], stringsAsFactors=FALSE)

# Write the station series as a SEF file; the station code passed here is the
# city name, and the metadata values are character strings from the inventory.
write_sef(df, variable="ta", cod=inventory$City[index], nam=station_name, lat=lat,
lon=lon, alt=alt, sou="BE", units="C", stat="mean", period="month")

# Rename the file(s) in the current working directory whose name matches
# Other_ID to BE_<Other_ID>_<station>_<first year>_<last year>_ta_monthly.tsv.
first_year <- min(df$y)
last_year <- max(df$y)
file.rename(list.files(pattern=as.character(inventory$Other_ID[index])),
paste("BE",inventory$Other_ID[index],station_name,first_year,last_year,"ta_monthly.tsv",
sep="_"))
}

[https://www.dropbox.com/sh/jbeedvneo7cwuus/AAAz3jsFulX2S3AjW2GdtaC0a?dl=0]

https://www.dropbox.com/s/pd9aeezraoha4iy/Inventory_Berkeley-Earth.txt?dl=0

# Convert the ISTI station files listed in the Alps inventory into SEF files
# with write_sef(), then rename each output file.
# NOTE(review): a later message in this thread says the SEF package is no
# longer available and dataresqc is used instead -- confirm which one to load.
library(SEF)

# Station inventory, every column read as character.
inventory <- read.table("C:/Users/elinl/Documents/UniBe/Instrumentelle data/ISTI/Inventorys/Inventory_Alps2.txt", colClasses="character")
colnames(inventory) <- c("Other_ID", "City", "Modern_Country", "Lat.degN", "Lon.degE", "Station_Elevation.m", "Start_Year", "End_Year" )

# Full paths of all station data files; the loop runs once per Other_ID.
allfiles = list.files("C:/Users/elinl/Documents/Unibe/Instrumentelle data/ISTI/Alps", full.names=TRUE)
for (currentfile in inventory$Other_ID) {

# NOTE(review): the next two unprefixed lines are Norwegian notes that have
# presumably lost their leading "#"; as written they are not valid R.
# English: "currentfile now holds the name of the file to be read in" and
# "here you fill in the commands to be run for each file".
currentfile innholder nå navnet på filen som skal leses inn

her fyller du på med kommandoene som skal kjøres for hver fil

# Find the data file whose path matches this station ID.
# NOTE(review): if grep() returns zero or several matches, allfiles[index] is
# not a single path and read.table() fails -- confirm one match per ID.
index <- grep(currentfile, allfiles)
# Read the whitespace-separated station file into x; "-9999" is treated as missing.
x <- read.table(allfiles[index], header = FALSE, fill=TRUE, sep="",stringsAsFactors=FALSE, na.strings="-9999")

colnames(x) <- c("City", "Lat", "Lon", "Station Elevation", "Date", "value_tmin","value_tmax","value_tavg","other")
# Year is taken from characters 1-4 of Date and month from characters 5-6.
year<- as.numeric(substr(x$Date,1,4))
months<- as.numeric(substr(x$Date,5,6))
# presumably the file stores temperatures in hundredths of a degree -- TODO confirm
value_tavg <- x$value_tavg/100

# index is reused from here on: it now holds the inventory row(s) of this station.
#inventory <- read.csv("Inventory_Canada.txt", sep=";", stringsAsFactors=F)
index <- which(inventory$Other_ID==currentfile)
lat <- inventory$Lat.degN[index]
lon <- inventory$Lon.degE[index]
alt <- inventory$Station_Elevation.m[index]
station_name <- inventory$City[index]

# Six-column frame: d is the literal string "NA", hh and mm are empty strings.
df <- data.frame(y=year, m=months, d="NA",
hh=rep("",nrow(x)), mm=rep("",nrow(x)), value_tavg, stringsAsFactors=FALSE)

# Write the station series as a SEF file, using Other_ID as the station code.
write_sef(df, variable="ta", cod=inventory$Other_ID[index], nam=station_name, lat=lat,
lon=lon, alt=alt, sou="ISTI", units="C", stat="mean", period="month")

# Rename the file(s) in the current working directory whose name matches
# Other_ID to ISTI_<Other_ID>_<station>_<first year>_<last year>_ta_monthly.tsv.
first_year <- min(df$y)
last_year <- max(df$y)
file.rename(list.files(pattern=as.character(inventory$Other_ID[index])),
paste("ISTI",inventory$Other_ID[index],station_name,first_year,last_year,"ta_monthly.tsv",
sep="_"))
}

New error message:
Error in file(file, "rt") : invalid 'description' argument

traceback()
2: file(file, "rt")
1: read.table(allfiles[index], header = FALSE, fill = TRUE, sep = "\t",
stringsAsFactors = FALSE, na.strings = "-9999")

Excellent, thank you. I have downloaded and imported the data and will start playing with it

Unfortunately I cannot locate the R package SEF. Can you point me towards it?

I am still poking around but your current error message seems to be coming from here

# Minimal reproduction of the failing step: look up each Other_ID among the
# file paths and try to read the matching file.
for (currentfile in inventory$Other_ID) {
# index holds the positions in allfiles whose path matches currentfile.
index <- grep(currentfile, allfiles)
# When index is empty, allfiles[index] is a zero-length vector rather than a
# file path, and read.table() cannot open it.
x <- read.table(allfiles[index], header = FALSE, fill=TRUE, sep="",stringsAsFactors=FALSE, na.strings="-9999")
}
# Show the value index had when the loop stopped.
print(index)

integer(0)

Can you explain in words what that for loop is supposed to do?

The code at the moment makes no sense. For example

or (currentfile in inventory$Other_ID) {
index <- grep(currentfile, allfiles)

cannot work because there is no equivalent of currentfile in allfiles

x <- read.table(allfiles[index], header = FALSE, fill=TRUE, sep="",stringsAsFactors=FALSE, na.strings="-9999")

colnames(x) <- c("City", "Lat", "Lon", "Station Elevation", "Date", "value_tmin","value_tmax","value_tavg","other")

makes no sense since x only has three columns for any value of x as far as I can see.

A good explanation of the overall intent may show us a better way to look at the problem. It may be that you need to look into some kind of join, but we need more explanation of what the data is and what you want to do with it.

I am sorry not to be of more immediate help.

Hi. Thank you so much for looking at this.
No, there is no R package SEF anymore. We use this one: library(dataresqc)
And here you need the Outpath

Shall I change the sep to ""?
The data is tab-separated. I thought that was "\t".
Isn't it?

Thank you anyway for looking at this.

I will try to explain: The loop will read in all the files.
I am reading in e.g. temperature data for hundreds of stations.
And here the loop will read in the mean temperature data per month for all these stations (>100).

The inventory document gives information about the station like City, Latitude, Longitude etc..
So it provides additional information about the data (like temperature data) from all the stations in the allfiles

x has 3 columns - yes.
But the inventory has many more columns; those are the ones named in colnames().

Is it clear enough?

Data written to file C:/Users/elinl/Documents/BE_10682_177410-200507_ta.tsv
Data written to file C:/Users/elinl/Documents/BE_13013_177501-200504_ta.tsv
Data written to file C:/Users/elinl/Documents/BE_13630_180001-182412_ta.tsv
Data written to file C:/Users/elinl/Documents/BE_13670_180111-183310_ta.tsv
Data written to file C:/Users/elinl/Documents/BE_13994_178401-187012_ta.tsv
Data written to file C:/Users/elinl/Documents/BE_14189_178101-199204_ta.tsv
Data written to file C:/Users/elinl/Documents/BE_14316_175703-196112_ta.tsv
Data written to file C:/Users/elinl/Documents/BE_14369_177001-193512_ta.tsv
Data written to file C:/Users/elinl/Documents/BE_14402_175911-193512_ta.tsv
Data written to file C:/Users/elinl/Documents/BE_15007_178001-200012_ta.tsv
Data written to file C:/Users/elinl/Documents/BE_15960_179105-200812_ta.tsv
Data written to file C:/Users/elinl/Documents/BE_16173_177401-201311_ta.tsv
Error in file(file, "rt") : invalid 'description' argument
In addition: There were 12 warnings (use warnings() to see them)

It runs 12 stations.
So I think there is something wrong with the 13th station. And that is Milan. But I can't find out what is wrong with Milan. It is exactly the same

Just started looking at your notes. sep = "\t" it is. I had not checked when I made the remark.

1 Like

Hi There, @jrkrideau
Have you looked at this?

Thank you!

Elin

I spent a bit of time on it yesterday and have a number of questions but got pulled away yesterday on some home maintenance issues.

I hope to get back to it, probably with some very simple basic questions, this afternoon. It is still late morning my time and I have not had an opportunity to get to it yet.

Sorry for the delay.

Hi - Good morning :slight_smile:

Sorry, I see. No problem! Just answer me and ask all the questions you can.

Thank you for looking at this.

Elin

I am still messing about with the very first bit of code. Using the Achen data, is this the first step that you want to do? Presumably we can throw that into a loop with no problem.

# Exploratory first step: read the inventory and the first station file, then
# attach the first (alphabetical-by-city) inventory row to that station's data.
setwd("/home/john/RJunk/elinlun")

library(tidyverse)
library(dataresqc)

# Tab-separated inventory without a header row.
inventory <-  read.csv("Inventory_Berkeley_Earth.csv", sep = "\t", header = FALSE)

# Same eight column names as in the original script, but lower-cased.
colnames(inventory) <- tolower(c("Other_ID", "City", "Modern_Country", "Lat.degN", "Lon.degE",
                         "Station_Elevation.m", "Start_Year", "End_Year" ))

# Sort the inventory by city name.
# NOTE(review): presumably so that row 1 lines up with the first file returned
# by list.files() -- confirm that the two orderings really agree.
new_inventory  <-  arrange(inventory, city)

# Full paths of the files in the local "allfiles" directory; only the first is read.
allfiles = (list.files("allfiles", full.names = TRUE))
x <- read.table(allfiles[1], header = FALSE, fill=TRUE, 
                  sep="\t",stringsAsFactors=FALSE, na.strings="-9999")

# Put the first inventory row next to the station data.
# NOTE(review): the 11 labels below replace the 8 inventory names plus the
# names of x (assumed to have 3 columns) -- confirm that each label matches
# the column it lands on.
achen  <-   cbind(new_inventory[1 ,], x)
colnames(achen) <- tolower(c("Index","City", "Lat", "Lon", "Station Elevation", "Date", 
                             "value_tmin","value_tmax","value_tavg","V1", "V2"))


# Print the combined data frame.
achen

I am still trying to figure out some of the other code. I suspect some of it may be superfluous but I have never seen library(dataresqc) before so I may well be wrong.

P.S Is what you are calling Other_ID the same as what write_sef() calls stationcode?