Create a function with many parameters and apply it over a list in R

nikos_geo · March 17, 2020, 2:03pm

I have a list which includes 2 data.frames (i.e. text files) and would like to apply some analysis on them . I performed my analysis to a single data.drame but I want to do the same (analysis) over my list. I am a bit confused on how to do that. My code for the single data.frame is below:

setwd("mydir")

#import .txt file
d = read.table("vourlionas.txt",
               header = FALSE,
               sep="", 
               col.names=c("x", "y"), 
               fill=FALSE, 
               strip.white=TRUE,
               stringsAsFactors = FALSE)

#remove the 1st row of the .txt file
d2 = d[-1,]

d3 = substr(d2$x, 1, 6)   
d4 = substr(d2$y, 1, 7)

df <- cbind(d3, d4)

df1 <- as.data.frame(df)

b = apply(as.matrix(df1),2,as.character)

#rename the column names of the matrix
colnames(b) [1] = paste("x")

mode(b) = "numeric"

b2 <- as.data.frame(b)

ch <- chull(b2)
coords <- b2[c(ch, ch[1]), ]

sp_poly <- SpatialPolygons(list(Polygons(list(Polygon(coords)), ID=1)))

proj4string(sp_poly) <- CRS("+init=epsg:2100")

sp_poly_df <- SpatialPolygonsDataFrame(sp_poly, data=data.frame(ID=1))

test1 = spTransform(sp_poly_df, "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")

writeOGR(obj=test1, dsn="mydir", layer = "r"
         driver="ESRI Shapefile", overwrite_layer = TRUE)

When I try to create a function and pass it over the list, instead of getting 2 shapefiles I am getting only one:

library(dplyr)
library(purrr)
library(tibble)
library(sp)

#Get list of files
mydir="C:/path/to/textfiles/"
temp = list.files(mydir,pattern="*.txt$",full.names = TRUE)
fn = list.files(mydir,pattern="*.txt$")

temp=enframe(temp)%>%
  mutate(savedir=fn)

temp = temp %>%
  #load in your table
  mutate(data = map(value,~read.table(.,header = FALSE,
                                      sep="", 
                                      col.names=c("x", "y"), 
                                      fill=FALSE, 
                                      strip.white=TRUE,
                                      stringsAsFactors = FALSE)))

#Your function

myfunction = function(temp,save_name){

  #remove the 1st row of the .txt file
  d2 = temp[-1,]

  d3 = substr(d2$x, 1, 6)   
  d4 = substr(d2$y, 1, 7)

  df <- cbind(d3, d4)

  df1 <- as.data.frame(df)

  b = apply(as.matrix(df1),2,as.character)

  #rename the column names of the matrix
  colnames(b) [1] = paste("x")

  mode(b) = "numeric"

  b2 <- as.data.frame(b)

  ch <- chull(b2)
  coords <- b2[c(ch, ch[1]), ]

  sp_poly <- SpatialPolygons(list(Polygons(list(Polygon(coords)), ID=1)))

  proj4string(sp_poly) <- CRS("+init=epsg:2100")

  sp_poly_df <- SpatialPolygonsDataFrame(sp_poly, data=data.frame(ID=1))

  test1 = spTransform(sp_poly_df, "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")

  writeOGR(obj=test1, dsn="mydir", layer = "r",
           driver="ESRI Shapefile", overwrite_layer = TRUE)

}

map2(temp$data,temp$savedir,~myfunction(.x,.y))

Any thoughts? I have posted the same question here, but the answer I have got was incorrect.

dromano · March 17, 2020, 2:10pm

Hi @nikos_geo, if you run myfunction <- Vectorize(myfunction) before applying it, does that help? Functions often need to be vectorized in order to work with map-like tools.

nikos_geo · March 17, 2020, 2:14pm

Thank your for the reply. Unfortunately no, it didn't help. I can't understand why the function is performing the analysis only in 1 data.frame and not in the entire list.

dromano · March 17, 2020, 2:22pm

Could you post a sample of the contents of temp by pasting the output of dput(temp %>% head()) here?

nikos_geo · March 17, 2020, 2:28pm

structure(list(name = 1:2, value = c("C:/Users/ntziokas/Desktop/auto/3063731380325.txt", 
"C:/Users/ntziokas/Desktop/auto/3073749905003.txt"), savedir = c("C:/Users/ntziokas/Desktop/auto/3063731380325.txt", 
"C:/Users/ntziokas/Desktop/auto/3073749905003.txt"), data = list(
    structure(list(x = c("x", "306210.926", "306317.914", "306358.873", 
    "306366.352", "306368.777", "306260.655", "306259.157", "306256.893", 
    "306209.826", "306210.926"), y = c("y", "4373977.512", "4373838.333", 
    "4373869.788", "4373874.506", "4373876.006", "4374015.196", 
    "4374014.559", "4374015.263", "4373978.943", "4373977.512"
    )), class = "data.frame", row.names = c(NA, -11L)), structure(list(
        x = c("x", "307861.71", "307860.036", "307856.292", "307898.422", 
        "307949.512", "307956.067", "307861.73", "307861.71"), 
        y = c("y", "4374154.479", "4374095.852", "4373964.716", 
        "4373964.716", "4373963.374", "4374151.803", "4374155.143", 
        "4374154.479")), class = "data.frame", row.names = c(NA, 
    -9L)))), row.names = c(NA, -2L), class = c("tbl_df", "tbl", 
"data.frame"))

dromano · March 17, 2020, 2:49pm

I'm not sure if this is part of the issue yet, but if you run temp$data %>% pluck(1), you'll see that column names have been duplicated.

dromano · March 17, 2020, 2:58pm

I see you strip the duplicates out in the function. More soon...

nikos_geo · March 17, 2020, 2:58pm

Yes, you are right. Not sure why this happened. I will try to fix it and will come back. Thank you for your help.

dromano · March 17, 2020, 3:09pm

For the purposes of your question, you may not need to fix it -- the code inside the function worked previously, and stripped the extra row out.

nikos_geo · March 18, 2020, 7:35am

I can't understand. The code inside the function works perfectly but it's not applied on the whole list, just on the first data frame of the list. Do you have any other thoughts?

nirgrahamuk · March 18, 2020, 10:58am

Your code is written so as to output any give result to a singular folder 'mydir' writing out 4 objects.
what do you want the result to be ?
write out multple outputs to multipy directories?
or have a file labelling scheme for everything going into 'mydir' ?

nikos_geo · March 18, 2020, 11:15am

The result I want to be 2 shapefiles (because I have 2 data.frames).

nirgrahamuk · March 18, 2020, 11:19am

Have you looked to see the output of your writeOGR in mydir ?
it produces
r.shp
r.shx
r.df
r.prj

Do you need all of these ? do you care only for the shp file ?
do you want to write out the R object representing test1 functions save() and saveRDS() are possibilities. They would naturally write out exactly one file, and not the 4 that writeOGR is producing for you.

nikos_geo · March 18, 2020, 11:45am

yes I have. I need all 4 files because each file contains spatial information about the polygon (the r.shp file), which means I need 8 files (4 files for every element in my original list).

nirgrahamuk · March 18, 2020, 12:18pm

This works for me:

temp <- structure(savelabel = c(
  "3063731380325",
  "3073749905003"), data = list(
  structure(list(x = c(
    "x", "306210.926", "306317.914", "306358.873",
    "306366.352", "306368.777", "306260.655", "306259.157", "306256.893",
    "306209.826", "306210.926"
  ), y = c(
    "y", "4373977.512", "4373838.333",
    "4373869.788", "4373874.506", "4373876.006", "4374015.196",
    "4374014.559", "4374015.263", "4373978.943", "4373977.512"
  )), class = "data.frame", row.names = c(NA, -11L)), structure(list(
    x = c(
      "x", "307861.71", "307860.036", "307856.292", "307898.422",
      "307949.512", "307956.067", "307861.73", "307861.71"
    ),
    y = c(
      "y", "4374154.479", "4374095.852", "4373964.716",
      "4373964.716", "4373963.374", "4374151.803", "4374155.143",
      "4374154.479"
    )
  ), class = "data.frame", row.names = c(
    NA,
    -9L
  ))
)), row.names = c(NA, -2L), class = c(
  "tbl_df", "tbl",
  "data.frame"
))

# Your function
library(dplyr)
library(purrr)
library(tibble)
library(sp)
library(rgdal)
myfunction <- function(temp, save_name) {

  # remove the 1st row of the .txt file
  d2 <- temp[-1, ]

  d3 <- substr(d2$x, 1, 6)
  d4 <- substr(d2$y, 1, 7)

  df <- cbind(d3, d4)

  df1 <- as.data.frame(df)

  b <- apply(as.matrix(df1), 2, as.character)

  # rename the column names of the matrix
  colnames(b) [1] <- paste("x")

  mode(b) <- "numeric"

  b2 <- as.data.frame(b)

  ch <- chull(b2)
  coords <- b2[c(ch, ch[1]), ]

  sp_poly <- SpatialPolygons(list(Polygons(list(Polygon(coords)), ID = 1)))

  proj4string(sp_poly) <- CRS("+init=epsg:2100")

  sp_poly_df <- SpatialPolygonsDataFrame(sp_poly, data = data.frame(ID = 1))

  test1 <- spTransform(sp_poly_df, "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")

  writeOGR(
    obj = test1, dsn = "mydir", layer = "r",
    driver = "ESRI Shapefile", overwrite_layer = TRUE
  )
  file.rename(from = "mydir/r.shp",paste0("mydir/",save_name,".shp"))
  file.rename(from = "mydir/r.shx",paste0("mydir/",save_name,".shx"))
  file.rename(from = "mydir/r.dbf",paste0("mydir/",save_name,".dbf"))
  file.rename(from = "mydir/r.prj",paste0("mydir/",save_name,".prj"))
  }


walk2(temp$data, temp$savelabel, ~ myfunction(.x, .y))

nikos_geo · March 18, 2020, 12:23pm

I will test it now and I will inform you about the results. Thank you for your help.

dromano · March 18, 2020, 12:48pm

Could you change the value of the layer = argument to get the same result?

writeOGR(
    obj = test1, dsn = "mydir", layer = "save_name",
    driver = "ESRI Shapefile", overwrite_layer = TRUE
  )

nirgrahamuk · March 18, 2020, 12:51pm

seems more elegant dromano, I don't have experience of OGR (whatever that stands for , so i used an 'external' approach. cheers for your experise.

dromano · March 18, 2020, 12:54pm

I actually don't know if it works -- I can't get that function to work on my machine for some reason.

system · April 8, 2020, 1:05pm

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.