"Cannot Open Connection" Error

set_colnames_remove_unk_chrx <- function(df, x){
  colnames(df) <- c("Chromosome", "?", "?2", "Index", "SNP1", "SNP2")
  return(df[df$Chromosome == x, ])
}

unpack <- function(l){
  return(l[[1]])
}

sig_test <- function(chr, path="", save=FALSE){
  options(warn=-1)
  weight <- read.csv("C:\\Users\\noree\\OneDrive - The University of Chicago\\Genetics Lab-Info and RMD file\\Genetics 20187 Dog GWAS files\\Major\\weight2022.csv")
  names <- weight$namegetw
  
  #Read in raw data files named {dog_name}.tped
  dfs <- lapply(paste0(names, '.tped'), read.table, stringsAsFactors=FALSE)
  dfs <- lapply(dfs, set_colnames_remove_unk_chrx, chr)
  names(dfs) <- names
  
  #Bin by weight
  large_names <- weight[weight$weight > 35, 'name']
  small_names <- weight[weight$weight <= 35, 'name']
  large_df <- dfs[large_names]
  small_df <- dfs[small_names]
  rm(dfs)#free up memory
  
  
  small_df <- Map(function(x, i) setNames(x, ifelse(names(x) %in% c("Chromosome", "Index", "?"), names(x), sprintf('%s.%d', names(x), i))), small_df, seq_along(small_df))
  large_df <- Map(function(x, i) setNames(x, ifelse(names(x) %in% c("Chromosome", "Index", "?"), names(x), sprintf('%s.%d', names(x), i))), large_df, seq_along(large_df))
  
  
  small_df <- Reduce(function(dtf1, dtf2) merge(dtf1, dtf2, by = c("Chromosome", "Index", "?"), all = TRUE), small_df)
  large_df <- Reduce(function(dtf1, dtf2) merge(dtf1, dtf2, by = c("Chromosome", "Index", "?"), all = TRUE), large_df)
  small_df[is.na(small_df)] <- 0
  large_df[is.na(large_df)] <- 0
  
  nsample <- nrow(small_df) 

  small_snps2 <- list()
  large_snps2 <- list()
  for(i in 1:nsample){
    s <- ""
    for(j in 1:length(small_names)){
      s <- paste0(s, small_df[i, sprintf('%s.%d', "SNP1", j)], small_df[i, sprintf('%s.%d', "SNP2", j)])
    }
    small_snps2 <- append(small_snps2, s)
    
    l <- ""
    for(j in 1:length(large_names)){
      l <- paste0(l, large_df[i, sprintf('%s.%d', "SNP1", j)], large_df[i, sprintf('%s.%d', "SNP2", j)])
      
    }
    large_snps2 <- append(large_snps2, l)
  }
  
  r
  small_splitSNPs <- lapply(small_snps2, strsplit, "")
  small_splitSNPs <- lapply(small_splitSNPs, unpack)
  large_splitSNPs <- lapply(large_snps2, strsplit, "")
  large_splitSNPs <- lapply(large_splitSNPs, unpack)
  
  
  small_single_count <- lapply(lapply(small_splitSNPs, factor, levels=c("A", "T", "C", "G")), table)
  large_single_count <- lapply(lapply(large_splitSNPs, factor, levels=c("A", "T", "C", "G")), table)
  write.csv(small_single_count, paste0(path,  "small_single_count", chr, ".csv"), row.names = FALSE)
  write.csv(large_single_count, paste0(path, "large_single_count", chr, ".csv"), row.names = FALSE)
  rm(small_splitSNPs)
  rm(large_splitSNPs)
  
  p_values <- numeric(nsample)
  for(i in 1:nsample){
    
    t <- rbind(small_single_count[[i]],large_single_count[[i]])
    
    remove <- c()
    for(c in 1:ncol(t)){
      if(sum(t[,c] == numeric(2)) == 2){
        remove <- append(remove, c)
      }
    }
    t<-t[,remove*-1]
    
    if(all(is.na(t))){
      p_values[i] <- NA
    }else{
      p_values[i] <- chisq.test(t)$p.value
    }
  }
  return(data.frame("pvals"=p_values,"indices"=small_df$Index[1:nsample]))
}
#This first code chunk has been assigned to me. The only thing that I have altered is that I have typed the full working directory for the variable "weight"
out1 <- sig_test(1)
out2 <- sig_test(2)
out3 <- sig_test(3)
out4 <- sig_test(4)

pvals1 <- out1$pvals[-1*which(is.na(out1$pvals))]
pvals2 <- out2$pvals[-1*which(is.na(out2$pvals))]
pvals3 <- out3$pvals[-1*which(isfi.na(out3$pvals))]
pvals4 <- out3$pvals[-1*which(is.na(out4$pvals))]

indices1 <- out1$indices
indices2 <- out2$indices
indices3 <- out3$indices
indices4 <- out4$indices
# Whenever I run this chunk, I get an error message, "Error in file(file, "rt"), cannot open the connection," with the traceback as follows:
          "4. file(file, "rt")
3. FUN(X[[i]], ...)
2. lapply(paste0(names, ".tped"), read.table, stringsAsFactors = FALSE)
1. sig_test(1)"

Is isfi.na() the function you intend?

I have corrected that error, but the main problem is not resolved.

Try copying this into your working directory to avoid potential problems resolving the filename.

When in input this into setwd(), I get a "cannot change working directory" error

I'm not a Windows user, so I'm guessing that OneDrive is at fault. My suggestion is to make a copy of

into the directory shown by getwd().

What do you mean "make a copy of the code into the working directory"? Do you mean replace the working directory with a copy of my code?

Sorry to be unclear—put a copy of the OneDrive file ending in weight2022.csv into the same directory shown by the command getwd(), which will be somewhere that your code is running, either by default or by opening a project or otherwise. You don't want to replace the contents of the working direction, just add the one file. Do that and confirm in the Files pane or with the system browser. Then

weight <- read.csv("weight2022.csv")

should work.

On my computer, R has trouble finding the file I want unless I write the full directory.
On a related note, my instructor mentioned that the "weight2022.csv" may be hard to read for some computers, so they suggested that we put this line of code "at the beginning of your "sig_test" function (after the function name)"

colnames(weight) <- c("name", "weight")

Where exactly do I put this line, and what other adjustments would I have to make?

OK, are you using RStudio or the R console from terminal?

may be what

is supposed to fix. colnames() assigns variable names.

There is a package that is supposed to cut through the OneDrive fog. I can't test it (Windows free since long time passing) but it might help fetch a local copy of the file for you.

I am using R studio, also I realize that

names <- weight$namegetw

should be

names <- weight$name

Nonetheless fixing that mistake will not improve the situation. I will take your advice with the package.

1 Like

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.