anti_join() is not working. How can I do this in base R?

How do I compute a complement (the values in one set that are not in another) in R/RStudio? Ultimately I want this to work on a data frame; here I am using simple vectors to start. The dataset I want to apply this to has ID numbers.

# Full vector of values, printed on a single line.
zf <- c(1,2,3,4,5,6,7,8,9)
cat(zf,"\n")

# Values that should be removed from zf.
zf1 <- c(1,3,5,7,9)
cat(zf1,"\n")

# `!zf1` is a logical negation, not a set complement: every non-zero number
# becomes FALSE, so the index selects nothing and zf2 is empty.
zf2 <- zf[!zf1]
cat(zf2,"\n","\n")

library(dplyr)
# zf1 is a plain numeric vector here, and anti_join() has no method for that
# class (see the error below). `df` is not assigned anywhere in this snippet.
anti_join(zf1,df)

OUTPUT:
1 2 3 4 5 6 7 8 9
1 3 5 7 9
Error in UseMethod("anti_join") :
no applicable method for 'anti_join' applied to an object of class "c('double', 'numeric')"

# Same values as in the first attempt, now wrapped in one-column data frames.
zf <- c(1,2,3,4,5,6,7,8,9)
zf <-as.data.frame(zf)
zf

zf1 <- c(1,3,5,7,9)
zf1 <-as.data.frame(zf1)
zf1

# `!` is still a logical negation here, not a set complement.
zf2 <- zf[!zf1]
zf2 <-as.data.frame(zf2)
zf2

library(dplyr)
# `df` is not assigned anywhere in this snippet; the data frames built above
# are zf and zf1. Their columns have different names (zf vs zf1), so a join
# between them needs an explicit `by`.
anti_join(zf1,df)

OUTPUT:
Error in `anti_join()`:
! `by` must be supplied when `x` and `y` have no common variables.
ℹ use `by = character()` to perform a cross-join.
Backtrace:
 1. dplyr::anti_join(zf1, df)
 2. dplyr:::anti_join.data.frame(zf1, df)

[image] Show Traceback

Error in anti_join(zf1, df) : ℹ use `by = character()` to perform a cross-join.

Here is an example of using anti_join().

# One-column data frame; the column takes the name of the variable (zf).
zf <- c(1,2,3,4,5,6,7,8,9)
zf <-as.data.frame(zf)
zf
#>   zf
#> 1  1
#> 2  2
#> 3  3
#> 4  4
#> 5  5
#> 6  6
#> 7  7
#> 8  8
#> 9  9

# Second one-column data frame; its column is named zf1.
zf1 <- c(1,3,5,7,9)
zf1 <-as.data.frame(zf1)
zf1
#>   zf1
#> 1   1
#> 2   3
#> 3   5
#> 4   7
#> 5   9


library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
# Keep the rows of zf that have no match in zf1. The key columns have
# different names, so `by` maps column zf (in zf) to column zf1 (in zf1).
anti_join(zf,zf1, by = c(zf = "zf1"))
#>   zf
#> 1  2
#> 2  4
#> 3  6
#> 4  8

Created on 2023-03-29 with reprex v2.0.2

# Wrapping an assignment in parentheses also prints the assigned value.
(zf <- c(1,2,3,4,5,6,7,8,9))
#> [1] 1 2 3 4 5 6 7 8 9
(zf1 <- c(1,3,5,7,9))
#> [1] 1 3 5 7 9
# `!zf1` treats the numbers as logicals; all are non-zero, so every entry is FALSE.
(zf2 <- zf[!zf1]) # empty: an all-FALSE logical index selects nothing
#> numeric(0)
# setdiff() returns the elements of zf that do not occur in zf1.
(zf2 <- setdiff(zf,zf1))
#> [1] 2 4 6 8

Hi,
I tried to make a function out of your code. Why is the return not working? I tried this four ways. Basic code is very nice. Simple. I am building up to a dataset.

# Inputs shared by the four attempts below: the full vector and the values
# to remove from it.
zf <- c(1,2,3,4,5,6,7,8,9)
zf1 <- c(1,3,5,7,9)

METHOD 1

# Attempt 1: return dim() of the set difference and its first three elements
# together in a list.
mmSplit <- function(zf,zf1){
           # This value is discarded: zf2 is reassigned on the next line.
           zf2 <- zf[!zf1]
           zf2 <- setdiff(zf,zf1)
           # dim() of the setdiff() result is NULL for these vector inputs;
           # that is the first element of the list in the output below.
           myDim <- dim(zf2)
           # A bare name inside a function body is evaluated but not printed.
           myDim
           topOfFile <- head(zf2,3)
           topOfFile
           my_list <- list(myDim,topOfFile)
           return(my_list) 
           }
# The returned list is auto-printed but not stored in any variable.
mmSplit(zf,zf1)
# topOfFile was created inside mmSplit() and does not exist out here,
# which is the "object 'topOfFile' not found" error.
print(topOfFile)

OUTPUT:
[[1]]
NULL

[[2]]
[1] 2 4 6

Error in print(topOfFile) : object 'topOfFile' not found

METHOD 2

# Attempt 2: return only the first three elements of the set difference.
mmSplit <- function(zf,zf1){
           # This value is discarded: zf2 is reassigned on the next line.
           zf2 <- zf[!zf1]
           zf2 <- setdiff(zf,zf1)
           # myDim is computed but is not part of the return value.
           myDim <- dim(zf2)
           myDim
           topOfFile <- head(zf2,3)
           # my_list is built but never used; the function returns topOfFile.
           my_list <- list(topOfFile)
           return(topOfFile) 
           }
# The returned vector is auto-printed but not stored in any variable.
mmSplit(zf,zf1)
# topOfFile is local to mmSplit(), so it cannot be found here.
print(topOfFile)

OUTPUT:
[1] 2 4 6
Error in print(topOfFile) : object 'topOfFile' not found

METHOD 3

# Attempt 3: same as attempt 2 without the unused list.
mmSplit <- function(zf,zf1){
           # This value is discarded: zf2 is reassigned on the next line.
           zf2 <- zf[!zf1]
           zf2 <- setdiff(zf,zf1)
           # myDim is computed but is not part of the return value.
           myDim <- dim(zf2)
           myDim
           topOfFile <- head(zf2,3)
           # A bare name inside a function body is evaluated but not printed.
           topOfFile
           return(topOfFile) 
           }
# The return value is not assigned to anything.
mmSplit(zf,zf1)
# topOfFile is local to mmSplit(), so referring to it here is an error.
topOfFile

Error: object 'topOfFile' not found

METHOD 4

# Attempt 4: write the set difference to a CSV file and rely on the last
# expression as the return value.
mmSplit <- function(zf,zf1){
           # This value is discarded: zf2 is reassigned on the next line.
           zf2 <- zf[!zf1]
           zf2 <- setdiff(zf,zf1)
           # Side effect: writes zf2 to "zf2.csv" without row names.
           write.csv(zf2,file="zf2.csv",row.names=FALSE)
           # Evaluated but neither printed nor returned.
           dim(zf2)
           # Last expression in the body, so this is the function's return value.
           head(zf2,3)
           }
# The return value is auto-printed (the output below) but not stored.
mmSplit(zf,zf1)

OUTPUT:
[1] 2 4 6

The variable topOfFile only exists while the function is running. If you want to save what is returned by the function, you have to assign it to a variable.

# Example inputs: the full vector and the values to drop from it.
zf <- c(1, 2, 3, 4, 5, 6, 7, 8, 9)
zf1 <- c(1, 3, 5, 7, 9)

#' First three elements of `zf` that do not occur in `zf1`.
#'
#' @param zf Vector of candidate values.
#' @param zf1 Vector of values to exclude.
#' @return The first three elements of `setdiff(zf, zf1)`, or fewer when the
#'   difference has fewer than three elements.
mmSplit <- function(zf, zf1) {
  # topOfFile is local to this call; callers only receive the returned value.
  topOfFile <- head(setdiff(zf, zf1), n = 3)
  topOfFile
}
# Store the function's return value in a variable outside the function.
NewVar <- mmSplit(zf,zf1)
# Still an error: topOfFile exists only inside mmSplit().
print(topOfFile)
#> Error in print(topOfFile): object 'topOfFile' not found
# The returned value is available under the name it was assigned to.
print(NewVar)
#> [1] 2 4 6

Created on 2023-03-31 with reprex v2.0.2

1 Like