How to find the same values between multiple vectors?

I'm trying to compare 53 vectors, each containing alphanumeric codes that describe a residue's position inside a protein sequence. I need to make an all-vs-all comparison and obtain the values shared in each pairwise comparison.

PER1_PASA <- as.vector(proteins2$PER1_PASA)
head(PER1_PASA)
[1] "THR_B1_1" "PHE_B1_2" "SER_B1_3" "VAL_B1_4" "ALA_B1_5" "VAL_B1_6"

Any ideas on how to determine which elements are shared among the vectors after comparing them all vs. all? Note that it is not mandatory for every vector to share a term with all the others.

# Two short character vectors whose only common element is "c".
# The outer parentheses make each assignment print its value.
(v1 <- c("a", "b", "c"))
(v2 <- c("c", "d", "e"))

# Values that occur in both vectors.
intersect(x = v1, y = v2)

Nope, because that is just how a single vector is filled. I have to compare all the elements in this vector with all the elements in each of the other vectors, and this has to be done for every vector.

Hi!

To help us help you, could you please prepare a reproducible example (reprex) illustrating your issue? Please have a look at this guide, to see how to create one:

HI, ok I'll try!

> head(proteins2)
# A tibble: 6 × 53
  AHRR_PAS AHR_PASA AHR_PASB ARNT2_P…¹ ARNT2…² ARNTL…³ ARNTL…⁴ ARNTL…⁵ ARNTL…⁶ ARNT_…⁷ ARNT_…⁸ CLOCK…⁹ CLOCK…˟ HIF1A…˟ HIF1A…˟ HIF2A…˟ HIF2A…˟ HIF3A…˟ HIF3A…˟ KCNH1 KCNH2 KCNH3
  <chr>    <chr>    <chr>    <chr>     <chr>   <chr>   <chr>   <chr>   <chr>   <chr>   <chr>   <chr>   <chr>   <chr>   <chr>   <chr>   <chr>   <chr>   <chr>   <chr> <chr> <chr>
1 PHE_B1_1 PHE_B1_1 PHE_B1_1 GLY_B1_1  GLU_B1… GLY_B1… GLU_B1… PHE_B1… GLU_B1… GLY_B1… GLU_B1… GLY_B1… GLU_B1… PHE_B1… THR_B1… PHE_B1… THR_B1… GLY_B1… ALA_B1… ASN_… PHE_… PHE_…
2 ALA_B1_2 VAL_B1_2 ARG_B1_2 PHE_B1_2  PHE_B1… PHE_B1… PHE_B1… LEU_B1… TYR_B1… PHE_B1… PHE_B1… PHE_B1… PHE_B1… VAL_B1… PHE_B1… ILE_B1… PHE_B1… PHE_B1… PHE_B1… PHE_… ILE_… VAL_…
3 LEU_B1_3 LEU_B1_3 THR_B1_3 LEU_B1_3  LEU_B1… LEU_B1… ILE_B1… PHE_B1… VAL_B1… LEU_B1… ILE_B1… PHE_B1… THR_B1… MET_B1… LEU_B1… ALA_B1… LEU_B1… VAL_B1… LEU_B1… VAL_… ILE_… LEU_…
4 VAL_B1_4 VAL_B1_4 LYS_B1_4 PHE_B1_4  SER_B1… PHE_B1… THR_B1… VAL_B1… SER_B1… PHE_B1… SER_B1… LEU_B1… SER_B1… VAL_B1… SER_B1… VAL_B1… SER_B1… MET_B1… SER_B1… LEU_… ALA_… GLY_…
5 VAL_B1_5 VAL_B1_5 HIS_B1_5 VAL_B1_5  ARG_B1… VAL_B1… ARG_B1… VAL_B1… ARG_B1… ILE_B1… ARG_B1… ALA_B1… ARG_B1… LEU_B1… ARG_B1… VAL_B1… ARG_B1… VAL_B1… ARG_B1… GLY_… ASN_… ASN_…
6 SER_B1_6 THR_B1_6 LYS_L1_1 VAL_B1_6  HIS_B1… VAL_B1… PHE_B1… GLY_B1… HIS_B1… VAL_B1… HIS_B1… ILE_B1… HIS_B1… THR_B1… HIS_B1… THR_B1… HIS_B1… LEU_B1… HIS_B1… ASN_… ALA_… ALA_…

This is the data frame. After that, I extracted each column as a vector:

# Pull individual columns of `proteins2` out as stand-alone vectors,
# one object per column.
AHRR     <- as.vector(proteins2[["AHRR_PAS"]])
AHR_PASA <- as.vector(proteins2[["AHR_PASA"]])
AHR_PASB <- as.vector(proteins2[["AHR_PASB"]])

and they look like this

> print(AHR_PASA)
  [1] "PHE_B1_1"  "VAL_B1_2"  "LEU_B1_3"  "VAL_B1_4"  "VAL_B1_5"  "THR_B1_6"  "THR_L1_1"  "ASP_L1_2"  "ALA_L1_3"  "LEU_B2_1"  "VAL_B2_2"  "PHE_B2_3"  "TYR_B2_4"  "ALA_B2_5" 
 [15] "SER_L2_1"  "SER_H1_1"  "THR_H1_2"  "ILE_H1_3"  "GLN_H1_4"  "ASP_H1_5"  "TYR_H1_6"  "LEU_H1_7"  "GLY_L3_1"  "PHE_L3_2"  "GLN_L3_3"  "GLN_H2_1"  "SER_H2_2"  "ASP_H2_3" 
 [29] "VAL_H2_4"  "ILE_L4_1"  "HIS_L4_2"  "GLN_L4_3"  "SER_L4_4"  "VAL_H3_1"  "TYR_H3_2"  "GLU_H3_3"  "LEU_L5_1"  "ILE_L5_2"  "HIS_L5_3"  "THR_L5_4"  "GLU_L5_5"  "ASP_L5_6" 
 [43] "ARG_H4_1"  "ALA_H4_2"  "GLU_H4_3"  "PHE_H4_4"  "GLN_H4_5"  "ARG_H4_6"  "GLN_H4_7"  "LEU_H4_8"  "HIS_L6_1"  "TRP_L6_2"  "ALA_L6_3"  "LEU_L6_4"  "ASN_L6_5"  "PRO_L6_6" 
 [57] "SER_L6_7"  "GLN_L6_8"  "CYS_L6_9"  "THR_L6_10" "GLU_L6_11" "SER_L6_12" "GLY_L6_13" "GLN_L6_14" "GLY_L6_15" "ILE_L6_16" "GLU_L6_17" "GLU_L6_18" "ALA_L6_19" "THR_L6_20"
 [71] "GLY_L6_21" "LEU_L6_22" "PRO_L6_23" "GLN_L6_24" "THR_L6_25" "VAL_L6_26" "VAL_L6_27" "CYS_L6_28" "TYR_L6_29" "ASN_L6_30" "PRO_L6_31" "ASP_L6_32" "GLN_L6_33" "ILE_L6_34"
 [85] "PRO_L6_35" "PRO_L6_36" "GLU_L6_37" "ASN_L6_38" "SER_L6_39" "PRO_L6_40" "LEU_L6_41" "MET_L6_42" "GLU_B3_1"  "ARG_B3_2"  "CYS_B3_3"  "PHE_B3_4"  "ILE_B3_5"  "CYS_B3_6" 
 [99] "ARG_B3_7"  "LEU_B3_8"  "ARG_B3_9"  "CYS_L7_1"  "LEU_L7_2"  "LEU_L7_3"  "ASP_L7_4"  "ASN_L7_5"  "SER_L7_6"  "SER_L7_7"  "GLY_L7_8"  "PHE_B4_1"  "LEU_B4_2"  "ALA_B4_3" 
[113] "MET_B4_4"  "ASN_B4_5"  "PHE_B4_6"  "GLN_B4_7"  "GLY_B4_8"  "LYS_B4_9"  "LEU_B4_10" "LYS_B4_11" "TYR_B4_12" "LEU_B4_13" "HIS_L8_1"  "GLY_L8_2"  "GLN_L8_3"  "LYS_L8_4" 
[127] "LYS_L8_5"  "LYS_L8_6"  "GLY_L8_7"  "LYS_L8_8"  "ASP_L8_9"  "GLY_L8_10" "SER_L8_11" "ILE_L8_12" "LEU_L8_13" "PRO_L8_14" "PRO_L8_15" "GLN_B5_1"  "LEU_B5_2"  "ALA_B5_3" 
[141] "LEU_B5_4"  "PHE_B5_5"  "ALA_B5_6"  "ILE_B5_7"  "ALA_B5_8"  "THR_B5_9"  "PRO_B5_10" NA          NA          NA          NA          NA          NA          NA         
[155] NA          NA          NA          NA          NA          NA          NA          NA          NA          NA          NA          NA          NA          NA         
[169] NA          NA

This is where the issue arises: how can I compare the vectors and find the shared terms (e.g. term = "LEU_L8_13"), and which vectors each term is shared with?

Here is an example to start you off
I determine the elements that could possibly be shared; then, for each one, I get an output showing where it appears.
As a further step, I count how many columns each shareable term appears in.

library(tidyverse)

# Toy stand-in for the real table: one column per vector to be compared.
(yourstart <- data.frame(v1 = letters[1:6],
                         v2 = letters[6:11],
                         v3 = letters[c(1, 2, 3, 4, 9, 11)]))

# Every distinct value found anywhere in the table is a candidate shared term.
# sort() drops NA by default, so NA padding in shorter columns is not a term.
(shareable_terms <- sort(unique(unlist(yourstart))))

# For each shareable term, build a one-row data frame with one logical per
# column: TRUE when the term occurs in that column.
# `.cols = everything()` is given explicitly because calling across() without
# `.cols` is deprecated since dplyr 1.1.0 and warns on every iteration.
(results_1 <- map(
  shareable_terms,
  \(term) {
    yourstart |>
      summarise(across(.cols = everything(), .fns = \(column) term %in% column))
  }
) |>
  set_names(shareable_terms))

# For each term, count how many columns contain it (number of TRUE flags).
(results_2 <- map(results_1, \(flags) sum(unlist(flags))))
1 Like

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.