Hi,
This can be done with the `pivot_wider()` function from the tidyr package:
library(tidyverse)
# Example data in long format: one row per (KO, Pathway) association.
myData <- data.frame(
  stringsAsFactors = FALSE,
  KO = c(
    "K02372", "K00059", "K07535",
    "K02371", "K00004", "K16845", "K16846", "K00004", "K00004",
    "K00882", "K11645", "K19222", "K01661", "K19181",
    "K00219", "K00219", "K01834", "K15633", "K15635", "K01858",
    "K01858", "K00216", "K01252", "K10621", "K01617", "K02554",
    "K10676", "K01703", "K01704", "K15067", "K10217",
    "K01617", "K02554", "K10217"
  ),
  Pathway = c(
    "(5Z)-dodec-5-enoate biosynthesis", "(5Z)-dodec-5-enoate biosynthesis",
    "(5Z)-dodec-5-enoate biosynthesis", "(5Z)-dodec-5-enoate biosynthesis",
    "(R)-acetoin biosynthesis I", "(R)-cysteate degradation",
    "(R)-cysteate degradation",
    "(R,R)-butanediol biosynthesis", "(R,R)-butanediol degradation",
    "1,3-propanediol biosynthesis (engineered)",
    "1,3-propanediol biosynthesis (engineered)",
    "1,4-dihydroxy-2-naphthoate biosynthesis",
    "1,4-dihydroxy-2-naphthoate biosynthesis",
    "1,5-anhydrofructose degradation",
    "10-cis-heptadecenoyl-CoA degradation (yeast)",
    "10-trans-heptadecenoyl-CoA degradation (reductase-dependent, yeast)",
    "1-butanol autotrophic biosynthesis", "1-butanol autotrophic biosynthesis",
    "1-butanol autotrophic biosynthesis",
    "1D-myo-inositol hexakisphosphate biosynthesis III (Spirodela polyrrhiza)",
    "1D-myo-inositol hexakisphosphate biosynthesis IV (Dictyostelium)",
    "2,3-dihydroxybenzoate biosynthesis",
    "2,3-dihydroxybenzoate biosynthesis", "2,3-dihydroxybenzoate degradation",
    "2,3-dihydroxybenzoate degradation",
    "2,3-dihydroxybenzoate degradation", "2,4-dichlorophenoxyacetate degradation",
    "2,5-xylenol and 3,5-xylenol degradation",
    "2,5-xylenol and 3,5-xylenol degradation",
    "2-amino-3-carboxymuconate semialdehyde degradation to 2-oxopentenoate",
    "2-amino-3-carboxymuconate semialdehyde degradation to 2-oxopentenoate",
    "2-amino-3-carboxymuconate semialdehyde degradation to 2-oxopentenoate",
    "2-amino-3-carboxymuconate semialdehyde degradation to 2-oxopentenoate",
    "2-amino-3-carboxymuconate semialdehyde degradation to glutaryl-CoA"
  )
)

# Reshape to a wide presence/absence table: one row per KO, one column per
# pathway, 1 where the KO is associated with the pathway and 0 otherwise.
myData <- myData %>%
  # Drop repeated (KO, Pathway) pairs; duplicates would otherwise make
  # pivot_wider() warn and return list-columns instead of 0/1 values.
  distinct(KO, Pathway) %>%
  # Marker value that becomes the cell content for every observed pair.
  mutate(present = 1) %>%
  pivot_wider(
    # Named explicitly: passing id_cols by position is deprecated in
    # recent tidyr releases.
    id_cols = KO,
    names_from = Pathway,
    values_from = present,
    # Pairs that never occur in the long data are filled with 0 (absent).
    values_fill = 0
  )
myData
#> # A tibble: 27 x 20
#> KO `(5Z)-dodec-5-eno~ `(R)-acetoin bi~ `(R)-cysteate d~ `(R,R)-butanedio~
#> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 K02372 1 0 0 0
#> 2 K00059 1 0 0 0
#> 3 K07535 1 0 0 0
#> 4 K02371 1 0 0 0
#> 5 K00004 0 1 0 1
#> 6 K16845 0 0 1 0
#> 7 K16846 0 0 1 0
#> 8 K00882 0 0 0 0
#> 9 K11645 0 0 0 0
#> 10 K19222 0 0 0 0
#> # ... with 17 more rows, and 15 more variables:
#> # (R,R)-butanediol degradation <dbl>,
#> # 1,3-propanediol biosynthesis (engineered) <dbl>,
#> # 1,4-dihydroxy-2-naphthoate biosynthesis <dbl>,
#> # 1,5-anhydrofructose degradation <dbl>,
#> # 10-cis-heptadecenoyl-CoA degradation (yeast) <dbl>,
#> # 10-trans-heptadecenoyl-CoA degradation (reductase-dependent, yeast) <dbl>,
#> # 1-butanol autotrophic biosynthesis <dbl>,
#> # 1D-myo-inositol hexakisphosphate biosynthesis III (Spirodela polyrrhiza) <dbl>,
#> # 1D-myo-inositol hexakisphosphate biosynthesis IV (Dictyostelium) <dbl>,
#> # 2,3-dihydroxybenzoate biosynthesis <dbl>,
#> # 2,3-dihydroxybenzoate degradation <dbl>,
#> # 2,4-dichlorophenoxyacetate degradation <dbl>,
#> # 2,5-xylenol and 3,5-xylenol degradation <dbl>,
#> # 2-amino-3-carboxymuconate semialdehyde degradation to 2-oxopentenoate <dbl>,
#> # 2-amino-3-carboxymuconate semialdehyde degradation to glutaryl-CoA <dbl>
Created on 2021-08-27 by the reprex package (v2.0.1)
Hope this helps,
PJ