I am trying to convert all ICD codes in a tab-separated file, 002.txt, to Phecodes for a biology/bioinformatics project, using the ICD-to-Phecode conversion tables phecode_icd9_rolled.csv and Phecode_map_v1_2_icd10cm_beta.csv (also tab-separated).
002.txt (loaded as patientdata) is an anonymized table. I need to replace every ICD code in it with the corresponding Phecode, while leaving the column names (header row) and the first column (row names) unchanged:
1 2 3 4 5 6 7 8 9
patient1 K40.9 K43.9 N20.0 N20.1 N23 N39.0 R69 Z88.1
patient2 B96.8 D12.6 E11.6 E87.6 I44.7 K40.9 K43.9 K52.9
patient3 NOT
Phecode_map_v1_2_icd10cm_beta.csv (loaded as ICCD10csv) is the conversion table from ICD-10 codes to Phecodes:
icd10cm phecode
K40.9 550.1
K43.9 550.5
N20.0 594.1
N20.1 594.3
N23 594.8
N39.0 591
R69 1019
Z88.1 960.1
B96.8 041
D12.6 208
E11.6 250.2
E87.6 276.14
I44.7 426.32
K40.9 550.1
K43.9 550.5
K52.9 558
XNO 17
library(tidyverse)
# Load the patient table and both ICD -> Phecode conversion tables.
# Every column is read as character so that readr's type guessing can never
# turn a code column into a number (which would alter codes such as "041").
# NOTE(review): the conversion tables are described as tab-separated; if that
# is the case, read them with read_tsv() instead of read_csv() - confirm
# against the actual files.
patientdata <- read_tsv(
  "002.txt",
  col_types = cols(.default = col_character())
)
ICD9csv <- read_csv(
  "phecode_icd9_rolled.csv",
  col_types = cols(.default = col_character())
)
ICCD10csv <- read_csv(
  "Phecode_map_v1_2_icd10cm_beta.csv",
  col_types = cols(.default = col_character())
)
This link explains the various joins that the tidyverse offers; I think the closest to what I want is semi_join():
https://r4ds.had.co.nz/relational-data.html
Not sure if this is right
# NOTE(review): semi_join() only filters the rows of patientdata; it never
# adds or replaces values. It also needs a key column named "icd10cm" in
# patientdata, and the sample table above only has columns 1-9.
patientdata %>%
semi_join(ICCD10csv, by = "icd10cm")
I am also trying out merge() but am getting error messages
https://www.rdocumentation.org/packages/data.table/versions/1.14.0/topics/merge
# NOTE(review): the bare name `all` refers to base R's all() function, not to
# "all columns", which is why the error below mentions type 'builtin'.
# by.x must be column names, column numbers or a logical vector.
patientdata10 <- merge(patientdata9, ICCD10csv, by.x = all, by.y = "icd10cm")
Error in as.vector(x, mode) :
cannot coerce type 'builtin' to vector of type 'any'
# This is trying to specify by.x as all elements in the first row of patientdata, meaning by.x will merge on all columns of patientdata
# NOTE(review): patientdata9[,1] selects the first *column* of the table (its
# values), not the first row and not a set of column names. by.x only accepts
# column names, column numbers or a logical vector, hence the error below.
patientdata10 <- merge(patientdata9, ICCD10csv, by.x = patientdata9[,1], by.y = "icd10cm")
Error in fix.by(by.x, x) :
'by' must specify one or more columns as numbers, names or logical
# NOTE(review): patientdata9[1,] is the first data row (cell values), not a
# column specification, so merge() rejects it with the same 'by' error.
patientdata10 <- merge(patientdata9, ICCD10csv, by.x = patientdata9[1,], by.y = "icd10cm")
Error in fix.by(by.x, x) :
'by' must specify one or more columns as numbers, names or logical
REPREX:
library(tidyverse)
library(data.table)
# Reproducible stand-in for 002.txt: the first column holds the patient IDs,
# the remaining columns hold ICD-10 codes. Non-syntactic column names such as
# `1` must be backtick-quoted, code values must be quoted strings, and every
# column needs one entry per patient, so missing codes are padded with NA.
patientdata <- data.table(
  `1` = c("patient1", "patient2", "patient3"),
  `2` = c("K40.9", "B96.8", "NOT"),
  `3` = c("K43.9", "D12.6", NA),
  `4` = c("N20.0", "E11.6", NA),
  `5` = c("N20.1", "E87.6", NA),
  `6` = c("N23", "I44.7", NA),
  `7` = c("N39.0", "K40.9", NA),
  `8` = c("R69", "K43.9", NA),
  `9` = c("Z88.1", "K52.9", NA)
)
# Reproducible stand-in for the ICD-10 -> Phecode conversion table. Both
# columns are character so that codes such as "041" keep their leading zero.
ICCD10csv <- data.table(
  icd10cm = c(
    "K40.9", "K43.9", "N20.0", "N20.1", "N23", "N39.0", "R69", "Z88.1",
    "B96.8", "D12.6", "E11.6", "E87.6", "I44.7", "K40.9", "K43.9", "K52.9",
    "XNO"
  ),
  phecode = c(
    "550.1", "550.5", "594.1", "594.3", "594.8", "591", "1019", "960.1",
    "041", "208", "250.2", "276.14", "426.32", "550.1", "550.5", "558",
    "17"
  )
)
# END OF SUCCESSFUL CODE START OF GUESSES
# Replace every ICD-10 code in patientdata with its Phecode.
#
# Joins (semi_join(), merge()) match whole rows on key columns, so they cannot
# swap individual cells in a wide table. Instead, each code column is
# translated cell by cell with match(): match() returns the position of each
# code in ICCD10csv$icd10cm, and that position indexes ICCD10csv$phecode.
#
# - `-1` skips the first column (patient IDs); column names are never touched.
# - match() uses the first hit, so duplicated rows in the conversion table
#   (e.g. "K40.9") do not duplicate patients.
# - Values without an entry in the conversion table (e.g. "NOT") and empty
#   cells become NA.
patientdata10 <- patientdata %>%
  mutate(across(
    -1,
    function(codes) ICCD10csv$phecode[match(codes, ICCD10csv$icd10cm)]
  ))
patientdata10