Filtering by string and operating with rows

Hi everyone. I have a database of medical data with an ID (sample_name) made up by alphanumeric code that follows e.g.:

Example: A2589200014000006001 which accounts for

Project: A25

Center: 892

Patient: 00014

Disease: 00

Visit: 000 (baseline) / 701 (1 year visit)

Sample: 60

Specimen: 01

What I am trying to accomplish is to operate RQ values based on 1-year follow-up according to 701 code between follow-ups

database <- structure(list(sample_name = c("A2571900003007016001", "A2592000026007016001", 
"A2592000036007016001", "A2542900002007016001", "A2552500037000006001", 
"A2582200025000006001", "A2555600056007016001", "A2537700008000006001", 
"A2537700009007016001", "A2564800002000006001", "A2589600037000006001", 
"A2552500020007016001", "A2589600034000006001", "A2552500020000006001", 
"A2571900001007016001", "A2582200032007016001", "A2582200020000006001", 
"A2537700011000006001", "A2582200012007016001", "A2589800008007016001", 
"A2591900088007016001", "A2541700027000006001", "A2537700030007016001", 
"A2561700001000006001", "A2561900011007016001", "A2537700029000006001", 
"A2537700010000006001", "A2537700032000006001", "A2552500026007016001", 
"A2591800088000006001", "A2537600099007016001", "A2537700008007016001", 
"A2582200021000006001", "A2537700013007016001", "A2555600055007016001", 
"A2589600034000006001", "A2561900012007016001", "A2582200012007016001", 
"A2555600056007016001", "A2582200019000006001", "A2543900001007016001", 
"A2564800001000006001", "A2555100021000006001", "A2592000033000006001", 
"A2552500027000006001", "A2537700031000006001", "A2552500026000006001", 
"A2541700026007016001", "A2576000026007016001", "A2571200024000006001", 
"A2552500026007016001", "A2571900005007016001", "A2556300014000006001", 
"A2592000028007016001", "A2582200035000006001", "A2592000023007016001", 
"A2552500034000006001", "A2555600055007016001", "A2582200019007016001", 
"A2582200022000006001", "A2576000006007016001", "A2555300047000006001", 
"A2589200008007016001", "A2582200035007016001", "A2592000028000006001", 
"A2576000006007016001", "A2571200024007016001", "A2589400004007016001", 
"A2589500045007016001", "A2592000038000006001", "A2555100024007016001", 
"A2537700031007016001", "A2582200034000006001", "A2537600099007016001", 
"A2537700016007016001", "A2589600038000006001", "A2554700034007016001", 
"A2537700032000006001", "A2552500030000006001", "A2535500019000006001", 
"A2576000006000006001", "A2571900001000006001", "A2582200030000006001", 
"A2552500037000006001", "A2589200014007016001", "A2571200028007016001", 
"A2561700001007016001", "A2592000024007016001", "A2582200026007016001", 
"A2540900001007016001", "A2576100007000006001", "A2582200021000006001", 
"A2544800001007016001", "A2576000007000006001", "A2582200020007016001", 
"A2582200033000006001", "A2564800003007016001", "A2537700029000006001", 
"A2555300047000006001", "A2555100024007016001", "A2555300047007016001", 
"A2582200026007016001", "A2589200012007016001", "A2571900003007016001", 
"A2537700015000006001", "A2589800009000006001", "A2592000029000006001", 
"A2582200030000006001", "A2535500019000006001", "A2589200013007016001", 
"A2574500061007016001", "A2582200020007016001", "A2591800099007016001", 
"A2591900092000006001", "A2537700006007016001", "A2576000017000006001", 
"A2561900013007016001", "A2576000017000006001", "A2582200012000006001", 
"A2582200029000006001", "A2576000017000006001", "A2555100022007016001", 
"A2582200033007016001", "A2537700029000006001", "A2582200011000006001", 
"A2582200030007016001", "A2591900088000006001", "A2537700031007016001", 
"A2576000006000006001", "A2555300047007016001", "A2582200019007016001", 
"A2571200030007016001", "A2592000036007016001", "A2592000036000006001", 
"A2589600038007016001", "A2589200008007016001", "A2571200027000006001", 
"A2582200029007016001", "A2571900001000006001", "A2589200008007016001", 
"A2592000036007016001", "A2589200010000006001", "A2571900007000006001", 
"A2552500021000006001", "A2592000027000006001", "A2537700009000006001", 
"A2582200037000006001", "A2576000014007016001", "A2571200023007016001", 
"A2537700017007016001", "A2537700011000006001", "A2589600034007016001", 
"A2552500027000006001", "A2552500033000006001", "A2537700011000006001", 
"A2592000038000006001", "A2535500019007016001", "A2591800088007016001", 
"A2556300014007016001", "A2556300014007016001", "A2592000030000006001", 
"A2540900001007016001", "A2589800008000006001", "A2576000014000006001", 
"A2571900008007016001", "A2561500002000006001", "A2582200032007016001", 
"A2591900095000006001", "A2591800088007016001", "A2592000023000006001", 
"A2564800003000006001", "A2582200012000006001", "A2552500021007016001", 
"A2540900001000006001", "A2576100015000006001", "A2537700027007016001", 
"A2576000007007016001", "A2537700017000006001", "A2552500034007016001", 
"A2592000035000006001"), RQ = c(1.08197462642302, 1.49002169146583, 
2.2626289256918, 1.59364836099312, 1.16958766405195, 1.15722281085647, 
1.90395581699507, 1.16070391438372, 1.42207741058728, NA, 2.01623900584052, 
1.32715174233857, 0.937137946702273, 0.379016702964743, 1.19471513515602, 
1.45969471059855, 1.83062125071651, 1.19803218514883, 2.57933650131177, 
0.971083101781619, 0.844400887423782, 1.21644126855065, 1.23342215585474, 
1.33299065527035, 1.15535269687227, 1.01091848210074, 1.36225803464049, 
1.70133432190171, 0.179451596914852, 2.44697608226373, 2.76702050165719, 
0.854409740889734, 1.60250997083131, 1.02266478905362, 0.943874312681693, 
0.885972198499773, 0.839537496184133, 0.99723125135207, 0.638164384414478, 
1.87081469574624, 1.13053056712459, 0.726146896129157, 1.29115899476399, 
0.775214073170476, 1.00393555807285, 1.43561277531882, 1.12635895425256, 
1.2466011942751, 0.584118641779783, 0.770571108358409, 4.80210406012398, 
0.768437590644007, 1.10114159809796, 1.9683680440676, 1, 0.851453707748945, 
0.1666623346364, 1.24372425877751, 2.32462821503624, 0.523405140987288, 
1.03622215405832, 1.13393137629285, 0.970634469769548, 2.42054737903178, 
NA, 0.709233866728796, 1.31281776506912, 2.04864025497385, 0.493002431338097, 
0.600540452021357, 0.493800430722687, 1.88905756659272, 1.17609125029097, 
2.95535888117926, 0.751754411118803, 1.12375951661894, 0.976708528962229, 
0.858367088615479, 1.06191380396236, 1.28075986132977, 0.868942930406155, 
0.816203046150902, 1.21307324843306, 0.472701058375372, 0.951757980304535, 
0.694798558820258, 5.23189407871883, 1.2938466778861, 1.0614232089498, 
1.90131820245963, NA, 0.574481895667812, 2.846784593394, 1.26927088601981, 
1.88121751127258, 0.610332223355819, 2.46285780193722, 1.86951839513, 
0.523889093650146, 0.968618189226629, 0.880869374126979, 1.15508578455358, 
0.763305945229095, 1.59955063929534, 0.799221149722627, 0.70907001783973, 
1.14419530791605, 1.2593389772436, 3.37760354969513, 0.579280229789139, 
1.52837652076517, 0.963039358974067, 0.35675362642947, 0.335643125695067, 
0.84109072602697, 0.900001929793514, 1.09378814701508, 1.45598954899909, 
1.18701102375694, 0.855792732948346, 1.54221082540794, 0.952637998043937, 
0.730522189272839, 1.0707833911119, 0.96638278943318, 1.35441200570982, 
0.856188284546456, 2.09604060687005, 0.578878842457425, 1.14076371586843, 
2.26106113424919, 9.77886256349568, 1.8881848383005, NA, 0.57700937588178, 
2.10672207190967, 0.989656656415207, 2.64695438549274, 1.29863860272989, 
1.51676754537444, 1.87384289374485, 1.2637110854608, 0.72547610391709, 
0.479964630519455, 1.63052135171292, 0.749672992109442, NA, 1.02526723788859, 
2.24026075844427, 0.935839697736695, 0.320560077029036, 0.135246828302878, 
1.03646159987396, 1.59364836099312, 0.722298293338962, 0.272879010419556, 
1.96337171049351, 1.65748980897599, 1.52414483033977, 1.11265012058483, 
NA, 1.47324768581648, 1.04198415065663, 0.86294070314942, 0.553376518901261, 
1.7021206869689, 5.49708453933297, 0.917215940605446, 1.65748980897599, 
1.32439487523883, 0.310643836121484, 1.17473336089929, 0.768437590644006, 
1.4382688051156, 0.940608755636287, 0.798851916449149, 1.17175152030837, 
0.603182578991095, 1.18153852202942, 0.89089871814034)), row.names = c(NA, 
-180L), class = c("tbl_df", "tbl", "data.frame"))

I have a database of medical data with an ID made up by alphanumeric code that follows e.g.:

Example: A2589200014000006001

Project: A25

Center: 892

Patient: 00014

Disease: 00

Visit: 000 (baseline) / 701 (1 year visit)

Sample: 60

Specimen: 01

What I am trying to accomplish is to substract RQ values based on 1-year follow-up according to 701 code

I am pretty new in terms of string topic, I find it quite tricky and hard. I have read several approaches with a more traditional separate, or modern with stringr (str_detect or mystrsplit)

The idea is to substract RQ values from sample_name: 701 - 000 but matches on the other digits of the sample name (project, patient....)

1 Like

Hi @Javier9,
Im find this solutions hope could help you for start the analises of this data

Check the Sample and Specimen column because my pc change the final numbers.

library(stringr)
options(digits=19)

str(database)
database %>% 
  mutate(P_letter = str_extract(sample_name, 'A'), 
                   number = as.numeric(parse_number(sample_name)),
                   Project=str_sub(number,1,2),
                   Center= str_sub(number,3,5),
                   Patient= str_sub(number,6,10),
                   Disease = str_sub(number,11,12),
                   Visit= str_sub(number,13,15),
                   Sample= str_sub(number,16,17), # this digits changes
                   Specimen=str_sub(number,18,19), # this digits changes
                   Sample = replace(Sample, Sample == 59, 60), # this digits changes
                   Sample = replace(Sample, Sample == 61, 60), # this digits changes
                   Specimen = replace(Specimen, Specimen == 36, 01), # this digits changes
                   Specimen = replace(Specimen, Specimen == 44, 01)) # this digits changes

#number Center Patient Disease Visit Sample Specimen
#<dbl>  <chr>  <chr>   <chr>   <chr>  <chr>  <chr>
# 2571900003007015936	25	719	00003	00	701	60	1
# 2592000026007015936	25	920	00026	00	701	60	1
# 2592000036007015936	25	920	00036	00	701	60	1
# 2542900002007015936	25	429	00002	00	701	60	1
# 2552500037000006144	25	525	00037	00	000	60	1
# 2582200025000006144	25	822	00025	00	000	60	1
# 2555600056007015936	25	556	00056	00	701	60	1
# 2537700008000006144	25	377	00008	00	000	60	1
# 2537700009007015936	25	377	00009	00	701	60	1
# 2564800002000006144	25	648	00002	00	000	60	1

It worked. Now I need to find some syntax to filter per column matching every column and just varying the visit column
Thank you

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.