Calculating percentages per group not working

Hi.

I want to group my data by variables, and then calculate the percentage of some value for each of those groups.

Specifically, I want to group my data below by population (pop) and instance within that population (e.g., n9, n10), and then calculate the proportion of loci for each SNP count (e.g., 0, 1) within each group. For example:

For stromsrum (population), n10 (instance), there are 189032 loci in total, so for snp = 0 it would calculate 134914 / 189032 = 0.71.

Any ideas?

I've tried using the following code without success:

# NOTE(review): `opt_n$loci` names the whole column of `opt_n`, not the rows of the current (pop, instance) group.
opt_n %>% group_by(pop, instance) %>% mutate(loci_tot = opt_n$loci/sum(opt_n$loci))

Here's a part of my data:

# Excerpt of the input data (row names 410-510), with four columns:
#   pop      - population name
#   instance - instance label within the population (e.g. "n9", "n10")
#   snps     - SNP count
#   loci     - number of loci observed for that SNP count
# The result is not assigned to a name here; presumably this is the object
# referred to as `opt_n` elsewhere in the thread.
data.frame(
stringsAsFactors = FALSE,
row.names = c("410","411","412","413",
"414","415","416","417","418","419","420","421",
"422","423","424","425","426","427","428","429","430",
"431","432","433","434","435","436","437","438",
"439","440","441","442","443","444","445","446",
"447","448","449","450","451","452","453","454",
"455","456","457","458","459","460","461","462",
"463","464","465","466","467","468","469","470",
"471","472","473","474","475","476","477","478",
"479","480","481","482","483","484","485","486","487",
"488","489","490","491","492","493","494","495",
"496","497","498","499","500","501","502","503",
"504","505","506","507","508","509","510"),
pop = c("stromsrum","stromsrum",
"stromsrum","stromsrum","stromsrum","stromsrum",
"stromsrum","stromsrum","stromsrum","stromsrum","stromsrum",
"stromsrum","stromsrum","stromsrum","stromsrum",
"stromsrum","stromsrum","stromsrum","stromsrum","stromsrum",
"stromsrum","stromsrum","stromsrum","stromsrum",
"stromsrum","stromsrum","stromsrum","stromsrum",
"stromsrum","stromsrum","stromsrum","stromsrum","stromsrum",
"stromsrum","stromsrum","stromsrum","stromsrum",
"stromsrum","stromsrum","stromsrum","stromsrum","stromsrum",
"stromsrum","stromsrum","stromsrum","stromsrum",
"stromsrum","stromsrum","stromsrum","stromsrum",
"stromsrum","stromsrum","stromsrum","stromsrum","stromsrum",
"stromsrum","stromsrum","stromsrum","stromsrum",
"stromsrum","stromsrum","stromsrum","stromsrum",
"stromsrum","stromsrum","stromsrum","stromsrum","stromsrum",
"stromsrum","stromsrum","stromsrum","stromsrum",
"stromsrum","stromsrum","stromsrum","stromsrum","stromsrum",
"stromsrum","stromsrum","stromsrum","stromsrum",
"stromsrum","stromsrum","stromsrum","stromsrum",
"stromsrum","stromsrum","halltorp","halltorp","halltorp",
"halltorp","halltorp","halltorp","halltorp","halltorp",
"halltorp","halltorp","halltorp","halltorp","halltorp",
"halltorp"),
instance = c("n9","n9","n9","n9","n9",
"n9","n9","n9","n9","n9","n9","n9","n9","n9","n9",
"n9","n9","n9","n9","n9","n9","n9","n9","n9",
"n9","n9","n9","n9","n9","n10","n10","n10","n10",
"n10","n10","n10","n10","n10","n10","n10","n10",
"n10","n10","n10","n10","n10","n10","n10","n10",
"n10","n10","n10","n10","n10","n10","n10","n10",
"n10","n10","n10","n10","n10","n10","n10","n10",
"n10","n10","n10","n10","n10","n10","n10","n10","n10",
"n10","n10","n10","n10","n10","n10","n10","n10",
"n10","n10","n10","n10","n10","n1","n1","n1",
"n1","n1","n1","n1","n1","n1","n1","n1","n1","n1",
"n1"),
snps = c(32L,33L,34L,35L,36L,37L,
38L,39L,40L,41L,42L,43L,44L,45L,46L,47L,48L,49L,
50L,51L,52L,53L,55L,56L,57L,58L,59L,61L,68L,
0L,1L,2L,3L,4L,5L,6L,7L,8L,9L,10L,11L,12L,
13L,14L,15L,16L,17L,18L,19L,20L,21L,22L,23L,24L,
25L,26L,27L,28L,29L,30L,31L,32L,33L,34L,35L,
36L,37L,38L,39L,40L,41L,42L,43L,44L,45L,46L,47L,
48L,49L,50L,51L,52L,53L,54L,55L,56L,59L,0L,
1L,2L,3L,4L,5L,6L,7L,8L,9L,10L,11L,12L,13L),
loci = c(36L,33L,28L,20L,28L,16L,
14L,13L,14L,7L,5L,4L,8L,5L,7L,3L,4L,5L,2L,2L,
2L,1L,1L,2L,2L,1L,1L,1L,1L,134914L,11323L,
7807L,6041L,4752L,3863L,3207L,2722L,2256L,1960L,
1631L,1321L,1051L,913L,814L,661L,558L,477L,412L,351L,
294L,242L,200L,192L,157L,133L,97L,95L,84L,70L,
77L,57L,41L,30L,27L,26L,33L,19L,23L,16L,15L,
9L,9L,7L,11L,5L,4L,5L,3L,3L,1L,2L,1L,3L,1L,
3L,2L,1L,226549L,17545L,9240L,5943L,4188L,3009L,
2190L,1646L,1193L,810L,594L,412L,304L,275L)
)

Hi @niko_bio
Try this:

# For every pop/instance combination, attach the group's total loci count
# (loci_tot) and each row's share of that total (loci_prop), then drop the
# grouping so later steps operate on the whole table again.
opt_n %>%
  group_by(pop, instance) %>%
  mutate(loci_tot = sum(loci)) %>%
  mutate(loci_prop = loci / loci_tot) %>%
  ungroup()

Thank you very much, it worked.

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.