Dear R Masters,
I have a following challenge. I have a data frame with responses about specific models (variable ModelLong) which looks like that:
data.frame(stringsAsFactors=FALSE,
URN = c("9x013122510", "9x013680557", "9x014145627",
"9x015094359", "9x013122478",
"9x012544645", "9x013122457",
"9x015094333", "9x014145611",
"9x014963276", "9x015104994", "9x013233865",
"9x015094394", "9x013918536",
"9x013122546", "9x013122646",
"9x013359154", "9x013122574",
"9x013122548", "9x015104981", "9x013918605",
"9x013122618", "9x013918581",
"9x013122341", "9x013122547",
"9x013122698", "9x012654879",
"9x013703410", "9x013122485", "9x014853449",
"9x013122486", "9x014032625",
"9x013122697", "9x013122342",
"9x012545899", "9x014145624",
"9x013233864", "9x013703438", "9x013918532",
"9x014635481", "9x012544643",
"9x013918584", "9x013122284",
"9x012544539", "9x013680544", "9x013918572",
"9x013918545", "9x014032615",
"9x014635460", "9x015094372"),
InterviewDate = c("2018-06-04 16:42:00", "2018-05-21 08:30:00",
"2018-10-15 13:04:32",
"2018-12-17 11:03:42",
"2018-06-04 21:25:00", "2017-03-20 20:54:00",
"2018-06-05 10:23:00",
"2018-12-23 12:01:17", "2018-10-15 19:02:13",
"2018-12-10 09:02:05",
"2018-12-22 11:01:14", "2018-05-26 15:10:00",
"2018-12-20 22:01:27",
"2018-05-18 08:59:00", "2018-06-04 14:57:00",
"2018-05-31 21:02:00",
"2018-05-25 16:05:00", "2018-06-04 12:27:00",
"2018-06-04 14:28:00",
"2018-12-29 21:01:06", "2018-09-25 14:01:32",
"2018-06-03 22:41:00",
"2018-10-01 09:01:57", "2017-01-18 08:24:00",
"2018-06-04 14:49:00",
"2018-05-28 13:46:00", "2017-01-09 11:02:00",
"2018-05-20 09:19:00",
"2018-06-04 19:45:00", "2018-12-01 12:01:20",
"2018-06-04 19:14:00",
"2018-10-08 08:02:09", "2018-05-28 14:14:00",
"2017-01-12 09:18:00",
"2017-01-01 10:33:00",
"2018-10-15 13:04:32", "2018-05-26 16:21:00",
"2018-05-18 19:16:00",
"2018-05-18 10:54:00", "2018-11-16 20:01:54",
"2017-02-02 10:51:00",
"2018-09-25 12:02:14", "2017-01-15 14:32:00",
"2017-01-28 00:11:00",
"2018-05-21 09:50:00", "2018-09-26 11:02:27",
"2018-05-17 19:40:00",
"2018-10-02 11:02:39", "2018-11-24 13:01:10",
"2018-12-19 15:01:21"),
A1 = c(100, 100, 90, 100, 80, 50, 90, 80, 70, 90, 90, 90, 100,
100, 100, 90, 90, 100, 100, 100,
100, 100, 100, 100, 80, 90, 80,
100, 100, 80, 90, 80, 90, 80, 80,
80, 100, 90, 100, 90, 100, 90, 90,
90, 100, 100, 100, 80, 100, 80),
A2 = c(100, 100, 90, 100, 80, 40, 100, 80, 70, 90, 90, 90, 100,
100, 100, 90, 90, 100, 100, 100,
100, 100, 100, 100, 80, 90, 80,
100, 100, 80, 90, 80, 90, 90, 90,
80, 100, 90, 100, 90, 100, 90, 90,
90, 100, 100, 100, 80, 100, 80),
B1 = c(100, 100, 90, 100, 90, 60, 90, 80, 80, 90, 100, 90, 90,
100, 100, 90, 90, 90, 100, 100,
100, 100, 100, 100, 80, 80, 80,
100, 100, 80, 80, 80, 80, 80, 90,
100, 90, 80, 100, 80, 80, 90, 80,
90, 100, 100, 100, 90, 80, 80),
B2 = c(NA, NA, NA, NA, NA, 60, NA, 80, 80, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 80, 70, 90, NA, NA, 80,
80, 80, 80, 90, NA, NA, NA, 80, NA,
80, 100, NA, 80, NA, NA, NA, NA,
NA, 80, 80),
B3 = c(NA, NA, NA, NA, NA, 60, NA, 80, 80, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 80, 80, 80, NA, NA, 80,
80, 80, 80, 90, NA, NA, NA, 70, NA,
80, 100, NA, 80, NA, NA, NA, NA,
NA, 90, 80),
B4 = c(NA, NA, NA, NA, NA, 50, NA, 80, 80, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 80, 80, 80, NA, NA, 80,
90, 80, 80, 90, NA, NA, NA, 70, NA,
80, 100, NA, 80, NA, NA, NA, NA,
NA, 80, 80),
B5 = c(NA, NA, NA, NA, NA, 50, NA, NA, 80, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 80, 70, 80, NA, NA, 80,
90, 80, 90, 80, NA, NA, NA, 70, NA,
90, 100, NA, 80, NA, NA, NA, NA,
NA, 90, 80),
B6 = c(NA, NA, NA, NA, NA, NA, NA, 80, 80, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 80, 10, 90, NA, NA, 80,
80, 80, 60, 70, NA, NA, NA, 70, NA,
100, 100, NA, 80, NA, NA, NA, NA,
NA, 80, 80),
OS21 = c(NA, NA, NA, NA, NA, 50, NA, 80, 80, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 80, 80, 90, NA, NA, 80,
90, 80, 80, 80, NA, NA, NA, 80, NA,
90, 100, NA, 80, NA, NA, NA, NA,
NA, 100, 80),
C1 = c(100, 100, 100, 100, 80, 50, 90, 90, 80, 100, 90, 90, 90,
100, 100, 100, 90, 100, 100,
100, 100, 100, 100, 100, 80, 100, 90,
100, 100, 80, 90, 80, 90, 90, 80,
80, 100, 90, 100, 90, 100, 90,
90, 90, 100, 100, 100, 90, 90, 90),
C2 = c(100, 100, 100, 100, 90, 50, 100, 90, 70, 100, 100, 80,
100, 100, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100, 80,
100, 80, 100, 100, 80, 90, 80, 90,
90, 100, 80, 100, 100, 100, 100,
100, 90, 90, 60, 100, 100, 100, 100,
100, 80),
C3 = c(100, 100, 100, 100, 90, 60, 100, 90, 80, 100, 90, 90, 90,
100, 100, 100, 90, 100, 100,
100, 100, 100, 100, 100, 80, 90, 80,
100, 100, 80, 90, 80, 90, 90, 100,
80, 100, 80, 100, 90, 100, 90,
90, 80, 100, 100, 100, 90, 100, 80),
C4 = c(NA, NA, NA, NA, 90, 60, NA, NA, 90, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 80, NA, NA, NA, NA, 80,
NA, 80, NA, NA, 100, 90, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA),
C5 = c(NA, NA, NA, NA, 90, 60, NA, NA, 90, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 80, NA, NA, NA, NA, 80,
NA, 80, NA, NA, 100, 80, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA),
C6 = c(NA, NA, NA, NA, 90, 60, NA, NA, 80, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 80, NA, NA, NA, NA, 80,
NA, 80, NA, NA, 100, 90, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA),
C7 = c(NA, NA, NA, NA, 90, 60, NA, NA, 90, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 80, NA, NA, NA, NA, 80,
NA, 80, NA, NA, 100, 100, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA),
C8 = c(NA, NA, NA, NA, 90, 60, NA, NA, 90, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 80, NA, NA, NA, NA, 80,
NA, 80, NA, NA, 100, 90, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA),
D1 = c(100, 100, 90, 100, 80, 990, 90, -10, 100, -10, -10, 100,
-10, 100, 100, 90, 80, 90, 100,
100, 100, 100, -10, 100, 990, 100,
80, 100, 100, 990, 90, 60, 90,
100, 80, 90, 100, 90, 100, -10, 70,
-10, -10, 990, 100, 100, 100, 80,
-10, 80),
E1 = c(100, 100, 100, 100, 90, 70, 90, 90, 80, 100, 100, 80, 100,
100, 100, 90, 90, 100, 100, 100,
100, 100, 100, 100, 80, 80, 80,
100, 100, 80, 90, 80, 90, 90, 90,
80, 100, 80, 100, 90, 100, 100, 90,
90, 100, 100, 100, 80, 90, 80),
E2 = c(100, 100, 100, 100, 90, 80, 50, 90, 90, 100, 100, 90, 100,
100, 100, 90, 90, 100, 100, 80,
100, 100, 100, 100, 80, 100, 80,
100, 100, 80, 90, 80, 90, 100, 100,
90, 100, 90, 100, 90, 100, 90,
80, 70, 100, 100, 100, 80, 100, 80),
E3 = c(NA, NA, NA, NA, NA, 70, NA, NA, 80, NA, NA, 90, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 80, 100, 80, NA, NA, 70,
NA, 80, NA, NA, NA, 90, NA, 90, NA,
NA, NA, NA, NA, NA, NA, NA, NA,
80, NA, 80),
OS17 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA),
F1 = c(100, 100, 100, 100, 90, 60, 90, 90, 80, 100, 100, 90, 100,
100, 100, 90, 90, 90, 100, 100,
100, 100, 100, 100, 80, 100, 70,
100, 100, 80, 100, 80, 90, 80, 100,
90, 100, 90, 100, 90, 100, 90,
90, 90, 100, 100, 100, 80, 100, 80),
F2 = c(NA, NA, NA, NA, NA, 80, NA, NA, 100, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 80, NA, 80, NA, NA, 80,
NA, 80, NA, 100, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA,
90, NA, 80),
F3 = c(NA, NA, NA, NA, NA, 80, NA, NA, 80, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 80, NA, 80, NA, NA, 80,
NA, 80, NA, 100, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA,
90, NA, 80),
F4 = c(NA, NA, NA, NA, NA, 60, NA, NA, 100, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 80, NA, 80, NA, NA, 80,
NA, 80, NA, 100, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA,
80, NA, 80),
F5 = c(NA, NA, NA, NA, NA, 60, NA, NA, 60, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 80, NA, 80, NA, NA, 80,
NA, 80, NA, 90, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA,
80, NA, 70),
F6 = c(NA, NA, NA, NA, NA, 70, NA, NA, 60, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 80, NA, 10, NA, NA, 50,
NA, 80, NA, 90, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA,
70, NA, 70),
F7 = c(NA, NA, NA, NA, NA, 70, NA, NA, 60, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 80, NA, 60, NA, NA, 80,
NA, 80, NA, 90, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA,
60, NA, 60),
G1 = c(100, 100, 0, 100, 0, 60, 90, 0, 90, 100, 100, 60, 0, 100,
100, 90, 70, 100, 100, 100, 100,
0, 100, 100, 0, 100, 0, 100, 0,
80, 0, 80, 90, 80, 0, 0, 100, 90,
100, 90, 100, 0, 0, 90, 100, 100,
100, 0, 0, 0),
ModelLong = c("ccc", "bbb", "eee", "eee", "ccc (2016 ~ )",
"ccc (2016 ~ )",
"ccc (2016 ~ )", "ccc (2016 ~ )",
"ccc (2016 ~ )", "ccc (2016 ~ )",
"ccc (2016 ~ )", "ccc (2016 ~ )",
"ccc (2016 ~ )", "ccc (2016 ~ )",
"ccc (2016 ~ )", "ccc (2016 ~ )",
"ccc (2016 ~ )", "ccc (2016 ~ )",
"ccc (2016 ~ )", "ccc (2016 ~ )",
"ccc (2016 ~ )", "ccc (2016 ~ )",
"ccc (2016 ~ )", "ccc (2016 ~ )",
"ccc (2016 ~ )", "ccc (2016 ~ )", "bbb", "bbb",
"bbb", "bbb", "bbb", "bbb", "bbb",
"bbb", "ccc (2016 ~ )",
"ccc (2016 ~ )", "ccc (2016 ~ )",
"ccc (2016 ~ )", "ccc (2016 ~ )",
"ccc (2016 ~ )", "ccc (2016 ~ )",
"aaa (2014 ~ )", "aaa (2014 ~ )",
"aaa (2014 ~ )", "aaa (2014 ~ )",
"aaa (2014 ~ )", "aaa (2014 ~ )",
"aaa (2014 ~ )", "aaa (2014 ~ )",
"aaa (2014 ~ )"),
A2TB = c(100, 100, 100, 100, 0, 0, 100, 0, 0, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100,
100, 100, 100, 100, 100, 0, 100, 0,
100, 100, 0, 100, 0, 100, 100,
100, 0, 100, 100, 100, 100, 100, 100,
100, 100, 100, 100, 100, 0, 100,
0)
)
Now I would like to do some plots using ModelLong. Nevertheless I would like to include only the most frequent models. Is any clever way of including models with frequency > 5%? If this option is too complicated, including models with more than 5 responses? Is it possible to recode any other model into "Other" type automatically?
I wonder if any data manipulation is required or I could do it directly in ggplot.
I know that many online visualisation providers offer this solution.
Can you help?