How do I find the villages in a province where more than 25% of people speak a given language?

I want to find the villages where speakers of language 775 make up more than 25% of the people.

775 is the language code. Here is the data:

structure(list(WEIGHT = c(9.44, 10, 10, 14, 14, 14, 11, 11, 11, 
11, 10.25, 10, 10, 10, 11, 11, 10, 10, 10, 10, 10.13, 10.13, 
10.13, 10.13, 10.13, 10.13, 10.13, 10, 10, 10, 10, 10, 10, 10, 
10, 10, 10, 10, 10, 10, 10, 10.13, 10.13, 10.13, 10.13, 10, 10, 
10, 10, 10, 10.13, 10.13, 10.13, 10.13, 10.13, 10.13, 10.13, 
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10.25, 10.25, 10.25, 
10.25, 10.25, 10.25, 10.25, 10.25, 10.25, 10.25, 10.13, 10, 10, 
1, 1, 1, 9.92, 9.56, 9.56, 10, 11.33, 10.2, 9.67, 10.44, 10.44, 
10.44, 9.8, 9.8, 10.5, 9.77, 9.77, 10.2, 10.22), PROVINCE = c(82, 
82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 
82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 
82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 
82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 
82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 
82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 
82, 82, 82), DISTRICT = c(71, 6, 7, 71, 71, 71, 5, 5, 5, 5, 7, 
7, 6, 6, 4, 4, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 
7, 7, 7, 7, 7, 7, 2, 2, 2, 3, 6, 6, 2, 1, 1, 4, 72, 72, 72, 4, 
4, 1, 1, 1, 1, 1), SUB_DISTRI = c(30, 40, 50, 21, 21, 21, 21, 
21, 21, 21, 50, 50, 40, 40, 40, 40, 50, 50, 50, 50, 50, 50, 50, 
50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 
50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 
50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 
50, 50, 50, 50, 50, 50, 50, 50, 50, 41, 41, 41, 20, 31, 31, 31, 
91, 101, 90, 31, 31, 31, 20, 20, 90, 90, 90, 101, 101), VILLAGE = c(17, 
11, 5, 13, 13, 13, 16, 16, 16, 16, 3, 5, 11, 11, 18, 18, 5, 5, 
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 4, 
4, 4, 24, 3, 3, 2, 17, 16, 19, 1, 1, 1, 14, 14, 18, 23, 23, 16, 
16), KODE2010 = c("8271030017", "8206040011", "8207050005", "8271021013", 
"8271021013", "8271021013", "8205021016", "8205021016", "8205021016", 
"8205021016", "8207050003", "8207050005", "8206040011", "8206040011", 
"8204040018", "8204040018", "8207050005", "8207050005", "8207050005", 
"8207050005", "8207050005", "8207050005", "8207050005", "8207050005", 
"8207050005", "8207050005", "8207050005", "8207050005", "8207050005", 
"8207050005", "8207050005", "8207050005", "8207050005", "8207050005", 
"8207050005", "8207050005", "8207050005", "8207050005", "8207050005", 
"8207050005", "8207050005", "8207050005", "8207050005", "8207050005", 
"8207050005", "8207050005", "8207050005", "8207050005", "8207050005", 
"8207050005", "8207050005", "8207050005", "8207050005", "8207050005", 
"8207050005", "8207050005", "8207050005", "8207050005", "8207050005", 
"8207050005", "8207050005", "8207050005", "8207050005", "8207050005", 
"8207050005", "8207050005", "8207050005", "8207050003", "8207050003", 
"8207050003", "8207050003", "8207050003", "8207050003", "8207050003", 
"8207050003", "8207050003", "8207050003", "8207050005", "8207050005", 
"8207050005", "8202041004", "8202041004", "8202041004", "8203020024", 
"8206031003", "8206031003", "8202031002", "8201091017", "8201101016", 
"8204090019", "8272031001", "8272031001", "8272031001", "8204020014", 
"8204020014", "8201090018", "8201090023", "8201090023", "8201101016", 
"8201101016"), ETHNICITY = c("1010", "114", "114", "114", "114", 
"114", "150", "150", "150", "150", "612", "612", "695", "695", 
"822", "822", "822", "904", "904", "904", "904", "904", "904", 
"904", "904", "904", "904", "904", "904", "904", "904", "904", 
"904", "904", "904", "904", "904", "904", "904", "904", "904", 
"904", "904", "904", "904", "904", "904", "904", "904", "904", 
"904", "904", "904", "904", "904", "904", "904", "904", "904", 
"904", "904", "904", "904", "904", "904", "904", "904", "929a", 
"929a", "929a", "929a", "929a", "929a", "929a", "929a", "929a", 
"929a", "929a", "929a", "929a", "1003", "1003", "1003", "1004", 
"86", "86", "113", "114", "114", "114", "114", "114", "114", 
"7000", "7000", "854", "88b", "88b", "88b", "88b"), DAILY_LANG = c("1060", 
"107", "107", "107", "107", "107", "107", "107", "107", "107", 
"107", "107", "107", "107", "107", "107", "107", "107", "107", 
"107", "107", "107", "107", "107", "107", "107", "107", "107", 
"107", "107", "107", "107", "107", "107", "107", "107", "107", 
"107", "107", "107", "107", "107", "107", "107", "107", "107", 
"107", "107", "107", "107", "107", "107", "107", "107", "107", 
"107", "107", "107", "107", "107", "107", "107", "107", "107", 
"107", "107", "107", "107", "107", "107", "107", "107", "107", 
"107", "107", "107", "107", "107", "107", "107", "1098", "1098", 
"1098", "1098", "113", "113", "114", "114", "114", "114", "114", 
"114", "114", "114", "114", "114", "114", "114", "114", "114"
), RELIGION = c(1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 2, 2, 1, 1, 1, 
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1), AGE = c(26, 30, 23, 58, 8, 43, 45, 41, 42, 33, 39, 29, 32, 
5, 50, 21, 46, 29, 25, 8, 43, 34, 5, 38, 14, 11, 6, 49, 16, 62, 
8, 50, 22, 17, 12, 38, 40, 8, 60, 16, 31, 37, 36, 10, 5, 42, 
41, 6, 18, 31, 53, 50, 30, 29, 25, 19, 11, 38, 36, 17, 10, 14, 
68, 29, 26, 9, 5, 37, 37, 12, 8, 30, 31, 60, 55, 25, 21, 36, 
28, 12, 39, 25, 34, 10, 37, 42, 17, 6, 5, 25, 47, 33, 27, 42, 
39, 36, 37, 34, 21, 47), SPEAK_INDO = c(1, 1, 1, 1, 1, 1, 1, 
1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 
1, 2, 1, 2, 1, 1, 2, 1, 2, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 
1, 2, 2, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1)), row.names = c(NA, 100L), class = "data.frame")

Your question is not fully clear. If you want to aggregate DAILY_LANG by VILLAGE, you can use summarize() for that, but note that the value 775 does not occur in DAILY_LANG in the data you posted.

For example:

# Share of people whose daily language is "107", computed per village.
# VILLAGE numbers repeat across districts and sub-districts (e.g. VILLAGE 16
# occurs under both KODE2010 "8205021016" and "8201101016"), so group by the
# full administrative key rather than by VILLAGE alone.
# DAILY_LANG is a character column, so compare against the string "107".
summarize(
  dat,
  proportion_107_speakers = mean(DAILY_LANG == "107"),
  .by = c(PROVINCE, DISTRICT, SUB_DISTRI, VILLAGE)
)

Then you can filter() on the value of the proportion, for example keeping only the rows where proportion_107_speakers > 0.25.

1 Like

775: As Muhammad Ali once said:

The hands can't hit what the eyes can't see.

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.