Help with dplyr

Hi, I have written code that shows how often a pitch is different from the previous pitch. Now I am curious to see, when the pitch is "FF", how often the next pitch is a different pitch type. Is there an easy way to do this with the code I have already written? Thank you.

# Sample pitch log: 20 consecutive pitches, all thrown by the same pitcher.
p <- data.frame(
  pitcher_name = rep("Jacob deGrom", times = 20L),
  pitch_type = c(
    "FF", "FF", "FF", "FF", "FF", "FF", "CH", "FF", "SL", "SL",
    "FF", "FF", "FF", "SL", "SL", "FF", "FF", "SL", "FF", "FF"
  ),
  stringsAsFactors = FALSE
)

# Per pitcher, flag every pitch whose type is not the same as the pitch thrown
# immediately before it. lag() has nothing to compare the first pitch of each
# pitcher against, so that row's flag is NA.
p <- fridaypitchers %>%
  select(pitcher_name, pitch_type, gameday_link) %>%
  group_by(pitcher_name) %>%
  mutate(diff_pitch = lag(pitch_type) != pitch_type)
print(p)

# Share of pitches whose type differs from the previous pitch, one row per
# group of p, sorted from the highest share to the lowest. NA flags (pitches
# with no predecessor) are excluded from the mean.
# The result name is assigned up front with `<-` instead of a trailing `->`,
# so the target of the pipeline is visible where the statement starts.
Q <- p %>%
  summarize(diff_pitch_pct = mean(diff_pitch, na.rm = TRUE)) %>%
  arrange(desc(diff_pitch_pct))

Do you mean like this?

library(dplyr)

# Reproducible stand-in for the asker's data: 20 consecutive pitches from one
# pitcher, with only the two columns needed for the calculation.
fridaypitchers <- data.frame(
  pitcher_name = rep("Jacob deGrom", times = 20L),
  pitch_type = c(
    "FF", "FF", "FF", "FF", "FF", "FF", "CH", "FF", "SL", "SL",
    "FF", "FF", "FF", "SL", "SL", "FF", "FF", "SL", "FF", "FF"
  ),
  stringsAsFactors = FALSE
)

# Flag, within each pitcher, every pitch whose type differs from the one thrown
# right before it. The first pitch of a pitcher has no predecessor, so lag()
# yields NA there. The select() step from the question is left out because the
# sample data above has no gameday_link column.
p <- fridaypitchers %>%
  group_by(pitcher_name) %>%
  mutate(diff_pitch = lag(pitch_type) != pitch_type)
head(p)
#> # A tibble: 6 x 3
#> # Groups:   pitcher_name [1]
#>   pitcher_name pitch_type diff_pitch
#>   <chr>        <chr>      <lgl>     
#> 1 Jacob deGrom FF         NA        
#> 2 Jacob deGrom FF         FALSE     
#> 3 Jacob deGrom FF         FALSE     
#> 4 Jacob deGrom FF         FALSE     
#> 5 Jacob deGrom FF         FALSE     
#> 6 Jacob deGrom FF         FALSE

# For each pitcher and pitch type, compute how often the FOLLOWING pitch is a
# different type (e.g. "after an FF, how often is the next pitch not an FF?").
#
# The comparison has to look forward with lead(). The existing diff_pitch
# column is built with lag(), so averaging it by pitch type would instead
# answer "how often was the PREVIOUS pitch different".
#
# lead() is evaluated within each pitcher so a pitcher's last pitch is never
# compared with another pitcher's first pitch; that last pitch has no
# successor, gets NA, and is excluded by na.rm = TRUE.
Q2 <- p %>%
  group_by(pitcher_name) %>%
  mutate(next_pitch_differs = pitch_type != lead(pitch_type)) %>%
  group_by(pitcher_name, pitch_type) %>%
  summarize(diff_pitch_pct = mean(next_pitch_differs, na.rm = TRUE)) %>%
  arrange(desc(diff_pitch_pct))
Q2
#> # A tibble: 3 x 3
#> # Groups:   pitcher_name [1]
#>   pitcher_name pitch_type diff_pitch_pct
#>   <chr>        <chr>               <dbl>
#> 1 Jacob deGrom CH                  1    
#> 2 Jacob deGrom SL                  0.6  
#> 3 Jacob deGrom FF                  0.308

Created on 2019-06-22 by the reprex package (v0.2.1)

That looks great. Thanks for your help!

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.