Correspondence Analysis in RStudio

Preparatory:

  1. replace - with _ in variable names
  2. replace - with minus and + with plus for last variable
  3. convert all variables to factors
  4. Review mjca function signature for expected argument

obj: A response pattern matrix (data frame containing factors), or a frequency table (a “table” object) or an integer array

suppressPackageStartupMessages({
  library(ca)
})

# Build the 20-row, 7-column categorical table passed to the analysis below.
# The tibble-style class vector and the `spec` attribute are reproduced and
# then handed to as.data.frame(), exactly as in the original dput-style
# literal. Everything is assembled inside local() so that only `input` is
# created in the calling environment.
input <- local({
  survey_cols <- list(
    dens_ville = c(
      "moderate", "very_low", "moderate", "low", "very_low",
      "moderate", "moderate", "moderate", "moderate", "moderate",
      "moderate", "moderate", "moderate", "low", "low",
      "moderate", "moderate", "moderate", "moderate", "low"
    ),
    # Three consecutive runs: 5 + 2 + 13 = 20 rows.
    climat = rep(
      c("cold_semi_arid", "hot_semi_arid", "continental"),
      times = c(5L, 2L, 13L)
    ),
    prov = c(
      "Instagram", "direct", "Instagram", "Facebook", "Facebook",
      "Facebook", "Instagram", "Facebook", "direct", "Instagram",
      "direct", "Instagram", "Facebook", "direct", "direct",
      "Instagram", "Instagram", "Facebook", "Instagram", "Instagram"
    ),
    promo = c(
      "N", "N", "O", "N", "N",
      "N", "N", "N", "N", "N",
      "N", "N", "N", "O", "N",
      "O", "O", "N", "N", "N"
    ),
    sub_mail = c(
      "O", "O", "O", "O", "O",
      "O", "O", "O", "N", "N",
      "N", "O", "N", "O", "O",
      "O", "O", "N", "N", "N"
    ),
    # NOTE(review): the last value is "50K" (capital K), a different string
    # from "50k", so it forms its own category (see the GDP_ca:50K column in
    # the recorded output). Possibly a data-entry typo -- confirm before
    # changing, since the output below depends on it.
    GDP_ca = c(
      "60k", "40k", "60k", "60k", "60k",
      "60k", "60k", "50k", "50k", "50k",
      "50k", "50k", "50k", "60k", "50k",
      "50k", "50k", "60k", "60k", "50K"
    ),
    benef_quali = c(
      "plus", "plus", "minus", "plus", "plus",
      "plus", "minus", "plus", "plus", "plus",
      "plus", "minus", "minus", "minus", "plus",
      "plus", "plus", "minus", "minus", "minus"
    )
  )

  # Every column carries the same (empty) character-collector spec, so build
  # it once and reuse it; lapply() keeps the column names on the result.
  text_collector <- structure(
    list(),
    class = c("collector_character", "collector")
  )
  column_spec <- structure(
    list(
      cols = lapply(survey_cols, function(column) text_collector),
      default = structure(list(), class = c("collector_guess", "collector")),
      skip = 1L
    ),
    class = "col_spec"
  )

  as.data.frame(structure(
    survey_cols,
    class = c("spec_tbl_df", "tbl_df", "tbl", "data.frame"),
    row.names = c(NA, -20L),
    spec = column_spec
  ))
})

# Run ca::mjca() on the data frame built above. The call is left at top level
# so its result is auto-printed; the eigenvalues and per-category column
# statistics appear in the `#>` output that follows.
mjca(input)
#> 
#>  Eigenvalues:
#>            1        2        3       4        5      
#> Value      0.079674 0.025836 0.01188 0.008318 0.00043
#> Percentage 45.01%   14.59%   6.71%   4.7%     0.24%  
#> 
#> 
#>  Columns:
#>         dens_ville:low dens_ville:moderate dens_ville:very_low
#> Mass          0.028571            0.100000            0.014286
#> ChiDist       0.887074            0.326054            1.640732
#> Inertia       0.022483            0.010631            0.038457
#> Dim. 1        0.203258            0.508740           -3.967700
#> Dim. 2        0.521807           -0.313550            1.151234
#>         climat:cold_semi_arid climat:continental climat:hot_semi_arid
#> Mass                 0.035714           0.092857             0.014286
#> ChiDist              0.926191           0.406944             1.302186
#> Inertia              0.030637           0.015377             0.024224
#> Dim. 1              -2.184247           0.884040            -0.285640
#> Dim. 2              -0.612577           0.750688            -3.348028
#>         prov:direct prov:Facebook prov:Instagram   promo:N   promo:O sub_mail:N
#> Mass       0.035714      0.042857       0.064286  0.114286  0.028571   0.050000
#> ChiDist    0.796396      0.654224       0.500775  0.220038  0.880154   0.680510
#> Inertia    0.022652      0.018343       0.016121  0.005533  0.022133   0.023155
#> Dim. 1    -0.611919     -0.527934       0.691910 -0.078394  0.313577   1.246810
#> Dim. 2     2.270584     -0.831213      -0.707294  0.131480 -0.525919   0.899438
#>         sub_mail:O GDP_ca:40k GDP_ca:50k GDP_ca:50K GDP_ca:60k
#> Mass      0.092857   0.007143   0.064286   0.007143   0.064286
#> ChiDist   0.366428   2.250560   0.557181   2.013094   0.535171
#> Inertia   0.012468   0.036179   0.019958   0.028947   0.018412
#> Dim. 1   -0.671359  -4.783702   0.752008   2.261258  -0.471736
#> Dim. 2   -0.484313   3.164784   1.211729   0.728415  -1.644307
#>         benef_quali:minus benef_quali:plus
#> Mass             0.057143         0.085714
#> ChiDist          0.569845         0.379897
#> Inertia          0.018556         0.012370
#> Dim. 1           0.882021        -0.588014
#> Dim. 2          -0.893626         0.595750

Created on 2020-12-04 by the reprex package (v0.3.0.9001)

1 Like