Double/integer conversion with cpp11

Dear all,

I hope you are well!

This is a code that I prepared and which crashes the R session (https://github.com/pachadotdev/fixest2/blob/cpp11_wip/dev/gdb-debug-4.R). The issue is that the C++ function cpp_get_fe_gnl_() doesn't like the matrix obsCluster, which is a matrix of integers. If we cast it as integer in R, the R session crashes with a 'memory not mapped' message. If we pass it as a double with no cast, the function does nothing and returns an error of 'double vs integer'.

# library(fixest2)
devtools::load_all()

# REPRODUCIBLE ERROR ---

# gravity_pois = fepois(Euros ~ log(dist_km) | Origin + Destination + Product + Year, trade)
# fixedEffects = fixef(gravity_pois)
# Error: Invalid input type, expected 'integer' actual 'double'

# NOW WE SEE IT LINE BY LINE ----

# the problem is in fixef.fixest function L882 Methods.R

# this is the same as to run fixef.fixest line by line
# lines 924-1079 that do not apply for this case

gravity_pois = fepois(Euros ~ log(dist_km) | Origin + Destination + Product + Year, trade)

object <- gravity_pois
notes <- getFixest_notes()
sorted <- TRUE
nthreads <- getFixest_nthreads()
fixef.tol <- 1e-5
fixef.iter <- 10000
S <- object$sumFE
family <- object$family
fixef_names <- object$fixef_vars
fixef_id <- object$fixef_id
Q <- length(fixef_id)
N <- length(S)
id_dummies_vect <- list()
for (i in 1:Q) id_dummies_vect[[i]] <- as.vector(fixef_id[[i]])
is_ref_approx <- FALSE
isSlope <- FALSE
dumMat <- matrix(unlist(id_dummies_vect), N, Q) - 1
orderCluster <- matrix(unlist(lapply(id_dummies_vect, order)), N, Q) - 1
nbCluster <- sapply(fixef_id, max)

# check the data types
print(paste("Q", class(Q)))
print(paste("N", class(N)))
print(paste("S", class(S)))
print(paste("dumMat", class(dumMat)))
print(paste("nbCluster", class(nbCluster)))
print(paste("orderCluster", class(orderCluster)))

# ERROR 1 ----

# THIS IS THE LINE THAT BREAKS FIXEF()
fixef_values <- cpp_get_fe_gnl(Q, N, S, dumMat, nbCluster, orderCluster)

# ERROR MESSAGE:
#
# OK L570OK L5954
# 0
# OK L612OK L6321
# 
#  *** caught segfault ***
# address 0x5623b3cd0df0, cause 'memory not mapped'
# 
# Traceback:
#  1: .Call(`_fixest2_cpp_get_fe_gnl_`, as.integer(Q), as.integer(N),     sumFE, as.integer(dumMat), as.integer(cluster_sizes), as.integer(obsCluster))
#  2: cpp_get_fe_gnl(Q, N, S, dumMat, nbCluster, orderCluster)
# 
# Possible actions:
# 1: abort (with core dump, if enabled)
# 2: normal R exit
# 3: exit R without saving workspace
# 4: exit R saving workspace

# NOW WE TRY TO STORE THE MATRICES AS INTEGER-TYPE
storage.mode(dumMat) <- "integer"
storage.mode(orderCluster) <- "integer"

# THE FUNCTION BREAKS AGAIN
# fixef_values <- cpp_get_fe_gnl(Q, N, S, dumMat, nbCluster, orderCluster)

# ERROR MESSAGE:
#
# OK L570OK L5954
# 0
# OK L612OK L6321
#
#  *** caught segfault ***
# address 0x56144c29706c, cause 'memory not mapped'
#
# Traceback:
#  1: .Call(`_fixest2_cpp_get_fe_gnl_`, as.integer(Q), as.integer(N), sumFE, as.integer(dumMat), as.integer(cluster_sizes), as.integer(obsCluster))
#  2: cpp_get_fe_gnl(Q, N, S, dumMat, nbCluster, orderCluster)
#
# Possible actions:
# 1: abort (with core dump, if enabled)
# 2: normal R exit
# 3: exit R without saving workspace
# 4: exit R saving workspace

To run it, and make the R session crash, I would need this "recipe" to follow the exact same steps I used:

  1. Clone the project
git clone --depth 1 -b cpp11_wip --single-branch git@github.com:pachadotdev/fixest2.git
  1. Open the fixest2 project in RStudio and press CTRL+SHIFT+B to install

  2. Run the previous example

I also opened an issue for this problem: Integers vs doubles matrix breaks fixef() · Issue #52 · pachadotdev/fixest2 · GitHub.

Best wishes,

The vignette, with the same Poisson beginning object seems to work well enough

library(fixest)

gravity_pois = fepois(Euros ~ log(dist_km) | Origin + Destination + Product + Year, trade)
print(gravity_pois)
#> Poisson estimation, Dep. Var.: Euros
#> Observations: 38,325 
#> Fixed-effects: Origin: 15,  Destination: 15,  Product: 20,  Year: 10
#> Standard-errors: Clustered (Origin) 
#>              Estimate Std. Error t value  Pr(>|t|)    
#> log(dist_km) -1.52787   0.115678 -13.208 < 2.2e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> Log-Likelihood: -7.025e+11   Adj. Pseudo R2: 0.764032
#>            BIC:  1.405e+12     Squared Cor.: 0.612021
summary(gravity_pois, vcov = "twoway")
#> Poisson estimation, Dep. Var.: Euros
#> Observations: 38,325 
#> Fixed-effects: Origin: 15,  Destination: 15,  Product: 20,  Year: 10
#> Standard-errors: Clustered (Origin & Destination) 
#>              Estimate Std. Error  t value  Pr(>|t|)    
#> log(dist_km) -1.52787   0.130734 -11.6869 < 2.2e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> Log-Likelihood: -7.025e+11   Adj. Pseudo R2: 0.764032
#>            BIC:  1.405e+12     Squared Cor.: 0.612021
summary(gravity_pois, vcov = ~Product)
#> Poisson estimation, Dep. Var.: Euros
#> Observations: 38,325 
#> Fixed-effects: Origin: 15,  Destination: 15,  Product: 20,  Year: 10
#> Standard-errors: Clustered (Product) 
#>              Estimate Std. Error t value  Pr(>|t|)    
#> log(dist_km) -1.52787   0.098294 -15.544 < 2.2e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> Log-Likelihood: -7.025e+11   Adj. Pseudo R2: 0.764032
#>            BIC:  1.405e+12     Squared Cor.: 0.612021
summary(gravity_pois, cluster = "Product")
#> Poisson estimation, Dep. Var.: Euros
#> Observations: 38,325 
#> Fixed-effects: Origin: 15,  Destination: 15,  Product: 20,  Year: 10
#> Standard-errors: Clustered (Product) 
#>              Estimate Std. Error t value  Pr(>|t|)    
#> log(dist_km) -1.52787   0.098294 -15.544 < 2.2e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> Log-Likelihood: -7.025e+11   Adj. Pseudo R2: 0.764032
#>            BIC:  1.405e+12     Squared Cor.: 0.612021
summary(gravity_pois, cluster = ~Product)
#> Poisson estimation, Dep. Var.: Euros
#> Observations: 38,325 
#> Fixed-effects: Origin: 15,  Destination: 15,  Product: 20,  Year: 10
#> Standard-errors: Clustered (Product) 
#>              Estimate Std. Error t value  Pr(>|t|)    
#> log(dist_km) -1.52787   0.098294 -15.544 < 2.2e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> Log-Likelihood: -7.025e+11   Adj. Pseudo R2: 0.764032
#>            BIC:  1.405e+12     Squared Cor.: 0.612021
summary(gravity_pois, cluster = ~Product)
#> Poisson estimation, Dep. Var.: Euros
#> Observations: 38,325 
#> Fixed-effects: Origin: 15,  Destination: 15,  Product: 20,  Year: 10
#> Standard-errors: Clustered (Product) 
#>              Estimate Std. Error t value  Pr(>|t|)    
#> log(dist_km) -1.52787   0.098294 -15.544 < 2.2e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> Log-Likelihood: -7.025e+11   Adj. Pseudo R2: 0.764032
#>            BIC:  1.405e+12     Squared Cor.: 0.612021
gravity_simple = fepois(Euros ~ log(dist_km), trade)
summary(gravity_simple, ~Origin + Destination)
#> Poisson estimation, Dep. Var.: Euros
#> Observations: 38,325 
#> Standard-errors: Clustered (Origin & Destination) 
#>              Estimate Std. Error  t value   Pr(>|t|)    
#> (Intercept)  24.70889   1.124768 21.96798  < 2.2e-16 ***
#> log(dist_km) -1.02896   0.158022 -6.51145 7.4429e-11 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> Log-Likelihood: -2.426e+12   Adj. Pseudo R2: 0.185023
#>            BIC:  4.852e+12     Squared Cor.: 0.055107
fepois(Euros ~ log(dist_km), trade, vcov = ~Product)
#> Poisson estimation, Dep. Var.: Euros
#> Observations: 38,325 
#> Standard-errors: Clustered (Product) 
#>              Estimate Std. Error  t value  Pr(>|t|)    
#> (Intercept)  24.70889   0.330044  74.8654 < 2.2e-16 ***
#> log(dist_km) -1.02896   0.045954 -22.3909 < 2.2e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> Log-Likelihood: -2.426e+12   Adj. Pseudo R2: 0.185023
#>            BIC:  4.852e+12     Squared Cor.: 0.055107
gravity_ols = feols(log(Euros) ~ log(dist_km) | Origin + Destination + Product + Year, trade)
gravity_negbin = fenegbin(Euros ~ log(dist_km) | Origin + Destination + Product + Year, trade)
etable(gravity_pois, gravity_negbin, gravity_ols,
       vcov = "twoway", headers = c("Poisson", "Negative Binomial", "Gaussian"))
#>                       gravity_pois     gravity_negbin        gravity_ols
#>                            Poisson  Negative Binomial           Gaussian
#> Dependent Var.:              Euros              Euros         log(Euros)
#>                                                                         
#> log(dist_km)    -1.528*** (0.1307) -1.711*** (0.1773) -2.170*** (0.1714)
#> Fixed-Effects:  ------------------ ------------------ ------------------
#> Origin                         Yes                Yes                Yes
#> Destination                    Yes                Yes                Yes
#> Product                        Yes                Yes                Yes
#> Year                           Yes                Yes                Yes
#> _______________ __________________ __________________ __________________
#> Family                     Poisson          Neg. Bin.                OLS
#> S.E.: Clustered  by: Orig. & Dest.  by: Orig. & Dest.  by: Orig. & Dest.
#> Observations                38,325             38,325             38,325
#> Squared Cor.               0.61202            0.43761            0.70558
#> Pseudo R2                  0.76403            0.03474            0.23640
#> BIC                        1.4e+12        1,293,786.6          151,977.2
#> Over-dispersion                 --            0.54879                 --
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1


gravity_subfe = list()
all_FEs = c("Year", "Destination", "Origin")
for(i in 0:3){
  gravity_subfe[[i+1]] = fepois(Euros ~ log(dist_km), trade, fixef = all_FEs[0:i])
}
etable(gravity_subfe, cluster = ~Origin+Destination)
#>                            model 1            model 2            model 3
#> Dependent Var.:              Euros              Euros              Euros
#>                                                                         
#> Constant          24.71*** (1.125)                                      
#> log(dist_km)    -1.029*** (0.1580) -1.029*** (0.1581) -1.226*** (0.2045)
#> Fixed-Effects:  ------------------ ------------------ ------------------
#> Year                            No                Yes                Yes
#> Destination                     No                 No                Yes
#> Origin                          No                 No                 No
#> _______________ __________________ __________________ __________________
#> S.E.: Clustered  by: Orig. & Dest.  by: Orig. & Dest.  by: Orig. & Dest.
#> Observations                38,325             38,325             38,325
#> Squared Cor.               0.05511            0.05711            0.16420
#> Pseudo R2                  0.18502            0.18833            0.35826
#> BIC                       4.85e+12           4.83e+12           3.82e+12
#> 
#>                            model 4
#> Dependent Var.:              Euros
#>                                   
#> Constant                          
#> log(dist_km)    -1.518*** (0.1282)
#> Fixed-Effects:  ------------------
#> Year                           Yes
#> Destination                    Yes
#> Origin                         Yes
#> _______________ __________________
#> S.E.: Clustered  by: Orig. & Dest.
#> Observations                38,325
#> Squared Cor.               0.38479
#> Pseudo R2                  0.59312
#> BIC                       2.42e+12
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
etable(res_multi, cluster = ~Origin+Destination, tex = TRUE)
#> Error: in etable(res_multi, cluster = ~Origin + Destination...:
#>  Some elements in '...' could not be evaluated: 
#>   object 'res_multi' not found
myDict = c("log(dist_km)" = "$\\ln (Distance)$", "(Intercept)" = "Constant")
etable(res_multi, signifCode = c("a" = 0.01, "b" = 0.05),
       drop = "Const", dict = myDict, file = "Estimation Tables.tex", 
       replace = TRUE, title = "First export -- normal Standard-errors")
#> Error: in etable(res_multi, signifCode = c(a = 0.01, b = 0....:
#>  Some elements in '...' could not be evaluated: 
#>   object 'res_multi' not found
etable(res_multi, cluster = ~Product, order = "Dist", 
       dict = myDict, file = "Estimation Tables.tex", 
       title = "Second export -- clustered standard-errors (on Product variable)")
#> Error: in etable(res_multi, cluster = ~Product, order = "Di...:
#>  Some elements in '...' could not be evaluated: 
#>   object 'res_multi' not found
fixedEffects = fixef(gravity_pois)
summary(fixedEffects)
#> Fixed_effects coefficients
#>                         Origin Destination Product  Year
#> Number of fixed-effects     15          15      20    10
#> Number of references         0           1       1     1
#> Mean                      23.3        3.09  0.0129 0.157
#> Standard-deviation        1.28        1.11    1.36 0.113
#> 
#> COEFFICIENTS:
#>   Origin:    AT    BE    DE    DK    ES                 
#>           22.51 23.56 24.71 23.44 24.97 ... 10 remaining
#> -----
#>   Destination:    AT    BE    DE    DK    ES                 
#>                2.436 2.696 4.323 2.451 4.043 ... 10 remaining
#> -----
#>   Product: 1     2      3     4      5                 
#>            0 1.414 0.6562 1.449 -1.521 ... 15 remaining
#> -----
#>   Year: 2007    2008     2009    2010  2011                
#>            0 0.06912 0.005225 0.07331 0.163 ... 5 remaining
fixedEffects$Year
#>        2007        2008        2009        2010        2011        2012 
#> 0.000000000 0.069122284 0.005225473 0.073308208 0.163013386 0.192605170 
#>        2013        2014        2015        2016 
#> 0.230629376 0.242605404 0.282800683 0.310325692
plot(fixedEffects)

data(base_did)
est = feols(y ~ x1, base_did)
est_panel = feols(y ~ x1, base_did, panel.id = ~id + period)
summary(est_panel, "newey_west")
#> OLS estimation, Dep. Var.: y
#> Observations: 1,080 
#> Standard-errors: Newey-West (L=1) 
#>             Estimate Std. Error t value   Pr(>|t|)    
#> (Intercept) 1.988753   0.174111 11.4223 1.1709e-06 ***
#> x1          0.983110   0.052699 18.6551 1.6762e-08 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> RMSE: 4.89686   Adj. R2: 0.262357

Created on 2023-07-31 with reprex v2.0.2

to have a matrix of integers in R you need the following

  1. for the values put in at creation to be integers
  2. for any arithmetic manipulation of the integer based matrix to itself by of an integer type

In your case :

orderCluster <- matrix(unlist(lapply(id_dummies_vect, order)), N, Q) - 1

should become

orderCluster <- matrix(as.integer(unlist(lapply(id_dummies_vect, order))), N, Q) - 1L

Note that I made two changes. as.integer() to wrap the numeric inputs to be cast to integer type for the matrix creation. secondly, using -1L rather than -1 as -1 is numeric (double) / not integer, and would cause an implicit cast to numeric which you want to avoid for your matrix. Rather, -1L is an integer and therefore conforms with the integer nature of the matrix, thus preserving it.

This topic was automatically closed 42 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.