Some simulated data, borrowed from an earlier post on instrumental variables (original link lost).

# library for generating multivariate distributions
library(MASS)

# always use the same random numbers
set.seed(123)

# the means and errors for the multivariate distribution
MUs    <- c(10,15)
SIGMAs <- matrix(c(1,   0.5,
                   0.5, 2   ),
                 nrow=2,
                 ncol=2       )

# the multivariate distribution
# 1000 draws from N(MUs, SIGMAs); column 1 and column 2 are
# correlated through the off-diagonal covariance of 0.5
mdist <- mvrnorm(n     = 1000,
                 mu    = MUs,
                 Sigma = SIGMAs)

# create unobserved covariate
# NOTE(review): `c` shadows base::c() from here on; a name like `cov_unobs`
# would be safer, but it is kept so the printed outputs below stay valid
c <- mdist[ , 2]

# create the instrumental variable
# z is drawn independently, so it is uncorrelated with c by construction
z <- rnorm(1000)

# create observed variable
# x inherits correlation with c (via mdist) and with the instrument z
x <- mdist[ , 1] + z

# construct the dependent variable
# true coefficients: intercept = 1, x = 1, c = 1; noise sd = 0.5
y <- 1 + x + c + rnorm(1000, 0, 0.5)

Check whether the variables behave as expected:

# x is correlated with the unobserved covariate c (this is what biases OLS) ...
cor(x, c)
## [1] 0.1986307
# ... while the instrument z is (nearly) uncorrelated with c, as required
cor(z, c)
## [1] -0.0120011

Let’s look at the true model.

# regression including c — infeasible in practice since c is unobserved;
# it recovers coefficients close to the true values (all equal to 1)
lm(y ~ x + c)
##
## Call:
## lm(formula = y ~ x + c)
##
## Coefficients:
## (Intercept)            x            c  
##      0.9079       1.0156       0.9955

Estimate using OLS.

# OLS omitting c: the coefficient on x (1.226) is biased away from the
# true value of 1 — omitted-variable bias, since x and c are correlated
lm(y ~ x)
##
## Call:
## lm(formula = y ~ x)
##
## Coefficients:
## (Intercept)            x  
##      13.787        1.226

Now using instrumental variables.

library(AER)
# two-stage least squares with z instrumenting for x;
# the x coefficient (1.008) is close to the true value of 1
ivreg(y ~ x | z)
##
## Call:
## ivreg(formula = y ~ x | z)
##
## Coefficients:
## (Intercept)            x  
##      15.949        1.008

Now using the lm function.

# first stage: regress the endogenous x on the instrument z
lms1 <- lm(x ~ z)

# manually obtain fitted values
# (numerically the same as lms1$fitted.values / fitted(lms1))
lmXhat <- lms1$coefficients[2]*z + lms1$coefficients[1]

# estimate second stage using Xhat
# NOTE(review): manual 2SLS reproduces the ivreg point estimates, but its
# standard errors are NOT valid — use ivreg() for inference
(lms2 <- lm(y ~ lmXhat) )
##
## Call:
## lm(formula = y ~ lmXhat)
##
## Coefficients:
## (Intercept)       lmXhat  
##      15.949        1.008

Now we can do the same using a neural network.

library(nnet)

# first stage
# size=0 hidden units + skip-layer connections + linear output makes the
# network exactly a linear regression of x on z (2 weights: bias + slope)
nns1 <- nnet(x ~ z, size=0, skip=TRUE, linout=TRUE)
## # weights:  2
## initial  value 100832.781903
## final  value 924.804075
## converged
# manually obtain fitted values
nnXhat <- nns1$fitted.values

# estimate second stage using Xhat
# same linear-network trick for the second stage regression of y on Xhat
nns2 <- nnet(y ~ nnXhat, size=0, skip=TRUE, linout=TRUE)
## # weights:  2
## initial  value 528901.038261
## final  value 4019.409973
## converged
summary(nns2)
## a 1-0-1 network with 2 weights
## options were - skip-layer connections  linear output units
##  b->o i1->o
## 15.95  1.01

Compare output.

# coefficient differences between lm and nnet are numerically negligible
# (on the order of 1e-9 — optimizer tolerance, not a real discrepancy)
lms2$coefficients - nns2$wts
##   (Intercept)        lmXhat
## -1.749729e-10 -2.814797e-09

Compare estimates.

library(ggplot2)
# histogram of per-observation differences between the two second-stage fits;
# qplot() was deprecated in ggplot2 3.4.0, so build the plot with ggplot()
ggplot(data.frame(diff = as.numeric(lms2$fitted.values - nns2$fitted.values)),
       aes(x = diff)) +
  geom_histogram()

(Figure: histogram of the differences between the lm and nnet second-stage fitted values — essentially numerical noise.)

A natural next step is to redo this exercise using a non-linearity in the first stage.

Updated: