# UMBR2014
# Part 08: Compilation
#
# This final section simply compiles the preceding results into a single
# output file, containing the numerator (total means-tested benefits),
# denominator (household estimates), adult and child household
# populations for each year and area.
source("umbr_functions.r")

# Means-tested benefit totals
# Read the England/Wales and Scotland benefit files, give both the same
# names for the area-code and year columns, and stack them into a single
# table, `bens`, keyed on (Geogcode, Year).

# English/Wales benefits data
bens.ew <- fread("working/mtb_ew.csv")
setnames(bens.ew, c("LSOA11CD", "year"), c("Geogcode", "Year"))

# Scotland
bens.sc <- fread("working/mtb_scot.csv")
# Consistent with E&W, don't use individual benefits.
# The columns are deleted with `:=` (by reference), matching the rest of
# this script; `[<-` on a data.table copies the whole table first.
bens.sc[, c("JSA", "IncSupp", "ESA", "PCGC") := NULL]
setnames(bens.sc, c("geogcode", "year"), c("Geogcode", "Year"))

# NOTE(review): rbindlist() is called with its default column matching, so
# after the drops above both files are assumed to have the same columns in
# the same order -- confirm against the two working files.
bens <- rbindlist(list(bens.ew, bens.sc))
setkeyv(bens, c("Geogcode", "Year"))

# Household estimates: the denominator of the rate
hholds <- fread("working/household_estimates.csv")
setkey(hholds, Geogcode, Year)

# `bens` and `hholds` carry the same (Geogcode, Year) key, which is what
# merge() joins on here
umbr <- merge(bens, hholds)

# Calculating UMBR: benefit total rounded to a whole number, then divided
# by the household estimate. Both columns are assigned in one step; the
# right-hand sides are evaluated before either column is written.
umbr[, `:=`(All.MTB = round(All.MTB),
            UMBR.HH = round(All.MTB) / Hholds)]

# DEALING WITH GEOGRAPHY CHANGES
# Some LSOAs are not measured directly: the conversion table points them at
# a destination (composite) LSOA whose UMBR value they borrow. This section
# builds one row per such LSOA per year and appends those rows to `umbr`,
# giving `umbr.14`.

# Geography quality indicators, merged LSOAs
lsoa.conv <- fread("working/lsoa11-matches.csv")

# LSOAs that are not measured directly but cross-referred to composite
lsoa.x <- lsoa.conv[LSOA11CD != LSOA.DEST, ]

# unq() and unql() are not defined in this file (presumably they come from
# umbr_functions.r and return the unique values / their count -- TODO
# confirm). They are called exactly as before.
umbr.years <- unq(umbr$Year)
n.years <- unql(umbr$Year)

# Create a set of cases for the cross-referenced LSOAs: the full list of
# codes repeated once per year, next to each year repeated once per code.
# rep(each = ) is used instead of as.vector(sapply(...)) because sapply()
# hands back a list, not a vector, when there are no cross-referenced LSOAs.
xref.codes <- rep(lsoa.x$LSOA11CD, n.years)
lsoa.xref <- data.table(Geogcode = xref.codes,
                        Year = rep(umbr.years, each = nrow(lsoa.x)),
                        # Plain (logical) NA, sized explicitly so that an
                        # empty cross-reference list gives an empty table;
                        # rbind() below coerces it to umbr's column type
                        All.MTB = rep(NA, length(xref.codes)),
                        Hholds = rep(NA, length(xref.codes)),
                        Geog.XRef = rep(lsoa.x$LSOA.DEST, n.years))

# Borrow the rate of the destination LSOA for the same year; the result is
# NA where that destination/year pair is not present in `umbr`
lsoa.xref[, UMBR.HH := umbr$UMBR.HH[match(paste(Geog.XRef, Year),
                                          paste(umbr$Geogcode, umbr$Year))]]

setcolorder(lsoa.xref,
            c("Geogcode", "Year", "All.MTB", "Hholds", "UMBR.HH", "Geog.XRef"))

# Combine the two, the base dataset plus the one with the cross-referenced
# areas. Directly measured areas get an empty cross-reference.
umbr[, Geog.XRef := ""]
umbr.14 <- rbind(umbr, lsoa.xref)

# Geography indicator
# Look up each area's change flag (GeogChange) in the LSOA conversion
# table; areas not listed there get NA
umbr.14[, Geog.Indic := lsoa.conv$GeogChange[match(Geogcode,
                                                   lsoa.conv$LSOA11CD)]]

# Scottish areas (codes beginning with "S") are automatically correct
umbr.14[grepl("^S", Geogcode), Geog.Indic := "A"]

# ONS geography changes marked "X" (complex boundaries) should be
# marked "B" in the UMBR dataset
ons.matching <- fread("data/LSOA01_LSOA11_LAD11_EW_LU.csv")
complex.lsoas <- ons.matching[CHGIND == "X", LSOA11CD]
umbr.14[Geogcode %in% complex.lsoas, Geog.Indic := "B"]

# Column ordering for output: the geography indicator goes before the
# cross-reference column. Columns are named rather than given as positions
# so the output layout does not depend on the order in which earlier steps
# happened to create them.
setcolorder(umbr.14, c("Geogcode", "Year", "All.MTB", "Hholds", "UMBR.HH",
                       "Geog.Indic", "Geog.XRef"))
# Sort by year, then area code
setkeyv(umbr.14, c("Year", "Geogcode"))
# Round the rate to four decimal places
umbr.14[, UMBR.HH := round(UMBR.HH, 4)]

# Check we have the correct number of areas.
# unql() is not defined in this file (presumably from umbr_functions.r,
# counting unique values -- TODO confirm).
stopifnot(unql(umbr.14$Geogcode) == 41253)
# Every area/year must have a rate.
# NOTE(review): is.na() does not flag Inf, which All.MTB / Hholds would give
# for a zero household estimate -- confirm that cannot occur upstream.
stopifnot(all(!is.na(umbr.14$UMBR.HH)))

# Save!
write.csv(umbr.14,
          "umbr14-esw.csv",
          row.names = FALSE,
          quote = FALSE,
          na = "NA")


