# UMBR14
# Part 03: Institutional and Household Populations
#
# This section compiles the number of residents of communal
# establishments by sex and age for small areas from the 2001 and 2011
# Census results.
# 
# Institutional populations by age & sex are given in Census tables
#  CAS001 (2001) and LC1105EW / LC1104SC (2011).
#
# 2001 INSTITUTIONAL POPULATIONS

source("umbr_functions.r")

# Read the 2001 CAS001 extracts for England and for Wales (Output Area level)
# and stack them into one table.
cas001.ew <- rbindlist(list(
    fread("~/data/Census/cas001-age_sex_restype-eng_oa.csv"),
    fread("~/data/Census/cas001-age_sex_restype-wales_oa.csv") ))
# Column names for CAS001: the first column becomes "OA", the rest take
# CAS001.COLS (defined outside this section).  seq_len() is used instead of
# 1:ncol() so that an empty table can never produce the index c(1, 0).
setnames(cas001.ew, seq_len(ncol(cas001.ew)), c("OA", CAS001.COLS) )

# Make the female age groups: every element of CAS001.AGE.BANDS.EW is expanded
# to "F_Inst.<band>" strings and the group itself is named "F.<group name>".
# The names are built with a vectorised sprintf() so they are a plain
# character vector rather than a list coerced by `names<-`.
f.inst.groups <- setNames(
    lapply(CAS001.AGE.BANDS.EW, function(bands) sprintf("F_Inst.%s", bands)),
    sprintf("F.%s", names(CAS001.AGE.BANDS.EW)) )

# Make the male age groups in the same way ("M_Inst.<band>", "M.<group name>")
m.inst.groups <- setNames(
    lapply(CAS001.AGE.BANDS.EW, function(bands) sprintf("M_Inst.%s", bands)),
    sprintf("M.%s", names(CAS001.AGE.BANDS.EW)) )

# Regroup females and males separately (sum.regroup.dt comes from
# umbr_functions.r; presumably it sums the listed columns per group and area --
# confirm there) and stack the two results, females first.
inst.pop.oa <- rbindlist( list(sum.regroup.dt(cas001.ew, f.inst.groups, "OA"),
                               sum.regroup.dt(cas001.ew, m.inst.groups, "OA") ) )
# Check: the regrouped counts must equal the sum of the F_Inst.ALL and
# M_Inst.ALL columns of the source table, otherwise stop here.
stopifnot(sum(inst.pop.oa$Count) == sum(cas001.ew$F_Inst.ALL) + sum(cas001.ew$M_Inst.ALL))

# Sum the 2001 Output Area counts to 2011 LSOA boundaries.
# The lookup supplies an LSOA11CD for each 2001 OA code (column OA01CDO).
oa2lsoa <- fread("working/convert-oa01_to_lsoa11.csv")

# Row of the lookup for every row of inst.pop.oa; NA where the OA code is
# absent from the lookup.
oa.idx <- match(inst.pop.oa$Geogcode, oa2lsoa$OA01CDO)
# Unmatched codes would otherwise be pooled silently under an NA Geogcode, so
# report them.  The aggregation below is unchanged either way.
if (anyNA(oa.idx)) {
    warning(sum(is.na(oa.idx)),
            " row(s) of inst.pop.oa have an OA code that is missing from",
            " working/convert-oa01_to_lsoa11.csv; they are grouped under an NA Geogcode",
            call.=FALSE)
}

# One row per (LSOA, Sex.Age) with the summed Count
inst.pop.01 <- inst.pop.oa[,
                           list(Count=sum(Count)),
                           by=list(Geogcode=oa2lsoa[["LSOA11CD"]][oa.idx],
                                   Sex.Age=Sex.Age) ]

# 2011 Residence Type by Age & Sex, E&W
lc1105.ew <- fread("data/lc1105ew-lsoa11.csv")
# Only columns 4 onwards are renamed (to LC1105.EW.COLS, defined outside this
# section); the first three keep the names they were read with.
setnames(lc1105.ew, 4:ncol(lc1105.ew), LC1105.EW.COLS)

# Female age groups: each element of LC1105.EW.AGE.BANDS becomes a set of
# "F.<band>.Inst" strings, and the group is named "F.<group name>".
f.inst.groups <- setNames(
    lapply(LC1105.EW.AGE.BANDS, function(bands) sprintf("F.%s.Inst", bands)),
    sprintf("F.%s", names(LC1105.EW.AGE.BANDS)) )

# Male age groups, built the same way ("M.<band>.Inst", "M.<group name>")
m.inst.groups <- setNames(
    lapply(LC1105.EW.AGE.BANDS, function(bands) sprintf("M.%s.Inst", bands)),
    sprintf("M.%s", names(LC1105.EW.AGE.BANDS)) )

# Regroup each sex by the "geography code" column and stack, females first
inst.pop.11 <- rbindlist(list(
    sum.regroup.dt(lc1105.ew, f.inst.groups, "geography code"),
    sum.regroup.dt(lc1105.ew, m.inst.groups, "geography code") ))

# Sum to 2011 LSOA output boundaries.
# The lookup maps each LSOA11CD to the code it should be reported under
# (column LSOA.DEST).
lsoa2lsoa <- fread("working/lsoa11-matches.csv")

# Row of the lookup for every row of inst.pop.11; NA where the LSOA code is
# absent from the lookup.
lsoa.idx <- match(inst.pop.11$Geogcode, lsoa2lsoa$LSOA11CD)
# Unmatched codes would otherwise be pooled silently under an NA Geogcode, so
# report them.  The aggregation below is unchanged either way.
if (anyNA(lsoa.idx)) {
    warning(sum(is.na(lsoa.idx)),
            " row(s) of inst.pop.11 have an LSOA code that is missing from",
            " working/lsoa11-matches.csv; they are grouped under an NA Geogcode",
            call.=FALSE)
}

# One row per (destination LSOA, Sex.Age) with the summed Count
inst.pop.11 <- inst.pop.11[,
                           list(Count=sum(Count)),
                           by=list(Geogcode=lsoa2lsoa[["LSOA.DEST"]][lsoa.idx],
                                   Sex.Age=Sex.Age) ]

# The whole lot: join the 2001 and 2011 counts on area code and sex/age group.
# merge() keeps only keys present in both tables by default; the two Count
# columns are told apart by the ".01" / ".11" suffixes.
inst.pop.all <- merge(x=inst.pop.01,
                      y=inst.pop.11,
                      by=c("Geogcode", "Sex.Age"),
                      suffixes=c(".01", ".11"))

# Save the actual Census data
write.csv(inst.pop.all,
          file="working/institutional_pops-ew.csv",
          row.names=FALSE)


### SCOTLAND
# Read the 2001 CAS001 extract for Scotland (Output Area level).
cas001.sc <- fread("~/data/Census/cas001-age_sex_restype-scot_oa.csv")
# Column names for CAS001: the first column becomes "OA", the rest take
# CAS001.COLS (defined outside this section).  seq_len() is used instead of
# 1:ncol() so that an empty table can never produce the index c(1, 0).
setnames(cas001.sc, seq_len(ncol(cas001.sc)), c("OA", CAS001.COLS) )

# Make the female age groups: every element of CAS001.AGE.BANDS.SC is expanded
# to "F_Inst.<band>" strings and the group itself is named "F.<group name>".
# The names are built with a vectorised sprintf() so they are a plain
# character vector rather than a list coerced by `names<-`.
f.inst.groups <- setNames(
    lapply(CAS001.AGE.BANDS.SC, function(bands) sprintf("F_Inst.%s", bands)),
    sprintf("F.%s", names(CAS001.AGE.BANDS.SC)) )

# Make the male age groups in the same way ("M_Inst.<band>", "M.<group name>")
m.inst.groups <- setNames(
    lapply(CAS001.AGE.BANDS.SC, function(bands) sprintf("M_Inst.%s", bands)),
    sprintf("M.%s", names(CAS001.AGE.BANDS.SC)) )

# Regroup females and males separately (sum.regroup.dt comes from
# umbr_functions.r; presumably it sums the listed columns per group and area --
# confirm there) and stack the two results, females first.
inst.pop.oa <- rbindlist( list(sum.regroup.dt(cas001.sc, f.inst.groups, "OA"),
                               sum.regroup.dt(cas001.sc, m.inst.groups, "OA") ) )
# Check: the regrouped counts must equal the sum of the F_Inst.ALL and
# M_Inst.ALL columns of the source table, otherwise stop here.
stopifnot(sum(inst.pop.oa$Count) == sum(cas001.sc$F_Inst.ALL) + sum(cas001.sc$M_Inst.ALL))


# Sum the 2001 Output Area counts to data zones.
# The lookup supplies a data_zone for each 2001 OA code (column outcd01).
oa2dz <- fread("~/data/BORDERS/lookup/oa-2-dz_iz_ca.csv")

# Row of the lookup for every row of inst.pop.oa; NA where the OA code is
# absent from the lookup.
dz.idx <- match(inst.pop.oa$Geogcode, oa2dz$outcd01)
# Unmatched codes would otherwise be pooled silently under an NA Geogcode, so
# report them.  The aggregation below is unchanged either way.
if (anyNA(dz.idx)) {
    warning(sum(is.na(dz.idx)),
            " row(s) of inst.pop.oa have an OA code that is missing from",
            " oa-2-dz_iz_ca.csv; they are grouped under an NA Geogcode",
            call.=FALSE)
}

# One row per (data zone, Sex.Age) with the summed Count
inst.pop.01 <- inst.pop.oa[,list(Count=sum(Count)),
                           by=list(Geogcode=oa2dz[["data_zone"]][dz.idx],
                                   Sex.Age=Sex.Age) ]

# 2011 Residence Type by Age & Sex, Scot
lc1104.sc <- fread("~/data/Census/scot2011/3e_bulk/SNS Data Zone/LC1104SC.csv")
# Columns 2 onwards are renamed to LC1104.SC.COLS (defined outside this
# section).  The first column is left as "V1", which is the name it is
# referred to by below -- presumably the data zone code; confirm against the file.
setnames(lc1104.sc, 2:ncol(lc1104.sc), LC1104.SC.COLS)

# Female age groups: each element of LC1104.SC.AGE.BANDS becomes a set of
# "F.<band>.Inst" strings, and the group is named "F.<group name>".
f.inst.groups <- setNames(
    lapply(LC1104.SC.AGE.BANDS, function(bands) sprintf("F.%s.Inst", bands)),
    sprintf("F.%s", names(LC1104.SC.AGE.BANDS)) )

# Male age groups, built the same way ("M.<band>.Inst", "M.<group name>")
m.inst.groups <- setNames(
    lapply(LC1104.SC.AGE.BANDS, function(bands) sprintf("M.%s.Inst", bands)),
    sprintf("M.%s", names(LC1104.SC.AGE.BANDS)) )

# Regroup each sex by the "V1" column and stack, females first
inst.pop.11 <- rbindlist(list(
    sum.regroup.dt(lc1104.sc, f.inst.groups, "V1"),
    sum.regroup.dt(lc1104.sc, m.inst.groups, "V1") ))

# The whole lot: join the 2001 and 2011 counts on area code and sex/age group.
# merge() keeps only keys present in both tables by default; the two Count
# columns are told apart by the ".01" / ".11" suffixes.
inst.pop.all <- merge(x=inst.pop.01,
                      y=inst.pop.11,
                      by=c("Geogcode", "Sex.Age"),
                      suffixes=c(".01", ".11"))

# 5 Datazones no longer exist in 2011 Census results
# (unql() is defined outside this section -- presumably it counts the distinct
# area codes left after the merge; confirm in umbr_functions.r)
unql(inst.pop.all$Geogcode)
# Save the actual Census data
write.csv(inst.pop.all,
          file="working/institutional_pops-sc.csv",
          row.names=FALSE)
