# UMBR2014
# Part 04: Household number estimation
#
# This section estimates the number of households in each area in each
# year. It takes the numbers of adults living in households by age and
# sex, and applies a rate specific to that area, age and sex at which
# those people are household representative persons. These rates are
# calculated from the 2001 and 2011 Census, interpolated smoothly for
# intervening years, and then held constant from 2011. The number of
# households in each local authority area is constrained to the official
# published household estimate / projection in that area.
#
# The file uses the OA 2001 to LSOA 2011 lookups to estimate 2001 Census
# data to 2011 boundaries. The population living in households is
# calculated in part 02. Age-sex specific household representative rates
# are provided in Census tables CAS003 (2001) and in LC1101EW / LC1101SC
# (2011).
source("umbr_functions.r")

cas001.ew <- rbindlist(list(fread("~/data/Census/cas001-age_sex_restype-eng_oa.csv"),
                            fread("~/data/Census/cas001-age_sex_restype-wales_oa.csv") ) )

# Column definitions in umbr_functions.r
setnames(cas001.ew, 1:ncol(cas001.ew), c("OA", CAS001.COLS) )

# Groupings of CAS001 to sum to total household population
f.hh.groups <- lapply(CAS001.AGE.BANDS, sprintf, fmt="F_Hhold.%s")
names(f.hh.groups) <- lapply(names(f.hh.groups), sprintf, fmt="F.%s")

m.hh.groups <- lapply(CAS001.AGE.BANDS, sprintf, fmt="M_Hhold.%s")
names(m.hh.groups) <- lapply(names(m.hh.groups), sprintf, fmt="M.%s")

hh.pop.oa <- sum.regroup.dt(cas001.ew, c(f.hh.groups, m.hh.groups), "OA")


# Finally, first sum to 2011 LSOA boundaries ...
oa2lsoa <- fread("working/convert-oa01_to_lsoa11.csv")
hh.pop.01 <- hh.pop.oa[,
                       list(Count=sum(Count)),
                       by=list(Geogcode=oa2lsoa[match(hh.pop.oa$Geogcode, oa2lsoa$OA01CDO),
                                                LSOA11CD],
                               Sex.Age=Sex.Age) ]

cas003.ew <- rbindlist(list(fread("~/data/Census/cas003-age_sex_hrp-eng_oa.csv"),
                            fread("~/data/Census/cas003-age_sex_hrp-wales_oa.csv") ) )

# Column definitions in umbr_functions.r
setnames(cas003.ew, 1:ncol(cas003.ew), c("OA", CAS003.COLS) )

# Column definitions in umbr_functions.r
f.hrp.groups <- lapply(CAS003.AGE.BANDS.EW, sprintf, fmt="F_Tot.%s")
names(f.hrp.groups) <- lapply(names(f.hrp.groups), sprintf, fmt="F.%s")

m.hrp.groups <- lapply(CAS003.AGE.BANDS.EW, sprintf, fmt="M_Tot.%s")
names(m.hrp.groups) <- lapply(names(m.hrp.groups), sprintf, fmt="M.%s")

hrp.oa <- sum.regroup.dt(cas003.ew, c(f.hrp.groups, m.hrp.groups), "OA") 


# Sum to 2011 LSOA boundaries ...
hrp.01 <- hrp.oa[,
                     list(Count=sum(Count)),
                     by=list(Geogcode=oa2lsoa[match(hrp.oa$Geogcode, oa2lsoa$OA01CDO),
                                              LSOA11CD],
                         Sex.Age=Sex.Age) ]

# Add in dummy values for the children, who cannot be HRPs
hrp.01 <- rbindlist( list( hrp.01,
                              data.table(Geogcode=unique(hrp.01$Geogcode),
                                         Sex.Age="M.0_15",
                                         Count=0),
                              data.table(Geogcode=unique(hrp.01$Geogcode),
                                         Sex.Age="F.0_15",
                                         Count=0) ) )

headship.01 <- merge(hh.pop.01, hrp.01,
                     by=c("Geogcode", "Sex.Age"),
                     suffixes=c(".HHPOP", ".HRP") )
# Overall headship rates by sex and age
headship.01[,sum(Count.HRP) / sum(Count.HHPOP),by=list(Sex.Age)]

# 2011, England and Wales
# 2011 Residence Type by Age & Sex, E&W
lc1105.ew <- fread("data/lc1105ew-lsoa11.csv")
setnames(lc1105.ew, 4:ncol(lc1105.ew), LC1105.EW.COLS)

# Make the female age groups
f.hh.groups <- lapply(LC1105.EW.AGE.BANDS, sprintf, fmt="F.%s.Hhold")
names(f.hh.groups) <- lapply(names(f.hh.groups), sprintf, fmt="F.%s")

# Make the mmale age groups
m.hh.groups <- lapply(LC1105.EW.AGE.BANDS, sprintf, fmt="M.%s.Hhold")
names(m.hh.groups) <- lapply(names(m.hh.groups), sprintf, fmt="M.%s")

hh.pop.11 <- sum.regroup.dt(lc1105.ew, c(f.hh.groups, m.hh.groups), "geography code")

# Sum to 2011 LSOA output boundaries
lsoa2lsoa <- fread("working/lsoa11-matches.csv")

hh.pop.11 <- hh.pop.11[,
                       list(Count=sum(Count)),
                       by=list(Geogcode=lsoa2lsoa[match(hh.pop.11$Geogcode, lsoa2lsoa$LSOA11CD),
                                                  LSOA.DEST],
                                   Sex.Age=Sex.Age) ]

# 2011 HRPs -
lc1101.ew <- fread("data/lc1101ew-lsoa11.csv")
setnames(lc1101.ew, 4:ncol(lc1101.ew), LC1101.EW.COLS)

# Make the female age groups
f.hrp.groups <- lapply(LC1101.EW.AGE.BANDS, sprintf, fmt="F.%s.All")
names(f.hrp.groups) <- lapply(names(f.hrp.groups), sprintf, fmt="F.%s")

# Make the mmale age groups
m.hrp.groups <- lapply(LC1101.EW.AGE.BANDS, sprintf, fmt="M.%s.All")
names(m.hrp.groups) <- lapply(names(m.hrp.groups), sprintf, fmt="M.%s")

hrp.11 <- sum.regroup.dt(lc1101.ew, c(f.hrp.groups, m.hrp.groups), "geography code")

hrp.11 <- hrp.11[,
                 list(Count=sum(Count)),
                 by=list(Geogcode=lsoa2lsoa[match(hrp.11$Geogcode, lsoa2lsoa$LSOA11CD),
                                            LSOA.DEST],
                         Sex.Age=Sex.Age) ]

# Add in dummy values for the children, who cannot be HRPs
hrp.11 <- rbindlist( list( hrp.11,
                              data.table(Geogcode=unique(hrp.11$Geogcode),
                                         Sex.Age="M.0_15",
                                         Count=0),
                              data.table(Geogcode=unique(hrp.11$Geogcode),
                                         Sex.Age="F.0_15",
                                         Count=0) ) )

headship.11 <- merge(hh.pop.11, hrp.11,
                     by=c("Geogcode", "Sex.Age"),
                     suffixes=c(".HHPOP", ".HRP") )
# Overall headship rates by sex and age
headship.11[,sum(Count.HRP) / sum(Count.HHPOP),by=list(Sex.Age)]

headship <- merge(headship.01, headship.11,
                  by=c("Geogcode", "Sex.Age"),
                  suffixes=c(".01", ".11" ) )

write.csv(headship, "working/headship_ew.csv", row.names=FALSE)


#### SCOTLAND
cas001.sc <- fread("~/data/Census/cas001-age_sex_restype-scot_oa.csv")
# Column names for CAS001
setnames(cas001.sc, 1:ncol(cas001.sc), c("OA", CAS001.COLS) )

# Make the female age groups
f.hh.groups <- lapply(CAS001.AGE.BANDS.SC, sprintf, fmt="F_Hhold.%s")
names(f.hh.groups) <- lapply(names(f.hh.groups), sprintf, fmt="F.%s")

# Make the mmale age groups
m.hh.groups <- lapply(CAS001.AGE.BANDS.SC, sprintf, fmt="M_Hhold.%s")
names(m.hh.groups) <- lapply(names(m.hh.groups), sprintf, fmt="M.%s")

hh.pop.oa <- sum.regroup.dt(cas001.sc, c(f.hh.groups, m.hh.groups), "OA")
oa2dz <- fread("~/data/BORDERS/lookup/oa-2-dz_iz_ca.csv")
hh.pop.dz <- hh.pop.oa[,list(Count=sum(Count)),
                       by=list(Geogcode=oa2dz[match(inst.pop.oa$Geogcode, oa2dz$outcd01),
                                              data_zone],
                           Sex.Age=Sex.Age) ]
## 
cas003.sc <- fread("~/data/Census/cas003-age_sex_hrp-scot_oa.csv")

# Column definitions in umbr_functions.r
setnames(cas003.sc, 1:ncol(cas003.sc), c("OA", CAS003.COLS) )

# Column definitions in umbr_functions.r
f.hrp.groups <- lapply(CAS003.AGE.BANDS.SC, sprintf, fmt="F_Tot.%s")
names(f.hrp.groups) <- lapply(names(f.hrp.groups), sprintf, fmt="F.%s")

m.hrp.groups <- lapply(CAS003.AGE.BANDS.SC, sprintf, fmt="M_Tot.%s")
names(m.hrp.groups) <- lapply(names(m.hrp.groups), sprintf, fmt="M.%s")

hrp.oa <- sum.regroup.dt(cas003.sc, c(f.hrp.groups, m.hrp.groups), "OA") 
hrp.dz <- hrp.oa[,list(Count=sum(Count)),
                    by=list(Geogcode=oa2dz[match(hrp.oa$Geogcode, oa2dz$outcd01),
                                data_zone],
                        Sex.Age=Sex.Age) ]

# Add in dummy values for the children, who cannot be HRPs (with a tiny
# number of exceptions)
hrp.dz <- rbindlist( list( hrp.dz,
                          data.table(Geogcode=unique(hrp.dz$Geogcode),
                                     Sex.Age="M.0_15", Count=0),
                          data.table(Geogcode=unique(hrp.dz$Geogcode),
                                     Sex.Age="F.0_15", Count=0) ) )
headship.01 <- merge(hh.pop.dz, hrp.dz,
                     by=c("Geogcode", "Sex.Age"),
                     suffixes=c(".HHPOP", ".HRP") )
# Overall headship rates by sex and age - 2001
headship.01[,sum(Count.HRP) / sum(Count.HHPOP),by=list(Sex.Age)]


## 2011, SCOTLAND
lc1104.sc <- fread("~/data/Census/scot2011/3e_bulk/SNS Data Zone/LC1104SC.csv")
setnames(lc1104.sc, 2:ncol(lc1104.sc), LC1104.SC.COLS)

# Make the female age groups
f.hh.groups <- lapply(LC1104.SC.AGE.BANDS, sprintf, fmt="F.%s.Hhold")
names(f.hh.groups) <- lapply(names(f.hh.groups), sprintf, fmt="F.%s")

# Make the mmale age groups
m.hh.groups <- lapply(LC1104.SC.AGE.BANDS, sprintf, fmt="M.%s.Hhold")
names(m.hh.groups) <- lapply(names(m.hh.groups), sprintf, fmt="M.%s")

# Total household population
hh.pop.11 <- sum.regroup.dt(lc1104.sc, c(f.hh.groups, m.hh.groups), "V1")

# 2011 HRPs - LC11
lc1101.sc <- fread("~/data/Census/scot2011/3d_bulk/SNS Data Zone/LC1101SC.csv")
# This one is in partially-tabular form, with geog/age/sex in rows HRP/marital in cols

setnames(lc1101.sc, 4:10, LC1101.EW.MARITALS)
setnames(lc1101.sc, 1:3, c("Geogcode", "Sex", "Age") )
lc1101.sc[, Sex := LC1101.SC.SEXES[Sex] ]
lc1101.sc[, Age := LC1101.SC.AGES[Age] ]
lc1101.sc[, Sex.Age := sprintf("%s.%s", Sex, Age) ]

hrp.11 <- lc1101.sc[ Sex != "All" & Age != "All" ,
                    c("Geogcode", "Sex.Age", "All"), with=FALSE ]
setnames(hrp.11, "All", "Count")
# Add in dummy values for the children, who cannot be HRPs
hrp.11 <- rbindlist( list( hrp.11,
                              data.table(Geogcode=unique(hrp.11$Geogcode),
                                         Sex.Age="M.0_15", Count=0),
                              data.table(Geogcode=unique(hrp.11$Geogcode),
                                         Sex.Age="F.0_15", Count=0) ) )

headship.11 <- merge(hh.pop.11, hrp.11,
                     by=c("Geogcode", "Sex.Age"),
                     suffixes=c(".HHPOP", ".HRP") )
# Overall headship rates by sex and age
headship.11[,sum(Count.HRP) / sum(Count.HHPOP),by=list(Sex.Age)]

headship <- merge(headship.01, headship.11,
                  by=c("Geogcode", "Sex.Age"),
                  suffixes=c(".01", ".11" ) )

write.csv(headship, "working/headship_scot.csv", row.names=FALSE)
