Social Vulnerability Ireland - Census Data

Social Vulnerability Ireland - Census Data#

Environment#

R Libraries#

Any required R libraries are imported into the kernel:

# Load R libraries
## none required

Output directory#

# Create the pipeline output directory if it does not already exist.
pipeline_dir <- file.path("../..", "2_pipeline", "Ireland", "1a_CensusData", "2022")
if (!dir.exists(pipeline_dir)) {
  # dir.create() only warns and returns FALSE on failure, so check its result
  # rather than reporting success unconditionally.
  if (!dir.create(pipeline_dir, recursive = TRUE)) {
    stop("Could not create output directory: ", pipeline_dir, call. = FALSE)
  }
  print(paste0(pipeline_dir, " created"))
}

Load Data#

Import the csv data#

Ireland census data from: https://www.cso.ie/en/census/census2022/census2022smallareapopulationstatistics

# Read the census data (CSO Census 2022 Small Area Population Statistics CSV)
census_data <- read.csv(
  "../../0_data/census/Ireland/2022/SAPS_2022_Small_Area_UR_171024.csv",
  sep = ","
)

# Remove the 'IE0' row from the Census 2022 CSV supplied by CSO.
# %in% never returns NA, so a missing GUID cannot inject an all-NA row the way
# a bare `!=` comparison used as a row index would.
census_data <- census_data[!(census_data$GUID %in% "IE0"), ]

# Reindex the rows. Assigning NULL resets row names to 1..n and, unlike
# 1:nrow(census_data), also works when no rows remain.
row.names(census_data) <- NULL

head(census_data)

A data.frame: 6 × 795
	GUID	GEOGID	GEOGDESC	UR_Category	UR_Category_Desc	T1_1AGE0M	T1_1AGE1M	T1_1AGE2M	T1_1AGE3M	T1_1AGE4M	⋯	T15_1_2C	T15_1_3C	T15_1_GE4C	T15_1_NSC	T15_1_TC	T15_2_BB	T15_2_OIC	T15_2_NO	T15_2_NS	T15_2_T
	<chr>	<chr>	<chr>	<int>	<chr>	<int>	<int>	<int>	<int>	<int>	⋯	<int>	<int>	<int>	<int>	<int>	<int>	<int>	<int>	<int>	<int>
1	4c07d11e-11d3-851d-e053-ca3ca8c0ca7f	017001001	017001001	4	4. Rural areas with high urban influence	0	6	1	1	0	⋯	71	20	6	7	135	80	37	9	9	135
2	4c07d11e-123a-851d-e053-ca3ca8c0ca7f	017002001	017002001	4	4. Rural areas with high urban influence	1	2	1	2	4	⋯	60	14	6	2	118	90	11	15	2	118
3	4c07d11e-14b1-851d-e053-ca3ca8c0ca7f	017002002	017002002	3	3. Independent urban towns	1	5	1	2	0	⋯	45	7	3	3	119	101	3	10	5	119
4	bf640964-28f3-4ccf-a610-04685d80ea2e	017002003/01	017002003/01	4	4. Rural areas with high urban influence	4	5	1	1	4	⋯	75	15	8	7	157	134	3	11	9	157
5	4c07d11d-f709-851d-e053-ca3ca8c0ca7f	017003001	017003001	5	5. Rural areas with moderate urban influence	0	1	1	1	1	⋯	31	19	6	9	90	50	16	14	10	90
6	4c07d11e-1237-851d-e053-ca3ca8c0ca7f	017003002	017003002	5	5. Rural areas with moderate urban influence	0	2	3	1	3	⋯	57	11	6	8	118	85	16	10	7	118

Prepare data#

We only require a subset of the census data for our purposes. We therefore need to extract the relevant data, then combine these to create our vulnerability indicators.

In addition, the raw data are not suitable for use within the vulnerability assessment. They need to be normalised by the number of people/households within each small area. Therefore, the data are converted to percentages of the total persons/households within each small area.

Supporting data#

Code that uniquely identifies the census area#

# Name of the column that uniquely identifies each census small area
identifier <- "GUID"

# Keep the identifier as a one-column data frame so it can be column-bound
# with the indicator tables
census_area_id <- census_data[identifier]

# Number of rows in the census data
nrow(census_data)

18919

Population total#

# Total population per small area, as a one-column data frame
# named 'population_total'
population_total <- setNames(census_data["T1_1AGETT"], "population_total")
head(population_total)

A data.frame: 6 × 1
	population_total
	<int>
1	419
2	345
3	338
4	439
5	255
6	344

Households / families total#

# Total households per small area, as a one-column data frame
# named 'households_total'
households_total <- setNames(census_data["T5_1T_H"], "households_total")
head(households_total)

A data.frame: 6 × 1
	households_total
	<int>
1	137
2	118
3	119
4	158
5	91
6	119

Domain data#

Age domain#

### Domain: Age ###

# Age - early childhood (0 to 5 years old, age 5 included):
# single-year totals T1_1AGE0T ... T1_1AGE5T
early_childhood_fields <- paste0("T1_1AGE", 0:5, "T")
early_childhood_data <- census_data[early_childhood_fields]
early_childhood <- rowSums(early_childhood_data, na.rm = TRUE)
# Express as a percentage of the total population of each small area
early_childhood_pct <- setNames(
  (early_childhood / population_total) * 100.0,
  "early_childhood_pct"
)

# Age - middle to oldest old (75+ years old)
age_middle_to_oldest_old_fields <- c(
  "T1_1AGE75_79T",  # Age 75 - 79 - Total
  "T1_1AGE80_84T",  # Age 80 - 84 - Total
  "T1_1AGEGE_85T"   # Age 85 and over - Total
)
age_middle_to_oldest_old_data <- census_data[age_middle_to_oldest_old_fields]
age_middle_to_oldest_old <- rowSums(age_middle_to_oldest_old_data, na.rm = TRUE)
age_middle_to_oldest_old_pct <- setNames(
  (age_middle_to_oldest_old / population_total) * 100.0,
  "age_middle_to_oldest_old_pct"
)

# Bind the indicators of this domain into a single table
age_domain_pct <- cbind(
  early_childhood_pct,
  age_middle_to_oldest_old_pct
)

# Show the first six rows as a visual sanity check
head(age_domain_pct)

A data.frame: 6 × 2
	early_childhood_pct	age_middle_to_oldest_old_pct
	<dbl>	<dbl>
1	6.443914	4.534606
2	6.086957	11.014493
3	5.621302	3.550296
4	8.883827	8.883827
5	6.274510	5.098039
6	7.267442	8.139535

Health domain#

### Domain: Health ###

# Health - poor health
## The Irish census offers: very good, good, fair, bad, very bad, not stated.
## "Bad" and "very bad" are counted as poor health here.
poor_health_fields <- c(
  "T12_3_BT",  # Bad - Total
  "T12_3_VBT"  # Very bad - Total
)
poor_health_data <- census_data[poor_health_fields]
poor_health <- rowSums(poor_health_data, na.rm = TRUE)
poor_health_pct <- setNames(
  (poor_health / population_total) * 100.0,
  "poor_health_pct"
)

# Health - disability
disability_fields <- "T12_1_T"  # Persons with disability - Total
disability_data <- census_data[disability_fields]
# rowSums() on the single column turns a missing count into 0
disability <- rowSums(disability_data, na.rm = TRUE)
disability_pct <- setNames(
  (disability / population_total) * 100.0,
  "disability_pct"
)

# Bind the indicators of this domain into a single table
health_domain_pct <- cbind(
  poor_health_pct,
  disability_pct
)

# Show the first six rows as a visual sanity check
head(health_domain_pct)

A data.frame: 6 × 2
	poor_health_pct	disability_pct
	<dbl>	<dbl>
1	1.670644	19.09308
2	2.028986	25.50725
3	3.550296	23.66864
4	2.277904	20.95672
5	1.960784	16.47059
6	2.616279	27.32558

Income domain#

### Domain: Income ###

# One parent households (share of all households)
one_parent_households_fields <- c(
  "T5_1OPFC_H",   # One parent family (father) with children households (No. of households)
  "T5_1OPMC_H",   # One parent family (mother) and children households (No. of households)
  "T5_1OPFCO_H",  # One parent family (father) with children and others households (No. of households)
  "T5_1OPMCO_H"   # One parent family (mother) with children and others households (No. of households)
)
one_parent_households_data <- census_data[one_parent_households_fields]
one_parent_households <- rowSums(one_parent_households_data, na.rm = TRUE)
one_parent_households_pct <- setNames(
  (one_parent_households / households_total) * 100.0,
  "one_parent_households_pct"
)

# Three or more children per household
# TODO: review the choice of fields and denominator for this indicator.
## For census purposes a family is a couple with or without children, or a one
## parent family with one or more children. Family members must be usual
## residents of the relevant household.
## Note: families can include children aged 18 and over (now adults) who live
## with their parents.
## The family counts are divided by total households, not total families, where
##   total households = family households + one-person households
##                      + non-family households
## Open question - would the "all children aged under 15" fields be better?
##   T4_2_3CU15    Families with 3 children - All children aged under 15
##   T4_2_4CU15    Families with 4 children - All children aged under 15
##   T4_2_GE5CU15  Families with 5 or more children - All children aged under 15
three_or_more_children_households_fields <- c(
  "T4_2_3CT",   # Families with 3 children - Total
  "T4_2_4CT",   # Families with 4 children - Total
  "T4_2_GE5CT"  # Families with 5+ children - Total
)
three_or_more_children_households_data <-
  census_data[three_or_more_children_households_fields]
three_or_more_children_households <-
  rowSums(three_or_more_children_households_data, na.rm = TRUE)
three_or_more_children_households_pct <- setNames(
  (three_or_more_children_households / households_total) * 100.0,
  "three_or_more_children_households_pct"
)

# Low skilled employment (share of total population)
low_skilled_employment_fields <- c(
  "T9_2_PE",  # E Manual skilled (No. of persons)
  "T9_2_PF",  # F Semi-skilled (No. of persons)
  "T9_2_PG"   # G Unskilled (No. of persons)
)
low_skilled_employment_data <- census_data[low_skilled_employment_fields]
low_skilled_employment <- rowSums(low_skilled_employment_data, na.rm = TRUE)
low_skilled_employment_pct <- setNames(
  (low_skilled_employment / population_total) * 100.0,
  "low_skilled_employment_pct"
)

# Farmers (share of total population)
## Not included: 'T9_2_PJ' - Agricultural workers (No. of persons), which also
## covers forestry and fishing.
farmers_fields <- "T9_2_PI"  # Farmers (No. of persons)
farmers_data <- census_data[farmers_fields]
farmers <- rowSums(farmers_data, na.rm = TRUE)
farmers_pct <- setNames(
  (farmers / population_total) * 100.0,
  "farmers_pct"
)

# Unemployment (share of total population)
unemployment_fields <- c(
  "T8_1_LFFJT",  # Looking for first regular job - Total
  "T8_1_STUT",   # Short term unemployed - Total
  "T8_1_LTUT",   # Long term unemployed - Total
  "T8_1_LAHFT",  # Looking after home/family - Total (NOT SURE ABOUT THIS ONE)
  "T8_1_UTWSDT"  # Unable to work due to permanent sickness or disability - Total (MAY CORRELATE WITH HEALTH TOO MUCH)
)
unemployment_data <- census_data[unemployment_fields]
unemployment <- rowSums(unemployment_data, na.rm = TRUE)
unemployment_pct <- setNames(
  (unemployment / population_total) * 100.0,
  "unemployment_pct"
)

# Bind the indicators of this domain into a single table
income_domain_pct <- cbind(
  one_parent_households_pct,
  three_or_more_children_households_pct,
  low_skilled_employment_pct,
  farmers_pct,
  unemployment_pct
)

# Show the first six rows as a visual sanity check
head(income_domain_pct)

A data.frame: 6 × 5
	one_parent_households_pct	three_or_more_children_households_pct	low_skilled_employment_pct	farmers_pct	unemployment_pct
	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>
1	6.569343	16.788321	23.62768	14.081146	8.830549
2	8.474576	12.711864	17.10145	9.565217	11.884058
3	14.285714	9.243697	15.97633	0.591716	13.609467
4	7.594937	7.594937	12.98405	4.100228	12.300683
5	8.791209	13.186813	16.86275	12.941176	11.764706
6	8.403361	14.285714	27.03488	12.209302	15.697674

Information Access/Use domain#

### Domain: Information Access/Use ###

# No higher education (share of total population)
## Only "no formal education" is counted; 'T10_4_PT' (Primary education -
## Total) is deliberately left out.
no_higher_education_fields <- "T10_4_NFT"  # No formal education - Total
no_higher_education_data <- census_data[no_higher_education_fields]
no_higher_education <- rowSums(no_higher_education_data, na.rm = TRUE)
no_higher_education_pct <- setNames(
  (no_higher_education / population_total) * 100.0,
  "no_higher_education_pct"
)

# Speakers of foreign languages: ability to speak English "not well" or
# "not at all" (share of total population)
speak_english_fields <- c(
  "T2_6NW",  # Not well
  "T2_6NAA"  # Not at all
)
speak_english_data <- census_data[speak_english_fields]
speak_english <- rowSums(speak_english_data, na.rm = TRUE)
speak_english_pct <- setNames(
  (speak_english / population_total) * 100.0,
  "speak_english_pct"
)

# Households with no Internet connection (share of all households)
no_internet_fields <- "T15_2_NO"  # No internet connection
no_internet_data <- census_data[no_internet_fields]
no_internet <- rowSums(no_internet_data, na.rm = TRUE)
no_internet_pct <- setNames(
  (no_internet / households_total) * 100.0,
  "no_internet_pct"
)

# Bind the indicators of this domain into a single table
info_domain_pct <- cbind(
  no_higher_education_pct,
  speak_english_pct,
  no_internet_pct
)

# Show the first six rows as a visual sanity check
head(info_domain_pct)

A data.frame: 6 × 3
	no_higher_education_pct	speak_english_pct	no_internet_pct
	<dbl>	<dbl>	<dbl>
1	0.7159905	0.4773270	6.569343
2	0.2898551	0.2898551	12.711864
3	0.8875740	0.5917160	8.403361
4	0.4555809	0.6833713	6.962025
5	0.3921569	0.0000000	15.384615
6	2.9069767	0.5813953	8.403361

Local knowledge domain#

### Domain: Local Knowledge ###

# New residents - usually resident population aged 1 year and over, by usual
# residence 1 year before Census Day (share of total population)
new_residents_fields <- c(
  "T2_3EI",  # Elsewhere in Ireland
  "T2_3OI"   # Outside Ireland
)
new_residents_data <- census_data[new_residents_fields]
new_residents <- rowSums(new_residents_data, na.rm = TRUE)
new_residents_pct <- setNames(
  (new_residents / population_total) * 100.0,
  "new_residents_pct"
)

# Foreigners - persons born outside Ireland, derived as
# (total by birthplace) - (born in Ireland).
# The Ireland count is negated so that a row sum yields the difference.
birthplace_total <- census_data["T2_1TBP"]      # T2_1TBP:  Total - Birthplace
birthplace_ireland <- -census_data["T2_1IEBP"]  # T2_1IEBP: Ireland - Birthplace (negated)
foreign_nationals_data <- data.frame(birthplace_total, birthplace_ireland)
foreign_nationals <- rowSums(foreign_nationals_data, na.rm = TRUE)
foreign_nationals_pct <- setNames(
  (foreign_nationals / population_total) * 100.0,
  "foreign_nationals_pct"
)

# Bind the indicators of this domain into a single table
local_knowledge_domain_pct <- cbind(
  new_residents_pct,
  foreign_nationals_pct
)

# Show the first six rows as a visual sanity check
head(local_knowledge_domain_pct)

A data.frame: 6 × 2
	new_residents_pct	foreign_nationals_pct
	<dbl>	<dbl>
1	1.9093079	4.773270
2	0.8695652	5.797101
3	1.1834320	13.609467
4	3.4168565	7.517084
5	2.3529412	7.843137
6	3.1976744	8.139535

Mobility domain#

### Domain: Mobility ###

# Households with no motor car (share of all households)
no_car_fields <- "T15_1_NC"  # No motor car
no_car_data <- census_data[no_car_fields]
no_car <- rowSums(no_car_data, na.rm = TRUE)
no_car_pct <- setNames(
  (no_car / households_total) * 100.0,
  "no_car_pct"
)

# This domain has a single indicator; cbind() keeps the same table shape as
# the other domains
mobility_domain_pct <- cbind(no_car_pct)

# Show the first six rows as a visual sanity check
head(mobility_domain_pct)

A data.frame: 6 × 1
	no_car_pct
	<dbl>
1	2.919708
2	4.237288
3	2.521008
4	3.797468
5	1.098901
6	1.680672

Physical access domain#

### Domain: Physical access ###

# Journey time - population aged 5 years and over by journey time to work,
# school or college; journeys of one hour or more are counted
# NOTE(review): the count covers ages 5+ but is divided by the total
# population - confirm this denominator is intended.
journey_time_fields <- c(
  "T11_3_D5",  # 1 hour - under 1 1/2 hours
  "T11_3_D6"   # 1 1/2 hours and over
)
journey_time_data <- census_data[journey_time_fields]
journey_time <- rowSums(journey_time_data, na.rm = TRUE)
journey_time_pct <- setNames(
  (journey_time / population_total) * 100.0,
  "journey_time_pct"
)

# This domain has a single indicator; cbind() keeps the same table shape as
# the other domains
physical_access_domain_pct <- cbind(journey_time_pct)

# Show the first six rows as a visual sanity check
head(physical_access_domain_pct)

A data.frame: 6 × 1
	journey_time_pct
	<dbl>
1	7.159905
2	6.376812
3	5.621302
4	3.644647
5	6.274510
6	8.139535

Tenure domain#

### Domain: Tenure ###

# Permanent private households by type of occupancy: persons living in rented
# accommodation (share of total population)
rented_fields <- c(
  "T6_3_RPLP",   # Rented from private landlord (No. of persons)
  "T6_3_RLAP",   # Rented from Local Authority (No. of persons)
  "T6_3_RVCHBP"  # Rented from voluntary/co-operative housing body (No. of persons)
)
rented_data <- census_data[rented_fields]
rented <- rowSums(rented_data, na.rm = TRUE)
rented_pct <- setNames(
  (rented / population_total) * 100.0,
  "rented_pct"
)

# This domain has a single indicator; cbind() keeps the same table shape as
# the other domains
tenure_domain_pct <- cbind(rented_pct)

# Show the first six rows as a visual sanity check
head(tenure_domain_pct)

A data.frame: 6 × 1
	rented_pct
	<dbl>
1	5.250597
2	6.086957
3	24.852071
4	9.794989
5	3.529412
6	9.883721

Social Network domain data#

### Domain: Social Network ###

# Not volunteers: total population minus the number of volunteers
volunteers_fields <- "T7_1_VOL"  # Number of volunteers
volunteers_data <- census_data[volunteers_fields]
volunteers <- rowSums(volunteers_data, na.rm = TRUE)
not_volunteers <- population_total - volunteers
not_volunteers_pct <- setNames(
  (not_volunteers / population_total) * 100.0,
  "not_volunteers_pct"
)

# Primary school age children: single-year totals for ages 4 to 12
# (columns T1_1AGE4T ... T1_1AGE12T)
primary_school_age_fields <- paste0("T1_1AGE", 4:12, "T")
primary_school_age_data <- census_data[primary_school_age_fields]
primary_school_age <- rowSums(primary_school_age_data, na.rm = TRUE)
primary_school_age_pct <- setNames(
  (primary_school_age / population_total) * 100.0,
  "primary_school_age_pct"
)

# One person households (share of all households)
one_person_households_fields <- "T5_1OP_H"  # One person households
one_person_households_data <- census_data[one_person_households_fields]
one_person_households <- rowSums(one_person_households_data, na.rm = TRUE)
one_person_households_pct <- setNames(
  (one_person_households / households_total) * 100.0,
  "one_person_households_pct"
)

# Bind the indicators of this domain into a single table
social_network_domain_pct <- cbind(
  not_volunteers_pct,
  primary_school_age_pct,
  one_person_households_pct
)

# Show the first six rows as a visual sanity check
head(social_network_domain_pct)

A data.frame: 6 × 3
	not_volunteers_pct	primary_school_age_pct	one_person_households_pct
	<dbl>	<dbl>	<dbl>
1	87.35084	14.08115	15.32847
2	79.71014	11.30435	14.40678
3	83.72781	13.01775	19.32773
4	82.23235	12.98405	22.15190
5	83.92157	13.33333	21.97802
6	81.10465	12.79070	16.80672

Housing Characteristics domain#

### Domain: Housing Characteristics ###
# Every indicator in this domain is expressed as a percentage of the total
# number of households in the small area.

# Indicator: Permanent private households by central heating
no_central_heating_fields <- c(
  "T6_5_NCH"  # No central heating
)
no_central_heating_data <- census_data[, no_central_heating_fields, drop = FALSE]
no_central_heating <- rowSums(no_central_heating_data, na.rm = TRUE)
no_central_heating_pct <- (no_central_heating / households_total) * 100.0
names(no_central_heating_pct)[1] <- "no_central_heating_pct"

# Indicator: Permanent private households by water supply
private_water_supply_fields <- c(
  "T6_6_GSP",  # Group scheme with private source
  "T6_6_OP"    # Other private source
)
private_water_supply_data <- census_data[, private_water_supply_fields, drop = FALSE]
private_water_supply <- rowSums(private_water_supply_data, na.rm = TRUE)
private_water_supply_pct <- (private_water_supply / households_total) * 100.0
names(private_water_supply_pct)[1] <- "private_water_supply_pct"

# Indicator: Permanent private households by year built (built before 1946).
# This indicator was previously computed twice with identical code; it is now
# computed once.
year_built_fields <- c(
  "T6_2_PRE19H",  # Pre 1919 (No. of households)
  "T6_2_19_45H"   # 1919 - 1945 (No. of households)
)
year_built_data <- census_data[, year_built_fields, drop = FALSE]
year_built <- rowSums(year_built_data, na.rm = TRUE)
year_built_pct <- (year_built / households_total) * 100.0
names(year_built_pct)[1] <- "year_built_pct"

# Indicator: Private households by type of accommodation
mobile_home_fields <- c(
  "T6_1_CM_H"  # Caravan/Mobile home (No. of households)
)
mobile_home_data <- census_data[, mobile_home_fields, drop = FALSE]
mobile_home <- rowSums(mobile_home_data, na.rm = TRUE)
mobile_home_pct <- (mobile_home / households_total) * 100.0
names(mobile_home_pct)[1] <- "mobile_home_pct"

# Indicator: Occupancy status of permanent dwellings on Census night
# NOTE(review): these look like dwelling counts divided by the household
# total - confirm this ratio is the intended normalisation.
unoccupied_dwellings_fields <- c(
  "T6_8_TA",   # Temporarily absent
  "T6_8_UHH",  # Unoccupied holiday homes
  "T6_8_OVD"   # Other vacant dwellings
)
unoccupied_dwellings_data <- census_data[, unoccupied_dwellings_fields, drop = FALSE]
unoccupied_dwellings <- rowSums(unoccupied_dwellings_data, na.rm = TRUE)
unoccupied_dwellings_pct <- (unoccupied_dwellings / households_total) * 100.0
names(unoccupied_dwellings_pct)[1] <- "unoccupied_dwellings_pct"

# Combine all these indicators into a single table for this domain
housing_characteristics_domain_pct <- cbind(
  no_central_heating_pct,
  private_water_supply_pct,
  year_built_pct,
  mobile_home_pct,
  unoccupied_dwellings_pct
)

# Print the first six rows of the data to visually check it looks OK
head(housing_characteristics_domain_pct)

A data.frame: 6 × 5
	no_central_heating_pct	private_water_supply_pct	year_built_pct	mobile_home_pct	unoccupied_dwellings_pct
	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>
1	0.0000000	87.591241	17.518248	1.4598540	10.218978
2	1.6949153	76.271186	18.644068	0.0000000	3.389831
3	0.8403361	1.680672	5.882353	0.0000000	3.361345
4	0.6329114	30.379747	18.354430	0.6329114	5.063291
5	2.1978022	71.428571	24.175824	1.0989011	7.692308
6	0.8403361	21.848739	28.571429	0.8403361	7.563025

Combine all data into one table#

# Assemble the final percentage table: the area identifier first, followed by
# the indicator columns of every domain (column order follows argument order)
indicator_domains_pct <- cbind(
  census_area_id,
  age_domain_pct,
  health_domain_pct,
  income_domain_pct,
  info_domain_pct,
  local_knowledge_domain_pct,
  mobility_domain_pct,
  tenure_domain_pct,
  physical_access_domain_pct,
  social_network_domain_pct,
  housing_characteristics_domain_pct
)
head(indicator_domains_pct)

A data.frame: 6 × 26
	GUID	early_childhood_pct	age_middle_to_oldest_old_pct	poor_health_pct	disability_pct	one_parent_households_pct	three_or_more_children_households_pct	low_skilled_employment_pct	farmers_pct	unemployment_pct	⋯	rented_pct	journey_time_pct	not_volunteers_pct	primary_school_age_pct	one_person_households_pct	no_central_heating_pct	private_water_supply_pct	year_built_pct	mobile_home_pct	unoccupied_dwellings_pct
	<chr>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	⋯	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>
1	4c07d11e-11d3-851d-e053-ca3ca8c0ca7f	6.443914	4.534606	1.670644	19.09308	6.569343	16.788321	23.62768	14.081146	8.830549	⋯	5.250597	7.159905	87.35084	14.08115	15.32847	0.0000000	87.591241	17.518248	1.4598540	10.218978
2	4c07d11e-123a-851d-e053-ca3ca8c0ca7f	6.086957	11.014493	2.028986	25.50725	8.474576	12.711864	17.10145	9.565217	11.884058	⋯	6.086957	6.376812	79.71014	11.30435	14.40678	1.6949153	76.271186	18.644068	0.0000000	3.389831
3	4c07d11e-14b1-851d-e053-ca3ca8c0ca7f	5.621302	3.550296	3.550296	23.66864	14.285714	9.243697	15.97633	0.591716	13.609467	⋯	24.852071	5.621302	83.72781	13.01775	19.32773	0.8403361	1.680672	5.882353	0.0000000	3.361345
4	bf640964-28f3-4ccf-a610-04685d80ea2e	8.883827	8.883827	2.277904	20.95672	7.594937	7.594937	12.98405	4.100228	12.300683	⋯	9.794989	3.644647	82.23235	12.98405	22.15190	0.6329114	30.379747	18.354430	0.6329114	5.063291
5	4c07d11d-f709-851d-e053-ca3ca8c0ca7f	6.274510	5.098039	1.960784	16.47059	8.791209	13.186813	16.86275	12.941176	11.764706	⋯	3.529412	6.274510	83.92157	13.33333	21.97802	2.1978022	71.428571	24.175824	1.0989011	7.692308
6	4c07d11e-1237-851d-e053-ca3ca8c0ca7f	7.267442	8.139535	2.616279	27.32558	8.403361	14.285714	27.03488	12.209302	15.697674	⋯	9.883721	8.139535	81.10465	12.79070	16.80672	0.8403361	21.848739	28.571429	0.8403361	7.563025

Calculate Z-Score#

The raw data are not suitable for use within the vulnerability assessment. They need to be standardised. Therefore, the data are converted to z-scores. Z-scores are:

“A statistical measurement of a score’s relationship to the mean (average value) in a group of scores. A Z-score of 0 means the score is the same as the mean (average value). A Z-score can be positive or negative, indicating whether it is above or below the mean and by how many standard deviations. Z-score standardisation represents the deviation of a raw score from its mean in standard deviation units.” (Kazmierczak et al., 2015)

Calculate the Z-score#

# Copy the data so the percentage table itself is left untouched
indicator_z_scores <- indicator_domains_pct

# Every column after the first (the area identifier) is an indicator
indicator_cols <- names(indicator_z_scores)[-1]

# Replace each indicator column by its z-score: (value - mean) / sd.
# scale() returns a one-column matrix; as.numeric() flattens it so the data
# frame holds plain numeric columns rather than embedded <dbl[,1]> matrices.
indicator_z_scores[indicator_cols] <- lapply(
  indicator_z_scores[indicator_cols],
  function(values) as.numeric(scale(values))
)

# Rename the columns: strip only the trailing "_pct" suffix, so that a "_pct"
# occurring elsewhere in a name would be left alone
names(indicator_z_scores)[-1] <- sub("_pct$", "", indicator_cols)

head(indicator_z_scores)

A data.frame: 6 × 26
	GUID	early_childhood	age_middle_to_oldest_old	poor_health	disability	one_parent_households	three_or_more_children_households	low_skilled_employment	farmers	unemployment	⋯	rented	journey_time	not_volunteers	primary_school_age	one_person_households	no_central_heating	private_water_supply	year_built	mobile_home	unoccupied_dwellings
	<chr>	<dbl[,1]>	<dbl[,1]>	<dbl[,1]>	<dbl[,1]>	<dbl[,1]>	<dbl[,1]>	<dbl[,1]>	<dbl[,1]>	<dbl[,1]>	⋯	<dbl[,1]>	<dbl[,1]>	<dbl[,1]>	<dbl[,1]>	<dbl[,1]>	<dbl[,1]>	<dbl[,1]>	<dbl[,1]>	<dbl[,1]>	<dbl[,1]>
1	4c07d11e-11d3-851d-e053-ca3ca8c0ca7f	-0.1079508	-0.4448091	-0.0867229	-0.4589362	-0.6992867	0.7971503	0.3821265	1.8818244	-0.83703083	⋯	-0.9475582	0.6458280	0.2721853	0.53543614	-0.7988385	-0.7784105	2.8290832	0.1247138	0.9507964	-0.2685905
2	4c07d11e-123a-851d-e053-ca3ca8c0ca7f	-0.2315741	0.7875206	0.1669895	0.5523710	-0.4125867	0.1677345	-0.3005233	1.0930868	-0.26121797	⋯	-0.9117719	0.3856925	-1.2744132	-0.09998755	-0.8885092	0.3283476	2.3937964	0.1865261	-0.1877926	-0.5759916
3	4c07d11e-14b1-851d-e053-ca3ca8c0ca7f	-0.3928420	-0.6320030	1.2441066	0.2624819	0.4618750	-0.3677598	-0.4182116	-0.4741963	0.06414964	⋯	-0.1088473	0.1347198	-0.4611731	0.29209620	-0.4097515	-0.2296817	-0.4744110	-0.5141473	-0.1877926	-0.5772738
4	bf640964-28f3-4ccf-a610-04685d80ea2e	0.7370521	0.3823155	0.3432288	-0.1651002	-0.5449551	-0.6223328	-0.7312063	0.1385892	-0.18265319	⋯	-0.7531121	-0.5219048	-0.7638791	0.28438525	-0.1349893	-0.3651274	0.6291462	0.1706238	0.3058362	-0.5006639
5	4c07d11d-f709-851d-e053-ca3ca8c0ca7f	-0.1666197	-0.3376567	0.1187018	-0.8724183	-0.3649398	0.2410678	-0.3254920	1.6827210	-0.28372469	⋯	-1.0212046	0.3517089	-0.4219533	0.36431183	-0.1519057	0.6567264	2.2075847	0.4902435	0.6692771	-0.3823238
6	4c07d11e-1237-851d-e053-ca3ca8c0ca7f	0.1772575	0.2407678	0.5828047	0.8390637	-0.4233032	0.4107411	0.7385225	1.5548942	0.45793149	⋯	-0.7493154	0.9712513	-0.9921427	0.24013874	-0.6550195	-0.2296817	0.3011058	0.7315812	0.4676137	-0.3881432

Output the Z-score data#

# Write the z-score table to the pipeline directory as a CSV file, without a
# row-name column
output_file <- file.path(pipeline_dir, "censusDataZ.csv")
write.csv(x = indicator_z_scores, file = output_file, row.names = FALSE)

END