Social Vulnerability Ireland - Census Data#

Environment#

R Libraries#

Any required R libraries are imported into the kernel:

# Load R libraries
## none required

Output directory#

# Create the pipeline output directory if it does not already exist.
pipeline_dir <- file.path("../..", "2_pipeline", "Ireland", "1a_CensusData", "2022")
if (!dir.exists(pipeline_dir)) {
  # dir.create() reports failure by returning FALSE (with a warning), so check
  # the result instead of announcing success unconditionally. Failing here is
  # preferable to failing at the final write.csv() after all the processing.
  if (!dir.create(pipeline_dir, recursive = TRUE)) {
    stop("Could not create output directory: ", pipeline_dir, call. = FALSE)
  }
  print(paste0(pipeline_dir, " created"))
}

Load Data#

Import the csv data#

Ireland census data from: https://www.cso.ie/en/census/census2022/census2022smallareapopulationstatistics

# Read the census data
census_data <- read.csv('../../0_data/census/Ireland/2022/SAPS_2022_Small_Area_UR_171024.csv', sep = ",")

# Remove the 'IE0' row from the Census 2022 CSV supplied by CSO, then reset the
# row names to the automatic 1..n sequence. Assigning NULL is used instead of
# 1:nrow(census_data), which evaluates to c(1, 0) if no rows remain.
census_data <- census_data[census_data$GUID != "IE0", ]
row.names(census_data) <- NULL

# Preview the first rows to visually check the import
head(census_data)
A data.frame: 6 × 795
GUIDGEOGIDGEOGDESCUR_CategoryUR_Category_DescT1_1AGE0MT1_1AGE1MT1_1AGE2MT1_1AGE3MT1_1AGE4MT15_1_2CT15_1_3CT15_1_GE4CT15_1_NSCT15_1_TCT15_2_BBT15_2_OICT15_2_NOT15_2_NST15_2_T
<chr><chr><chr><int><chr><int><int><int><int><int><int><int><int><int><int><int><int><int><int><int>
14c07d11e-11d3-851d-e053-ca3ca8c0ca7f017001001 017001001 44. Rural areas with high urban influence 06110712067135 8037 9 9135
24c07d11e-123a-851d-e053-ca3ca8c0ca7f017002001 017002001 44. Rural areas with high urban influence 12124601462118 901115 2118
34c07d11e-14b1-851d-e053-ca3ca8c0ca7f017002002 017002002 33. Independent urban towns 1512045 733119101 310 5119
4bf640964-28f3-4ccf-a610-04685d80ea2e017002003/01017002003/0144. Rural areas with high urban influence 45114751587157134 311 9157
54c07d11d-f709-851d-e053-ca3ca8c0ca7f017003001 017003001 55. Rural areas with moderate urban influence01111311969 90 50161410 90
64c07d11e-1237-851d-e053-ca3ca8c0ca7f017003002 017003002 55. Rural areas with moderate urban influence02313571168118 851610 7118

Prepare data#

We only require a subset of the census data for our purposes. We therefore need to extract the relevant data, then combine these to create our vulnerability indicators.

In addition, the raw data is not suitable for use within the vulnerability assessment. It needs to be normalised by the number of people/households within each small area. Therefore, the data is converted to percentages of the total persons/households within each small area.

Supporting data#

Code that uniquely identifies the census area#

# Name of the column that identifies each census area
identifier <- "GUID"

# Keep the identifier as a one-column data frame so it can be column-bound
# with the indicator tables later on
census_area_id <- census_data[identifier]

# Number of rows (census areas) in the census data
nrow(census_data)
18919

Population total#

# Population total per census area (field T1_1AGETT), kept as a one-column
# data frame named 'population_total'; used as the denominator for
# person-based indicators
population_total <- setNames(census_data["T1_1AGETT"], "population_total")
head(population_total)
A data.frame: 6 × 1
population_total
<int>
1419
2345
3338
4439
5255
6344

Households / families total#

# Households total per census area (field T5_1T_H), kept as a one-column
# data frame named 'households_total'; used as the denominator for
# household-based indicators
households_total <- setNames(census_data["T5_1T_H"], "households_total")
head(households_total)
A data.frame: 6 × 1
households_total
<int>
1137
2118
3119
4158
5 91
6119

Domain data#

Age domain#

### Domain: Age ###

# Indicator: early childhood (0 to 5 years old, inclusive)
# Fields T1_1AGE0T ... T1_1AGE5T are the "Age N - Total" counts for N = 0..5
early_childhood_fields <- paste0("T1_1AGE", 0:5, "T")
early_childhood_data <- census_data[early_childhood_fields]
early_childhood <- rowSums(early_childhood_data, na.rm = TRUE)
# Dividing by the one-column population data frame yields a data frame,
# which is then given the indicator's name
early_childhood_pct <- setNames(
  (early_childhood / population_total) * 100.0,
  "early_childhood_pct"
)

# Indicator: middle to oldest old (75+ years old)
age_middle_to_oldest_old_fields <- c(
  "T1_1AGE75_79T",  # Age 75 - 79 - Total
  "T1_1AGE80_84T",  # Age 80 - 84 - Total
  "T1_1AGEGE_85T"   # Age 85 and over - Total
)
age_middle_to_oldest_old_data <- census_data[age_middle_to_oldest_old_fields]
age_middle_to_oldest_old <- rowSums(age_middle_to_oldest_old_data, na.rm = TRUE)
age_middle_to_oldest_old_pct <- setNames(
  (age_middle_to_oldest_old / population_total) * 100.0,
  "age_middle_to_oldest_old_pct"
)

# Gather the indicators of this domain into a single table
age_domain_pct <- cbind(
  early_childhood_pct,
  age_middle_to_oldest_old_pct
)

# Show the first six rows as a visual sanity check
head(age_domain_pct)
A data.frame: 6 × 2
early_childhood_pctage_middle_to_oldest_old_pct
<dbl><dbl>
16.443914 4.534606
26.08695711.014493
35.621302 3.550296
48.883827 8.883827
56.274510 5.098039
67.267442 8.139535

Health domain#

### Domain: Health ###

# Indicator: poor health
## The Irish census offers: very good, good, fair, bad, very bad, and not stated
poor_health_fields <- c(
  "T12_3_BT",  # Bad - Total
  "T12_3_VBT"  # Very bad - Total
)
poor_health_data <- census_data[poor_health_fields]
poor_health <- rowSums(poor_health_data, na.rm = TRUE)
poor_health_pct <- setNames(
  (poor_health / population_total) * 100.0,
  "poor_health_pct"
)

# Indicator: disability
disability_fields <- "T12_1_T"  # Persons with disability - Total
disability_data <- census_data[disability_fields]
disability <- rowSums(disability_data, na.rm = TRUE)
disability_pct <- setNames(
  (disability / population_total) * 100.0,
  "disability_pct"
)

# Gather the indicators of this domain into a single table
health_domain_pct <- cbind(
  poor_health_pct,
  disability_pct
)

# Show the first six rows as a visual sanity check
head(health_domain_pct)
A data.frame: 6 × 2
poor_health_pctdisability_pct
<dbl><dbl>
11.67064419.09308
22.02898625.50725
33.55029623.66864
42.27790420.95672
51.96078416.47059
62.61627927.32558

Income domain#

### Domain: Income ###

# Indicator: one parent households (percentage of all households)
one_parent_households_fields <- c(
  "T5_1OPFC_H",   # One parent family (father) with children households (No. of households)
  "T5_1OPMC_H",   # One parent family (mother) and children households (No. of households)
  "T5_1OPFCO_H",  # One parent family (father) with children and others households (No. of households)
  "T5_1OPMCO_H"   # One parent family (mother) with children and others households (No. of households)
)
one_parent_households_data <- census_data[one_parent_households_fields]
one_parent_households <- rowSums(one_parent_households_data, na.rm = TRUE)
one_parent_households_pct <- setNames(
  (one_parent_households / households_total) * 100.0,
  "one_parent_households_pct"
)

# Indicator: three or more children per household
# TODO
## For census purposes, a family is defined as a couple with or without children,
## or a one parent family with one or more children.
## Family members must be usual residents of the relevant household.
## Note: families can include children aged 18 years and over (now adults) living with their parents
##  The family counts below are divided by total households, not total families
##  Total households = family households + one-person households + non-family households
## Would these fields (all children aged under 15) be better?:
##  T4_2_3CU15    Families with 3 children - All children aged under 15
##  T4_2_4CU15    Families with 4 children - All children aged under 15
##  T4_2_GE5CU15  Families with 5 or more children - All children aged under 15
three_or_more_children_households_fields <- c(
  "T4_2_3CT",   # Families with 3 children - Total
  "T4_2_4CT",   # Families with 4 children - Total
  "T4_2_GE5CT"  # Families with 5+ children - Total
)
three_or_more_children_households_data <- census_data[three_or_more_children_households_fields]
three_or_more_children_households <- rowSums(three_or_more_children_households_data, na.rm = TRUE)
three_or_more_children_households_pct <- setNames(
  (three_or_more_children_households / households_total) * 100.0,
  "three_or_more_children_households_pct"
)

# Indicator: low skilled employment (percentage of all persons)
low_skilled_employment_fields <- c(
  "T9_2_PE",  # E Manual skilled (No. of persons)
  "T9_2_PF",  # F Semi-skilled (No. of persons)
  "T9_2_PG"   # G Unskilled (No. of persons)
)
low_skilled_employment_data <- census_data[low_skilled_employment_fields]
low_skilled_employment <- rowSums(low_skilled_employment_data, na.rm = TRUE)
low_skilled_employment_pct <- setNames(
  (low_skilled_employment / population_total) * 100.0,
  "low_skilled_employment_pct"
)

# Indicator: farmers (percentage of all persons)
farmers_fields <- c(
  "T9_2_PI"  # Farmers (No. of persons)
  # "T9_2_PJ"  # Agricultural workers (No. of persons) Forestry and fishing also included
)
farmers_data <- census_data[farmers_fields]
farmers <- rowSums(farmers_data, na.rm = TRUE)
farmers_pct <- setNames(
  (farmers / population_total) * 100.0,
  "farmers_pct"
)

# Indicator: unemployment (percentage of all persons)
unemployment_fields <- c(
  "T8_1_LFFJT",  # Looking for first regular job - Total
  "T8_1_STUT",   # Short term unemployed - Total
  "T8_1_LTUT",   # Long term unemployed - Total
  "T8_1_LAHFT",  # Looking after home/family - Total (NOT SURE ABOUT THIS ONE)
  "T8_1_UTWSDT"  # Unable to work due to permanent sickness or disability - Total (MAY CORRELATE WITH HEALTH TOO MUCH)
)
unemployment_data <- census_data[unemployment_fields]
unemployment <- rowSums(unemployment_data, na.rm = TRUE)
unemployment_pct <- setNames(
  (unemployment / population_total) * 100.0,
  "unemployment_pct"
)

# Gather the indicators of this domain into a single table
income_domain_pct <- cbind(
  one_parent_households_pct,
  three_or_more_children_households_pct,
  low_skilled_employment_pct,
  farmers_pct,
  unemployment_pct
)

# Show the first six rows as a visual sanity check
head(income_domain_pct)
A data.frame: 6 × 5
one_parent_households_pctthree_or_more_children_households_pctlow_skilled_employment_pctfarmers_pctunemployment_pct
<dbl><dbl><dbl><dbl><dbl>
1 6.56934316.78832123.6276814.081146 8.830549
2 8.47457612.71186417.10145 9.56521711.884058
314.285714 9.24369715.97633 0.59171613.609467
4 7.594937 7.59493712.98405 4.10022812.300683
5 8.79120913.18681316.8627512.94117611.764706
6 8.40336114.28571427.0348812.20930215.697674

Information Access/Use domain#

### Domain: Information Access/Use ###

# Indicator: no higher education (percentage of all persons)
no_higher_education_fields <- c(
  "T10_4_NFT"  # No formal education - Total
  # "T10_4_PT"  # Primary education - Total
)
no_higher_education_data <- census_data[no_higher_education_fields]
no_higher_education <- rowSums(no_higher_education_data, na.rm = TRUE)
no_higher_education_pct <- setNames(
  (no_higher_education / population_total) * 100.0,
  "no_higher_education_pct"
)

# Indicator: speakers of foreign languages by ability to speak English
speak_english_fields <- c(
  "T2_6NW",  # Not well
  "T2_6NAA"  # Not at all
)
speak_english_data <- census_data[speak_english_fields]
speak_english <- rowSums(speak_english_data, na.rm = TRUE)
speak_english_pct <- setNames(
  (speak_english / population_total) * 100.0,
  "speak_english_pct"
)

# Indicator: households with no Internet connection (percentage of all households)
no_internet_fields <- "T15_2_NO"  # No internet connection
no_internet_data <- census_data[no_internet_fields]
no_internet <- rowSums(no_internet_data, na.rm = TRUE)
no_internet_pct <- setNames(
  (no_internet / households_total) * 100.0,
  "no_internet_pct"
)

# Gather the indicators of this domain into a single table
info_domain_pct <- cbind(
  no_higher_education_pct,
  speak_english_pct,
  no_internet_pct
)

# Show the first six rows as a visual sanity check
head(info_domain_pct)
A data.frame: 6 × 3
no_higher_education_pctspeak_english_pctno_internet_pct
<dbl><dbl><dbl>
10.71599050.4773270 6.569343
20.28985510.289855112.711864
30.88757400.5917160 8.403361
40.45558090.6833713 6.962025
50.39215690.000000015.384615
62.90697670.5813953 8.403361

Local knowledge domain#

### Domain: Local Knowledge ###

# Indicator: new residents - usually resident population aged 1 year and over
# by usual residence 1 year before Census Day
new_residents_fields <- c(
  "T2_3EI",  # Elsewhere in Ireland
  "T2_3OI"   # Outside Ireland
)
new_residents_data <- census_data[new_residents_fields]
new_residents <- rowSums(new_residents_data, na.rm = TRUE)
new_residents_pct <- setNames(
  (new_residents / population_total) * 100.0,
  "new_residents_pct"
)

# Indicator: foreign nationals (born outside Ireland)
# Computed as "total birthplace" minus "birthplace Ireland": the Ireland count
# is negated so that a row sum of the two columns gives the difference
birthplace_total <- census_data["T2_1TBP"]      # T2_1TBP:  Total - Birthplace
birthplace_ireland <- -census_data["T2_1IEBP"]  # T2_1IEBP: Ireland - Birthplace (made negative)
foreign_nationals_data <- data.frame(birthplace_total, birthplace_ireland)
foreign_nationals <- rowSums(foreign_nationals_data, na.rm = TRUE)
foreign_nationals_pct <- setNames(
  (foreign_nationals / population_total) * 100.0,
  "foreign_nationals_pct"
)

# Gather the indicators of this domain into a single table
local_knowledge_domain_pct <- cbind(
  new_residents_pct,
  foreign_nationals_pct
)

# Show the first six rows as a visual sanity check
head(local_knowledge_domain_pct)
A data.frame: 6 × 2
new_residents_pctforeign_nationals_pct
<dbl><dbl>
11.9093079 4.773270
20.8695652 5.797101
31.183432013.609467
43.4168565 7.517084
52.3529412 7.843137
63.1976744 8.139535

Mobility domain#

### Domain: Mobility ###

# Indicator: households with no motor car (percentage of all households)
no_car_fields <- "T15_1_NC"  # No motor car
no_car_data <- census_data[no_car_fields]
no_car <- rowSums(no_car_data, na.rm = TRUE)
no_car_pct <- setNames(
  (no_car / households_total) * 100.0,
  "no_car_pct"
)

# Gather the indicators of this domain into a single table
mobility_domain_pct <- cbind(no_car_pct)

# Show the first six rows as a visual sanity check
head(mobility_domain_pct)
A data.frame: 6 × 1
no_car_pct
<dbl>
12.919708
24.237288
32.521008
43.797468
51.098901
61.680672

Physical access domain#

### Domain: Physical access ###

# Indicator: journey time - population aged 5 years and over by journey time
# to work, school or college (only journeys of one hour or more are counted)
journey_time_fields <- c(
  "T11_3_D5",  # 1 hour - under 1 1/2 hours
  "T11_3_D6"   # 1 1/2 hours and over
)
journey_time_data <- census_data[journey_time_fields]
journey_time <- rowSums(journey_time_data, na.rm = TRUE)
journey_time_pct <- setNames(
  (journey_time / population_total) * 100.0,
  "journey_time_pct"
)

# Gather the indicators of this domain into a single table
physical_access_domain_pct <- cbind(journey_time_pct)

# Show the first six rows as a visual sanity check
head(physical_access_domain_pct)
A data.frame: 6 × 1
journey_time_pct
<dbl>
17.159905
26.376812
35.621302
43.644647
56.274510
68.139535

Tenure domain#

### Domain: Tenure ###

# Indicator: permanent private households by type of occupancy
# The fields are person counts, so the share is taken over the population total
rented_fields <- c(
  "T6_3_RPLP",   # Rented from private landlord (No. of persons)
  "T6_3_RLAP",   # Rented from Local Authority (No. of persons)
  "T6_3_RVCHBP"  # Rented from voluntary/co-operative housing body (No. of persons)
)
rented_data <- census_data[rented_fields]
rented <- rowSums(rented_data, na.rm = TRUE)
rented_pct <- setNames(
  (rented / population_total) * 100.0,
  "rented_pct"
)

# Gather the indicators of this domain into a single table
tenure_domain_pct <- cbind(rented_pct)

# Show the first six rows as a visual sanity check
head(tenure_domain_pct)
A data.frame: 6 × 1
rented_pct
<dbl>
1 5.250597
2 6.086957
324.852071
4 9.794989
5 3.529412
6 9.883721

Social Network domain data#

### Domain: Social Network ###

# Indicator: not volunteers
# The census reports the number of volunteers; the complement is taken
# against the population total
volunteers_fields <- "T7_1_VOL"  # Number of volunteers
volunteers_data <- census_data[volunteers_fields]
volunteers <- rowSums(volunteers_data, na.rm = TRUE)
not_volunteers <- population_total - volunteers
not_volunteers_pct <- setNames(
  (not_volunteers / population_total) * 100.0,
  "not_volunteers_pct"
)

# Indicator: primary school age children
# Fields T1_1AGE4T ... T1_1AGE12T are the "Age N - Total" counts for N = 4..12
primary_school_age_fields <- paste0("T1_1AGE", 4:12, "T")
primary_school_age_data <- census_data[primary_school_age_fields]
primary_school_age <- rowSums(primary_school_age_data, na.rm = TRUE)
primary_school_age_pct <- setNames(
  (primary_school_age / population_total) * 100.0,
  "primary_school_age_pct"
)

# Indicator: one person households (percentage of all households)
one_person_households_fields <- "T5_1OP_H"  # One person households
one_person_households_data <- census_data[one_person_households_fields]
one_person_households <- rowSums(one_person_households_data, na.rm = TRUE)
one_person_households_pct <- setNames(
  (one_person_households / households_total) * 100.0,
  "one_person_households_pct"
)

# Gather the indicators of this domain into a single table
social_network_domain_pct <- cbind(
  not_volunteers_pct,
  primary_school_age_pct,
  one_person_households_pct
)

# Show the first six rows as a visual sanity check
head(social_network_domain_pct)
A data.frame: 6 × 3
not_volunteers_pctprimary_school_age_pctone_person_households_pct
<dbl><dbl><dbl>
187.3508414.0811515.32847
279.7101411.3043514.40678
383.7278113.0177519.32773
482.2323512.9840522.15190
583.9215713.3333321.97802
681.1046512.7907016.80672

Housing Characteristics domain#

### Domain: Housing Characteristics ###
# Each indicator below sums the listed census fields per area and expresses
# the result as a percentage of the total households in that area.

# Indicator: Permanent private households by central heating
no_central_heating_fields <- c(
  "T6_5_NCH"  # No central heating
)
no_central_heating_data <- census_data[, no_central_heating_fields, drop = FALSE]
no_central_heating <- rowSums(no_central_heating_data, na.rm = TRUE)
no_central_heating_pct <- (no_central_heating / households_total) * 100.0
names(no_central_heating_pct)[1] <- "no_central_heating_pct"

# Indicator: Permanent private households by water supply
private_water_supply_fields <- c(
  "T6_6_GSP",  # Group scheme with private source
  "T6_6_OP"    # Other private source
)
private_water_supply_data <- census_data[, private_water_supply_fields, drop = FALSE]
private_water_supply <- rowSums(private_water_supply_data, na.rm = TRUE)
private_water_supply_pct <- (private_water_supply / households_total) * 100.0
names(private_water_supply_pct)[1] <- "private_water_supply_pct"

# Indicator: Permanent private households by year built
# (computed once; an identical second copy of this section was removed)
year_built_fields <- c(
  "T6_2_PRE19H",  # Pre 1919 (No. of households)
  "T6_2_19_45H"   # 1919 - 1945 (No. of households)
)
year_built_data <- census_data[, year_built_fields, drop = FALSE]
year_built <- rowSums(year_built_data, na.rm = TRUE)
year_built_pct <- (year_built / households_total) * 100.0
names(year_built_pct)[1] <- "year_built_pct"

# Indicator: Private households by type of accommodation
mobile_home_fields <- c(
  "T6_1_CM_H"  # Caravan/Mobile home (No. of households)
)
mobile_home_data <- census_data[, mobile_home_fields, drop = FALSE]
mobile_home <- rowSums(mobile_home_data, na.rm = TRUE)
mobile_home_pct <- (mobile_home / households_total) * 100.0
names(mobile_home_pct)[1] <- "mobile_home_pct"

# Indicator: Occupancy status of permanent dwellings on Census night
unoccupied_dwellings_fields <- c(
  "T6_8_TA",   # Temporarily absent
  "T6_8_UHH",  # Unoccupied holiday homes
  "T6_8_OVD"   # Other vacant dwellings
)
unoccupied_dwellings_data <- census_data[, unoccupied_dwellings_fields, drop = FALSE]
unoccupied_dwellings <- rowSums(unoccupied_dwellings_data, na.rm = TRUE)
unoccupied_dwellings_pct <- (unoccupied_dwellings / households_total) * 100.0
names(unoccupied_dwellings_pct)[1] <- "unoccupied_dwellings_pct"

# Combine all these indicators into a single table for this domain
housing_characteristics_domain_pct <- cbind(
  no_central_heating_pct,
  private_water_supply_pct,
  year_built_pct,
  mobile_home_pct,
  unoccupied_dwellings_pct
)

# Print the first six rows of the data to visually check it looks OK
head(housing_characteristics_domain_pct)
A data.frame: 6 × 5
no_central_heating_pctprivate_water_supply_pctyear_built_pctmobile_home_pctunoccupied_dwellings_pct
<dbl><dbl><dbl><dbl><dbl>
10.000000087.59124117.5182481.459854010.218978
21.694915376.27118618.6440680.0000000 3.389831
30.8403361 1.680672 5.8823530.0000000 3.361345
40.632911430.37974718.3544300.6329114 5.063291
52.197802271.42857124.1758241.0989011 7.692308
60.840336121.84873928.5714290.8403361 7.563025

Combine all data into one table#

# Column-bind the area identifier and every domain table into one wide table.
# The order of the list determines the column order of the result.
indicator_domains_pct <- do.call(
  cbind,
  list(
    census_area_id,
    age_domain_pct,
    health_domain_pct,
    income_domain_pct,
    info_domain_pct,
    local_knowledge_domain_pct,
    mobility_domain_pct,
    tenure_domain_pct,
    physical_access_domain_pct,
    social_network_domain_pct,
    housing_characteristics_domain_pct
  )
)
head(indicator_domains_pct)
A data.frame: 6 × 26
GUIDearly_childhood_pctage_middle_to_oldest_old_pctpoor_health_pctdisability_pctone_parent_households_pctthree_or_more_children_households_pctlow_skilled_employment_pctfarmers_pctunemployment_pctrented_pctjourney_time_pctnot_volunteers_pctprimary_school_age_pctone_person_households_pctno_central_heating_pctprivate_water_supply_pctyear_built_pctmobile_home_pctunoccupied_dwellings_pct
<chr><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
14c07d11e-11d3-851d-e053-ca3ca8c0ca7f6.443914 4.5346061.67064419.09308 6.56934316.78832123.6276814.081146 8.830549 5.2505977.15990587.3508414.0811515.328470.000000087.59124117.5182481.459854010.218978
24c07d11e-123a-851d-e053-ca3ca8c0ca7f6.08695711.0144932.02898625.50725 8.47457612.71186417.10145 9.56521711.884058 6.0869576.37681279.7101411.3043514.406781.694915376.27118618.6440680.0000000 3.389831
34c07d11e-14b1-851d-e053-ca3ca8c0ca7f5.621302 3.5502963.55029623.6686414.285714 9.24369715.97633 0.59171613.60946724.8520715.62130283.7278113.0177519.327730.8403361 1.680672 5.8823530.0000000 3.361345
4bf640964-28f3-4ccf-a610-04685d80ea2e8.883827 8.8838272.27790420.95672 7.594937 7.59493712.98405 4.10022812.300683 9.7949893.64464782.2323512.9840522.151900.632911430.37974718.3544300.6329114 5.063291
54c07d11d-f709-851d-e053-ca3ca8c0ca7f6.274510 5.0980391.96078416.47059 8.79120913.18681316.8627512.94117611.764706 3.5294126.27451083.9215713.3333321.978022.197802271.42857124.1758241.0989011 7.692308
64c07d11e-1237-851d-e053-ca3ca8c0ca7f7.267442 8.1395352.61627927.32558 8.40336114.28571427.0348812.20930215.697674 9.8837218.13953581.1046512.7907016.806720.840336121.84873928.5714290.8403361 7.563025

Calculate Z-Score#

The raw data is not suitable for use within the vulnerability assessment. It needs to be standardised. Therefore, the data is converted to z-scores. Z-scores are:

“A statistical measurement of a score’s relationship to the mean (average value) in a group of scores. A Z-score of 0 means the score is the same as the mean (average value). A Z-score can be positive or negative, indicating whether it is above or below the mean and by how many standard deviations. Z-score standardisation represents the deviation of a raw score from its mean in standard deviation units.” (Kazmierczak et al., 2015)

Calculate the Z-score#

# Work on a copy so the percentage table stays available
indicator_z_scores <- indicator_domains_pct

# Every column after the first (the area identifier) is an indicator
indicator_cols <- names(indicator_z_scores)[-1]

# Standardise each indicator column to z-scores.
# scale() returns a one-column matrix; as.numeric() turns it back into a plain
# numeric vector so the data frame does not end up with matrix columns.
indicator_z_scores[indicator_cols] <- lapply(
  indicator_z_scores[indicator_cols],
  function(values) as.numeric(scale(values))
)

# Drop the '_pct' suffix from the column names. The pattern is anchored to the
# end of the name so that '_pct' occurring elsewhere in a name is left intact.
names(indicator_z_scores)[-1] <- sub("_pct$", "", indicator_cols)

head(indicator_z_scores)
A data.frame: 6 × 26
GUIDearly_childhoodage_middle_to_oldest_oldpoor_healthdisabilityone_parent_householdsthree_or_more_children_householdslow_skilled_employmentfarmersunemploymentrentedjourney_timenot_volunteersprimary_school_ageone_person_householdsno_central_heatingprivate_water_supplyyear_builtmobile_homeunoccupied_dwellings
<chr><dbl[,1]><dbl[,1]><dbl[,1]><dbl[,1]><dbl[,1]><dbl[,1]><dbl[,1]><dbl[,1]><dbl[,1]><dbl[,1]><dbl[,1]><dbl[,1]><dbl[,1]><dbl[,1]><dbl[,1]><dbl[,1]><dbl[,1]><dbl[,1]><dbl[,1]>
14c07d11e-11d3-851d-e053-ca3ca8c0ca7f-0.1079508-0.4448091-0.0867229-0.4589362-0.6992867 0.7971503 0.3821265 1.8818244-0.83703083-0.9475582 0.6458280 0.2721853 0.53543614-0.7988385-0.7784105 2.8290832 0.1247138 0.9507964-0.2685905
24c07d11e-123a-851d-e053-ca3ca8c0ca7f-0.2315741 0.7875206 0.1669895 0.5523710-0.4125867 0.1677345-0.3005233 1.0930868-0.26121797-0.9117719 0.3856925-1.2744132-0.09998755-0.8885092 0.3283476 2.3937964 0.1865261-0.1877926-0.5759916
34c07d11e-14b1-851d-e053-ca3ca8c0ca7f-0.3928420-0.6320030 1.2441066 0.2624819 0.4618750-0.3677598-0.4182116-0.4741963 0.06414964-0.1088473 0.1347198-0.4611731 0.29209620-0.4097515-0.2296817-0.4744110-0.5141473-0.1877926-0.5772738
4bf640964-28f3-4ccf-a610-04685d80ea2e 0.7370521 0.3823155 0.3432288-0.1651002-0.5449551-0.6223328-0.7312063 0.1385892-0.18265319-0.7531121-0.5219048-0.7638791 0.28438525-0.1349893-0.3651274 0.6291462 0.1706238 0.3058362-0.5006639
54c07d11d-f709-851d-e053-ca3ca8c0ca7f-0.1666197-0.3376567 0.1187018-0.8724183-0.3649398 0.2410678-0.3254920 1.6827210-0.28372469-1.0212046 0.3517089-0.4219533 0.36431183-0.1519057 0.6567264 2.2075847 0.4902435 0.6692771-0.3823238
64c07d11e-1237-851d-e053-ca3ca8c0ca7f 0.1772575 0.2407678 0.5828047 0.8390637-0.4233032 0.4107411 0.7385225 1.5548942 0.45793149-0.7493154 0.9712513-0.9921427 0.24013874-0.6550195-0.2296817 0.3011058 0.7315812 0.4676137-0.3881432

Output the Z-score data#

# Output the z-score data as a csv file
output_file <- file.path(pipeline_dir, "censusDataZ.csv")
write.csv(indicator_z_scores, output_file, row.names = FALSE)

END