{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Social Vulnerability Milan - Census Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Environment"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### R Libraries\n",
    "Any required R libraries are imported into the kernel:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load R libraries\n",
    "## none required"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Output directory"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create the pipeline output directory if it does not already exist\n",
    "pipeline_dir <- file.path(\"../..\",\"2_pipeline\",\"Italy\",\"Milan\",\"1a_CensusData\",\"2021\")\n",
    "if(!dir.exists(pipeline_dir)){\n",
    "    # dir.create() returns FALSE (with a warning) when it fails, so check the\n",
    "    # result instead of reporting a directory that was never created.\n",
    "    if(!dir.create(pipeline_dir, recursive = TRUE)){\n",
    "        stop(paste0(\"Could not create \", pipeline_dir))\n",
    "    }\n",
    "    print(paste0(pipeline_dir, \" created\"))\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Import the csv data\n",
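    "Italian census data for Milan (2021 indicators by census section, `Milano_indicatori_2021_sezioni.csv`). TODO: confirm the source link - the reference previously given here (https://www.cso.ie/en/census/census2022/census2022smallareapopulationstatistics) is the Irish CSO census site and does not cover this dataset."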
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"dataframe\">\n",
       "<caption>A data.frame: 6 × 136</caption>\n",
       "<thead>\n",
       "\t<tr><th></th><th scope=col>CODREG</th><th scope=col>REGIONE</th><th scope=col>CODPRO</th><th scope=col>PROVINCIA</th><th scope=col>CODCOM</th><th scope=col>COMUNE</th><th scope=col>PROCOM</th><th scope=col>SEZ2011</th><th scope=col>ASC_1LIV_COD</th><th scope=col>ASC_1LIV_NOME</th><th scope=col>⋯</th><th scope=col>CIT_1_EGY</th><th scope=col>CIT_2_PHL</th><th scope=col>CIT_3_CHN</th><th scope=col>CIT_4_LKA</th><th scope=col>CIT_5_PER</th><th scope=col>CIT_6_ROU</th><th scope=col>CIT_7_BGD</th><th scope=col>CIT_8_ECU</th><th scope=col>CIT_9_MAR</th><th scope=col>CIT_10_UKR</th></tr>\n",
       "\t<tr><th></th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>⋯</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "\t<tr><th scope=row>1</th><td>3</td><td>Lombardia</td><td>15</td><td>Milano</td><td>146</td><td>Milano</td><td>15146</td><td>1.5146e+11</td><td>15146001</td><td>Centro Storico</td><td>⋯</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>1</td></tr>\n",
       "\t<tr><th scope=row>2</th><td>3</td><td>Lombardia</td><td>15</td><td>Milano</td><td>146</td><td>Milano</td><td>15146</td><td>1.5146e+11</td><td>15146001</td><td>Centro Storico</td><td>⋯</td><td>0</td><td>4</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td></tr>\n",
       "\t<tr><th scope=row>3</th><td>3</td><td>Lombardia</td><td>15</td><td>Milano</td><td>146</td><td>Milano</td><td>15146</td><td>1.5146e+11</td><td>15146001</td><td>Centro Storico</td><td>⋯</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td></tr>\n",
       "\t<tr><th scope=row>4</th><td>3</td><td>Lombardia</td><td>15</td><td>Milano</td><td>146</td><td>Milano</td><td>15146</td><td>1.5146e+11</td><td>15146001</td><td>Centro Storico</td><td>⋯</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td></tr>\n",
       "\t<tr><th scope=row>5</th><td>3</td><td>Lombardia</td><td>15</td><td>Milano</td><td>146</td><td>Milano</td><td>15146</td><td>1.5146e+11</td><td>15146001</td><td>Centro Storico</td><td>⋯</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td></tr>\n",
       "\t<tr><th scope=row>6</th><td>3</td><td>Lombardia</td><td>15</td><td>Milano</td><td>146</td><td>Milano</td><td>15146</td><td>1.5146e+11</td><td>15146001</td><td>Centro Storico</td><td>⋯</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td></tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/latex": [
       "A data.frame: 6 × 136\n",
       "\\begin{tabular}{r|lllllllllllllllllllll}\n",
       "  & CODREG & REGIONE & CODPRO & PROVINCIA & CODCOM & COMUNE & PROCOM & SEZ2011 & ASC\\_1LIV\\_COD & ASC\\_1LIV\\_NOME & ⋯ & CIT\\_1\\_EGY & CIT\\_2\\_PHL & CIT\\_3\\_CHN & CIT\\_4\\_LKA & CIT\\_5\\_PER & CIT\\_6\\_ROU & CIT\\_7\\_BGD & CIT\\_8\\_ECU & CIT\\_9\\_MAR & CIT\\_10\\_UKR\\\\\n",
       "  & <int> & <chr> & <int> & <chr> & <int> & <chr> & <int> & <dbl> & <int> & <chr> & ⋯ & <int> & <int> & <int> & <int> & <int> & <int> & <int> & <int> & <int> & <int>\\\\\n",
       "\\hline\n",
       "\t1 & 3 & Lombardia & 15 & Milano & 146 & Milano & 15146 & 1.5146e+11 & 15146001 & Centro Storico & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 1\\\\\n",
       "\t2 & 3 & Lombardia & 15 & Milano & 146 & Milano & 15146 & 1.5146e+11 & 15146001 & Centro Storico & ⋯ & 0 & 4 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n",
       "\t3 & 3 & Lombardia & 15 & Milano & 146 & Milano & 15146 & 1.5146e+11 & 15146001 & Centro Storico & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n",
       "\t4 & 3 & Lombardia & 15 & Milano & 146 & Milano & 15146 & 1.5146e+11 & 15146001 & Centro Storico & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n",
       "\t5 & 3 & Lombardia & 15 & Milano & 146 & Milano & 15146 & 1.5146e+11 & 15146001 & Centro Storico & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n",
       "\t6 & 3 & Lombardia & 15 & Milano & 146 & Milano & 15146 & 1.5146e+11 & 15146001 & Centro Storico & ⋯ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\\n",
       "\\end{tabular}\n"
      ],
      "text/markdown": [
       "\n",
       "A data.frame: 6 × 136\n",
       "\n",
       "| <!--/--> | CODREG &lt;int&gt; | REGIONE &lt;chr&gt; | CODPRO &lt;int&gt; | PROVINCIA &lt;chr&gt; | CODCOM &lt;int&gt; | COMUNE &lt;chr&gt; | PROCOM &lt;int&gt; | SEZ2011 &lt;dbl&gt; | ASC_1LIV_COD &lt;int&gt; | ASC_1LIV_NOME &lt;chr&gt; | ⋯ ⋯ | CIT_1_EGY &lt;int&gt; | CIT_2_PHL &lt;int&gt; | CIT_3_CHN &lt;int&gt; | CIT_4_LKA &lt;int&gt; | CIT_5_PER &lt;int&gt; | CIT_6_ROU &lt;int&gt; | CIT_7_BGD &lt;int&gt; | CIT_8_ECU &lt;int&gt; | CIT_9_MAR &lt;int&gt; | CIT_10_UKR &lt;int&gt; |\n",
       "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n",
       "| 1 | 3 | Lombardia | 15 | Milano | 146 | Milano | 15146 | 1.5146e+11 | 15146001 | Centro Storico | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |\n",
       "| 2 | 3 | Lombardia | 15 | Milano | 146 | Milano | 15146 | 1.5146e+11 | 15146001 | Centro Storico | ⋯ | 0 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n",
       "| 3 | 3 | Lombardia | 15 | Milano | 146 | Milano | 15146 | 1.5146e+11 | 15146001 | Centro Storico | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n",
       "| 4 | 3 | Lombardia | 15 | Milano | 146 | Milano | 15146 | 1.5146e+11 | 15146001 | Centro Storico | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n",
       "| 5 | 3 | Lombardia | 15 | Milano | 146 | Milano | 15146 | 1.5146e+11 | 15146001 | Centro Storico | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n",
       "| 6 | 3 | Lombardia | 15 | Milano | 146 | Milano | 15146 | 1.5146e+11 | 15146001 | Centro Storico | ⋯ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n",
       "\n"
      ],
      "text/plain": [
       "  CODREG REGIONE   CODPRO PROVINCIA CODCOM COMUNE PROCOM SEZ2011   \n",
       "1 3      Lombardia 15     Milano    146    Milano 15146  1.5146e+11\n",
       "2 3      Lombardia 15     Milano    146    Milano 15146  1.5146e+11\n",
       "3 3      Lombardia 15     Milano    146    Milano 15146  1.5146e+11\n",
       "4 3      Lombardia 15     Milano    146    Milano 15146  1.5146e+11\n",
       "5 3      Lombardia 15     Milano    146    Milano 15146  1.5146e+11\n",
       "6 3      Lombardia 15     Milano    146    Milano 15146  1.5146e+11\n",
       "  ASC_1LIV_COD ASC_1LIV_NOME  ⋯ CIT_1_EGY CIT_2_PHL CIT_3_CHN CIT_4_LKA\n",
       "1 15146001     Centro Storico ⋯ 0         0         0         0        \n",
       "2 15146001     Centro Storico ⋯ 0         4         0         0        \n",
       "3 15146001     Centro Storico ⋯ 0         0         0         0        \n",
       "4 15146001     Centro Storico ⋯ 0         0         0         0        \n",
       "5 15146001     Centro Storico ⋯ 0         0         0         0        \n",
       "6 15146001     Centro Storico ⋯ 0         0         0         0        \n",
       "  CIT_5_PER CIT_6_ROU CIT_7_BGD CIT_8_ECU CIT_9_MAR CIT_10_UKR\n",
       "1 0         0         0         0         0         1         \n",
       "2 0         0         0         0         0         0         \n",
       "3 0         0         0         0         0         0         \n",
       "4 0         0         0         0         0         0         \n",
       "5 0         0         0         0         0         0         \n",
       "6 0         0         0         0         0         0         "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Load the comma-separated Milan census indicators into a data frame\n",
    "census_data <- read.csv(\n",
    "    file = '../../0_data/census/Italy/Milan/2021/Milano_indicatori_2021_sezioni.csv',\n",
    "    sep = \",\"\n",
    ")\n",
    "\n",
    "# Preview the first six rows\n",
    "head(census_data, n = 6)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Prepare data\n",
    "We only require a subset of the census data for our purposes. We therefore need to extract the relevant data, then combine these to create our vulnerability indicators.\n",
    "\n",
    "In addition, the raw data is not suitable for use within the vulnerability assessment. It needs to be normalised based on the number of people/households within each small area. Therefore, the data is converted to percentages based on the total persons/households within each small area."
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Supporting data"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Code that uniquely identifies the census area"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "5740"
      ],
      "text/latex": [
       "5740"
      ],
      "text/markdown": [
       "5740"
      ],
      "text/plain": [
       "[1] 5740"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Column holding the code that uniquely identifies each census area\n",
    "identifier <- c('SEZ2011')\n",
    "census_area_id <- census_data[, identifier, drop = FALSE]\n",
    "\n",
    "# number of rows in the census data (nrow() is already a single value, so no head() is needed)\n",
    "nrow(census_data)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Population total"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"dataframe\">\n",
       "<caption>A data.frame: 6 × 1</caption>\n",
       "<thead>\n",
       "\t<tr><th></th><th scope=col>population_total</th></tr>\n",
       "\t<tr><th></th><th scope=col>&lt;int&gt;</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "\t<tr><th scope=row>1</th><td>13</td></tr>\n",
       "\t<tr><th scope=row>2</th><td>16</td></tr>\n",
       "\t<tr><th scope=row>3</th><td>17</td></tr>\n",
       "\t<tr><th scope=row>4</th><td>11</td></tr>\n",
       "\t<tr><th scope=row>5</th><td> 1</td></tr>\n",
       "\t<tr><th scope=row>6</th><td> 6</td></tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/latex": [
       "A data.frame: 6 × 1\n",
       "\\begin{tabular}{r|l}\n",
       "  & population\\_total\\\\\n",
       "  & <int>\\\\\n",
       "\\hline\n",
       "\t1 & 13\\\\\n",
       "\t2 & 16\\\\\n",
       "\t3 & 17\\\\\n",
       "\t4 & 11\\\\\n",
       "\t5 &  1\\\\\n",
       "\t6 &  6\\\\\n",
       "\\end{tabular}\n"
      ],
      "text/markdown": [
       "\n",
       "A data.frame: 6 × 1\n",
       "\n",
       "| <!--/--> | population_total &lt;int&gt; |\n",
       "|---|---|\n",
       "| 1 | 13 |\n",
       "| 2 | 16 |\n",
       "| 3 | 17 |\n",
       "| 4 | 11 |\n",
       "| 5 |  1 |\n",
       "| 6 |  6 |\n",
       "\n"
      ],
      "text/plain": [
       "  population_total\n",
       "1 13              \n",
       "2 16              \n",
       "3 17              \n",
       "4 11              \n",
       "5  1              \n",
       "6  6              "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Population total per census row: census column P1, kept as a one-column data frame\n",
    "population_total <- setNames(census_data[, 'P1', drop = FALSE], 'population_total')\n",
    "head(population_total)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Households / families total"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"dataframe\">\n",
       "<caption>A data.frame: 6 × 1</caption>\n",
       "<thead>\n",
       "\t<tr><th></th><th scope=col>households_total</th></tr>\n",
       "\t<tr><th></th><th scope=col>&lt;int&gt;</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "\t<tr><th scope=row>1</th><td> 9</td></tr>\n",
       "\t<tr><th scope=row>2</th><td> 7</td></tr>\n",
       "\t<tr><th scope=row>3</th><td>17</td></tr>\n",
       "\t<tr><th scope=row>4</th><td> 7</td></tr>\n",
       "\t<tr><th scope=row>5</th><td> 1</td></tr>\n",
       "\t<tr><th scope=row>6</th><td> 3</td></tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/latex": [
       "A data.frame: 6 × 1\n",
       "\\begin{tabular}{r|l}\n",
       "  & households\\_total\\\\\n",
       "  & <int>\\\\\n",
       "\\hline\n",
       "\t1 &  9\\\\\n",
       "\t2 &  7\\\\\n",
       "\t3 & 17\\\\\n",
       "\t4 &  7\\\\\n",
       "\t5 &  1\\\\\n",
       "\t6 &  3\\\\\n",
       "\\end{tabular}\n"
      ],
      "text/markdown": [
       "\n",
       "A data.frame: 6 × 1\n",
       "\n",
       "| <!--/--> | households_total &lt;int&gt; |\n",
       "|---|---|\n",
       "| 1 |  9 |\n",
       "| 2 |  7 |\n",
       "| 3 | 17 |\n",
       "| 4 |  7 |\n",
       "| 5 |  1 |\n",
       "| 6 |  3 |\n",
       "\n"
      ],
      "text/plain": [
       "  households_total\n",
       "1  9              \n",
       "2  7              \n",
       "3 17              \n",
       "4  7              \n",
       "5  1              \n",
       "6  3              "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Households / families total per census row: census column PF1, kept as a one-column data frame\n",
    "households_total <- census_data['PF1']\n",
    "colnames(households_total) <- 'households_total'\n",
    "head(households_total)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Domain data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Age domain"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"dataframe\">\n",
       "<caption>A data.frame: 6 × 4</caption>\n",
       "<thead>\n",
       "\t<tr><th></th><th scope=col>early_childhood_boy_pct</th><th scope=col>early_childhood_girl_pct</th><th scope=col>age_middle_to_oldest_old_male_pct</th><th scope=col>age_middle_to_oldest_old_female_pct</th></tr>\n",
       "\t<tr><th></th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;dbl&gt;</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "\t<tr><th scope=row>1</th><td>0</td><td>0</td><td> 15.38462</td><td>0.00</td></tr>\n",
       "\t<tr><th scope=row>2</th><td>0</td><td>0</td><td>  6.25000</td><td>6.25</td></tr>\n",
       "\t<tr><th scope=row>3</th><td>0</td><td>0</td><td> 23.52941</td><td>0.00</td></tr>\n",
       "\t<tr><th scope=row>4</th><td>0</td><td>0</td><td> 18.18182</td><td>0.00</td></tr>\n",
       "\t<tr><th scope=row>5</th><td>0</td><td>0</td><td>100.00000</td><td>0.00</td></tr>\n",
       "\t<tr><th scope=row>6</th><td>0</td><td>0</td><td>  0.00000</td><td>0.00</td></tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/latex": [
       "A data.frame: 6 × 4\n",
       "\\begin{tabular}{r|llll}\n",
       "  & early\\_childhood\\_boy\\_pct & early\\_childhood\\_girl\\_pct & age\\_middle\\_to\\_oldest\\_old\\_male\\_pct & age\\_middle\\_to\\_oldest\\_old\\_female\\_pct\\\\\n",
       "  & <dbl> & <dbl> & <dbl> & <dbl>\\\\\n",
       "\\hline\n",
       "\t1 & 0 & 0 &  15.38462 & 0.00\\\\\n",
       "\t2 & 0 & 0 &   6.25000 & 6.25\\\\\n",
       "\t3 & 0 & 0 &  23.52941 & 0.00\\\\\n",
       "\t4 & 0 & 0 &  18.18182 & 0.00\\\\\n",
       "\t5 & 0 & 0 & 100.00000 & 0.00\\\\\n",
       "\t6 & 0 & 0 &   0.00000 & 0.00\\\\\n",
       "\\end{tabular}\n"
      ],
      "text/markdown": [
       "\n",
       "A data.frame: 6 × 4\n",
       "\n",
       "| <!--/--> | early_childhood_boy_pct &lt;dbl&gt; | early_childhood_girl_pct &lt;dbl&gt; | age_middle_to_oldest_old_male_pct &lt;dbl&gt; | age_middle_to_oldest_old_female_pct &lt;dbl&gt; |\n",
       "|---|---|---|---|---|\n",
       "| 1 | 0 | 0 |  15.38462 | 0.00 |\n",
       "| 2 | 0 | 0 |   6.25000 | 6.25 |\n",
       "| 3 | 0 | 0 |  23.52941 | 0.00 |\n",
       "| 4 | 0 | 0 |  18.18182 | 0.00 |\n",
       "| 5 | 0 | 0 | 100.00000 | 0.00 |\n",
       "| 6 | 0 | 0 |   0.00000 | 0.00 |\n",
       "\n"
      ],
      "text/plain": [
       "  early_childhood_boy_pct early_childhood_girl_pct\n",
       "1 0                       0                       \n",
       "2 0                       0                       \n",
       "3 0                       0                       \n",
       "4 0                       0                       \n",
       "5 0                       0                       \n",
       "6 0                       0                       \n",
       "  age_middle_to_oldest_old_male_pct age_middle_to_oldest_old_female_pct\n",
       "1  15.38462                         0.00                               \n",
       "2   6.25000                         6.25                               \n",
       "3  23.52941                         0.00                               \n",
       "4  18.18182                         0.00                               \n",
       "5 100.00000                         0.00                               \n",
       "6   0.00000                         0.00                               "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "### Domain:Age ###\n",
    "\n",
    "# Age - early childhood boy (under 5 years old), as a percentage of the total population\n",
    "early_childhood_boy_fields <- c(\n",
    "    'P30' # Age 0 to 4 boys\n",
    ")\n",
    "early_childhood_boy_data <- census_data[, early_childhood_boy_fields, drop = FALSE]\n",
    "early_childhood_boy <- rowSums(early_childhood_boy_data, na.rm=TRUE)\n",
    "early_childhood_boy_pct <- (early_childhood_boy / population_total) * 100.0\n",
    "names(early_childhood_boy_pct)[1] <- 'early_childhood_boy_pct'\n",
    "\n",
    "# Age - early childhood girl (under 5 years old), as a percentage of the total population\n",
    "early_childhood_girl_fields <- c(\n",
    "    'P67' # Age 0 to 4 girls\n",
    ")\n",
    "early_childhood_girl_data <- census_data[, early_childhood_girl_fields, drop = FALSE]\n",
    "early_childhood_girl <- rowSums(early_childhood_girl_data, na.rm=TRUE)\n",
    "early_childhood_girl_pct <- (early_childhood_girl / population_total) * 100.0\n",
    "names(early_childhood_girl_pct)[1] <- 'early_childhood_girl_pct'\n",
    "\n",
    "# Age - middle to oldest old male (75+ years old), as a percentage of the total population\n",
    "age_middle_to_oldest_old_male_fields <- c(\n",
    "    'P45' # Age 75+ males\n",
    ")\n",
    "age_middle_to_oldest_old_male_data <- census_data[, age_middle_to_oldest_old_male_fields, drop = FALSE]\n",
    "age_middle_to_oldest_old_male <- rowSums(age_middle_to_oldest_old_male_data, na.rm=TRUE)\n",
    "age_middle_to_oldest_old_male_pct <- (age_middle_to_oldest_old_male / population_total) * 100.0\n",
    "names(age_middle_to_oldest_old_male_pct)[1] <- 'age_middle_to_oldest_old_male_pct'\n",
    "\n",
    "# Age - middle to oldest old female (75+ years old), as a percentage of the total population\n",
    "age_middle_to_oldest_old_female_fields <- c(\n",
    "    'P82' # Age 75+ females\n",
    ")\n",
    "age_middle_to_oldest_old_female_data <- census_data[, age_middle_to_oldest_old_female_fields, drop = FALSE]\n",
    "age_middle_to_oldest_old_female <- rowSums(age_middle_to_oldest_old_female_data, na.rm=TRUE)\n",
    "age_middle_to_oldest_old_female_pct <- (age_middle_to_oldest_old_female / population_total) * 100.0\n",
    "names(age_middle_to_oldest_old_female_pct)[1] <- 'age_middle_to_oldest_old_female_pct'\n",
    "\n",
    "# Combine all these indicators into a single data frame for this domain\n",
    "age_domain_pct <- cbind(early_childhood_boy_pct,\n",
    "                        early_childhood_girl_pct,\n",
    "                        age_middle_to_oldest_old_male_pct,\n",
    "                        age_middle_to_oldest_old_female_pct)\n",
    "\n",
    "# Print the first six rows of the data to visually check it looks OK\n",
    "head(age_domain_pct)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Income domain"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"dataframe\">\n",
       "<caption>A data.frame: 6 × 2</caption>\n",
       "<thead>\n",
       "\t<tr><th></th><th scope=col>dependants_pct</th><th scope=col>unemployment_pct</th></tr>\n",
       "\t<tr><th></th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;dbl&gt;</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "\t<tr><th scope=row>1</th><td> 7.692308</td><td>15.384615</td></tr>\n",
       "\t<tr><th scope=row>2</th><td>12.500000</td><td>31.250000</td></tr>\n",
       "\t<tr><th scope=row>3</th><td> 0.000000</td><td>17.647059</td></tr>\n",
       "\t<tr><th scope=row>4</th><td> 9.090909</td><td> 9.090909</td></tr>\n",
       "\t<tr><th scope=row>5</th><td> 0.000000</td><td> 0.000000</td></tr>\n",
       "\t<tr><th scope=row>6</th><td> 0.000000</td><td> 0.000000</td></tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/latex": [
       "A data.frame: 6 × 2\n",
       "\\begin{tabular}{r|ll}\n",
       "  & dependants\\_pct & unemployment\\_pct\\\\\n",
       "  & <dbl> & <dbl>\\\\\n",
       "\\hline\n",
       "\t1 &  7.692308 & 15.384615\\\\\n",
       "\t2 & 12.500000 & 31.250000\\\\\n",
       "\t3 &  0.000000 & 17.647059\\\\\n",
       "\t4 &  9.090909 &  9.090909\\\\\n",
       "\t5 &  0.000000 &  0.000000\\\\\n",
       "\t6 &  0.000000 &  0.000000\\\\\n",
       "\\end{tabular}\n"
      ],
      "text/markdown": [
       "\n",
       "A data.frame: 6 × 2\n",
       "\n",
       "| <!--/--> | dependants_pct &lt;dbl&gt; | unemployment_pct &lt;dbl&gt; |\n",
       "|---|---|---|\n",
       "| 1 |  7.692308 | 15.384615 |\n",
       "| 2 | 12.500000 | 31.250000 |\n",
       "| 3 |  0.000000 | 17.647059 |\n",
       "| 4 |  9.090909 |  9.090909 |\n",
       "| 5 |  0.000000 |  0.000000 |\n",
       "| 6 |  0.000000 |  0.000000 |\n",
       "\n"
      ],
      "text/plain": [
       "  dependants_pct unemployment_pct\n",
       "1  7.692308      15.384615       \n",
       "2 12.500000      31.250000       \n",
       "3  0.000000      17.647059       \n",
       "4  9.090909       9.090909       \n",
       "5  0.000000       0.000000       \n",
       "6  0.000000       0.000000       "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "### Domain:Income ###\n",
    "\n",
    "# Dependants - using under 15 years old for the 2021 Italian census (the input file and paths are for 2021)\n",
    "dependants_fields <- c(\n",
    "    'P14',\n",
    "    'P15',\n",
    "    'P16')\n",
    "dependants_data <- census_data[, dependants_fields, drop = FALSE]\n",
    "dependants <- rowSums(dependants_data, na.rm=TRUE)\n",
    "dependants_pct <- (dependants / population_total) * 100.0\n",
    "names(dependants_pct)[1] <- 'dependants_pct'\n",
    "\n",
    "# Unemployment\n",
    "## Calculate total population for ages 15-64.\n",
    "## Note: P101 = total employed people aged 15-64.\n",
    "## Note: Sum of P17, P18, P19, P20, P21, P22, P23, P24, P25 and P26 is total population for ages 15-64.\n",
    "## Note: to calculate unemployed we use: total population for ages 15-64 - P101.\n",
    "## Note: this unemployed figure might also include students not working; the percentage is taken over the total population, not over ages 15-64\n",
    "population_15to64_fields <- c(\n",
    "    'P17',\n",
    "    'P18',\n",
    "    'P19',\n",
    "    'P20',\n",
    "    'P21',\n",
    "    'P22',\n",
    "    'P23',\n",
    "    'P24',\n",
    "    'P25',\n",
    "    'P26')\n",
    "population_15to64_data <- census_data[, population_15to64_fields, drop = FALSE]\n",
    "population_15to64 <- rowSums(population_15to64_data, na.rm=TRUE)\n",
    "unemployment <- population_15to64 - census_data$P101\n",
    "unemployment_pct <- (unemployment / population_total) * 100.0\n",
    "names(unemployment_pct)[1] <- 'unemployment_pct'\n",
    "\n",
    "# Combine all these indicators into a single data frame for this domain\n",
    "income_domain_pct <- cbind(dependants_pct,\n",
    "                           unemployment_pct)\n",
    "\n",
    "# Print the first six rows of the data to visually check it looks OK\n",
    "head(income_domain_pct)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Information Access/Use domain"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"dataframe\">\n",
       "<caption>A data.frame: 6 × 1</caption>\n",
       "<thead>\n",
       "\t<tr><th></th><th scope=col>no_higher_education_pct</th></tr>\n",
       "\t<tr><th></th><th scope=col>&lt;dbl&gt;</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "\t<tr><th scope=row>1</th><td> 7.692308</td></tr>\n",
       "\t<tr><th scope=row>2</th><td>18.750000</td></tr>\n",
       "\t<tr><th scope=row>3</th><td> 0.000000</td></tr>\n",
       "\t<tr><th scope=row>4</th><td> 0.000000</td></tr>\n",
       "\t<tr><th scope=row>5</th><td> 0.000000</td></tr>\n",
       "\t<tr><th scope=row>6</th><td> 0.000000</td></tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/latex": [
       "A data.frame: 6 × 1\n",
       "\\begin{tabular}{r|l}\n",
       "  & no\\_higher\\_education\\_pct\\\\\n",
       "  & <dbl>\\\\\n",
       "\\hline\n",
       "\t1 &  7.692308\\\\\n",
       "\t2 & 18.750000\\\\\n",
       "\t3 &  0.000000\\\\\n",
       "\t4 &  0.000000\\\\\n",
       "\t5 &  0.000000\\\\\n",
       "\t6 &  0.000000\\\\\n",
       "\\end{tabular}\n"
      ],
      "text/markdown": [
       "\n",
       "A data.frame: 6 × 1\n",
       "\n",
       "| <!--/--> | no_higher_education_pct &lt;dbl&gt; |\n",
       "|---|---|\n",
       "| 1 |  7.692308 |\n",
       "| 2 | 18.750000 |\n",
       "| 3 |  0.000000 |\n",
       "| 4 |  0.000000 |\n",
       "| 5 |  0.000000 |\n",
       "| 6 |  0.000000 |\n",
       "\n"
      ],
      "text/plain": [
       "  no_higher_education_pct\n",
       "1  7.692308              \n",
       "2 18.750000              \n",
       "3  0.000000              \n",
       "4  0.000000              \n",
       "5  0.000000              \n",
       "6  0.000000              "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "### Domain:Information Access/Use ###\n",
    "\n",
    "# No higher education, as a percentage of the total population\n",
    "### Note: calculation test1 for P83 (total population age 9+) = P86 + P87 + P88 + P89 + P90 (without qualification + all primary to highest education levels)\n",
    "### Note: calculation test2 for P83 (total population age 9+) = P16 + P17 + P18 + P19 + P20 + P21 + P22 + P23 + P24 + P25 + P26 + P27 + P28 + P29 (age 10+) - NOTE(review): 9+ vs 10+ differ, confirm which applies\n",
    "### Note: using without qualification (P86) + primary/elementary schools qualification only (P87)\n",
    "no_higher_education_fields <- c(\n",
    "    'P86',\n",
    "    'P87'\n",
    ")\n",
    "no_higher_education_data <- census_data[, no_higher_education_fields, drop = FALSE]\n",
    "no_higher_education <- rowSums(no_higher_education_data, na.rm=TRUE)\n",
    "no_higher_education_pct <- (no_higher_education / population_total) * 100.0\n",
    "names(no_higher_education_pct)[1] <- 'no_higher_education_pct'\n",
    "\n",
    "# Combine all these indicators into a single data frame for this domain\n",
    "info_domain_pct <- cbind(no_higher_education_pct)\n",
    "\n",
    "# Print the first six rows of the data to visually check it looks OK\n",
    "head(info_domain_pct)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Local knowledge domain"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"dataframe\">\n",
       "<caption>A data.frame: 6 × 1</caption>\n",
       "<thead>\n",
       "\t<tr><th></th><th scope=col>foreign_nationals_pct</th></tr>\n",
       "\t<tr><th></th><th scope=col>&lt;dbl&gt;</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "\t<tr><th scope=row>1</th><td>23.07692</td></tr>\n",
       "\t<tr><th scope=row>2</th><td>31.25000</td></tr>\n",
       "\t<tr><th scope=row>3</th><td> 0.00000</td></tr>\n",
       "\t<tr><th scope=row>4</th><td>18.18182</td></tr>\n",
       "\t<tr><th scope=row>5</th><td> 0.00000</td></tr>\n",
       "\t<tr><th scope=row>6</th><td>66.66667</td></tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/latex": [
       "A data.frame: 6 × 1\n",
       "\\begin{tabular}{r|l}\n",
       "  & foreign\\_nationals\\_pct\\\\\n",
       "  & <dbl>\\\\\n",
       "\\hline\n",
       "\t1 & 23.07692\\\\\n",
       "\t2 & 31.25000\\\\\n",
       "\t3 &  0.00000\\\\\n",
       "\t4 & 18.18182\\\\\n",
       "\t5 &  0.00000\\\\\n",
       "\t6 & 66.66667\\\\\n",
       "\\end{tabular}\n"
      ],
      "text/markdown": [
       "\n",
       "A data.frame: 6 × 1\n",
       "\n",
       "| <!--/--> | foreign_nationals_pct &lt;dbl&gt; |\n",
       "|---|---|\n",
       "| 1 | 23.07692 |\n",
       "| 2 | 31.25000 |\n",
       "| 3 |  0.00000 |\n",
       "| 4 | 18.18182 |\n",
       "| 5 |  0.00000 |\n",
       "| 6 | 66.66667 |\n",
       "\n"
      ],
      "text/plain": [
       "  foreign_nationals_pct\n",
       "1 23.07692             \n",
       "2 31.25000             \n",
       "3  0.00000             \n",
       "4 18.18182             \n",
       "5  0.00000             \n",
       "6 66.66667             "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "### Domain:Local Knowledge ###\n",
    "\n",
    "# Foreign nationals (already a percentage)\n",
    "foreign_nationals_fields <- c(\n",
    "    'ST1'\n",
    ")\n",
    "foreign_nationals_data <- census_data[, foreign_nationals_fields, drop = FALSE]\n",
    "foreign_nationals <- rowSums(foreign_nationals_data, na.rm=TRUE)\n",
    "foreign_nationals_pct <- (foreign_nationals / population_total) * 100.0\n",
    "names(foreign_nationals_pct)[1] <- 'foreign_nationals_pct'\n",
    "\n",
    "# Combine all these indicators into an array for this domain\n",
    "local_knowledge_domain_pct <- cbind(foreign_nationals_pct)\n",
    "\n",
    "# Print the first six rows of the data to visually check it looks OK\n",
    "head(local_knowledge_domain_pct)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Social Network domain data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"dataframe\">\n",
       "<caption>A data.frame: 6 × 2</caption>\n",
       "<thead>\n",
       "\t<tr><th></th><th scope=col>primary_school_age_pct</th><th scope=col>one_person_households_pct</th></tr>\n",
       "\t<tr><th></th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;dbl&gt;</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "\t<tr><th scope=row>1</th><td>0.000000</td><td> 66.66667</td></tr>\n",
       "\t<tr><th scope=row>2</th><td>0.000000</td><td> 42.85714</td></tr>\n",
       "\t<tr><th scope=row>3</th><td>0.000000</td><td>100.00000</td></tr>\n",
       "\t<tr><th scope=row>4</th><td>9.090909</td><td> 57.14286</td></tr>\n",
       "\t<tr><th scope=row>5</th><td>0.000000</td><td>100.00000</td></tr>\n",
       "\t<tr><th scope=row>6</th><td>0.000000</td><td> 66.66667</td></tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/latex": [
       "A data.frame: 6 × 2\n",
       "\\begin{tabular}{r|ll}\n",
       "  & primary\\_school\\_age\\_pct & one\\_person\\_households\\_pct\\\\\n",
       "  & <dbl> & <dbl>\\\\\n",
       "\\hline\n",
       "\t1 & 0.000000 &  66.66667\\\\\n",
       "\t2 & 0.000000 &  42.85714\\\\\n",
       "\t3 & 0.000000 & 100.00000\\\\\n",
       "\t4 & 9.090909 &  57.14286\\\\\n",
       "\t5 & 0.000000 & 100.00000\\\\\n",
       "\t6 & 0.000000 &  66.66667\\\\\n",
       "\\end{tabular}\n"
      ],
      "text/markdown": [
       "\n",
       "A data.frame: 6 × 2\n",
       "\n",
       "| <!--/--> | primary_school_age_pct &lt;dbl&gt; | one_person_households_pct &lt;dbl&gt; |\n",
       "|---|---|---|\n",
       "| 1 | 0.000000 |  66.66667 |\n",
       "| 2 | 0.000000 |  42.85714 |\n",
       "| 3 | 0.000000 | 100.00000 |\n",
       "| 4 | 9.090909 |  57.14286 |\n",
       "| 5 | 0.000000 | 100.00000 |\n",
       "| 6 | 0.000000 |  66.66667 |\n",
       "\n"
      ],
      "text/plain": [
       "  primary_school_age_pct one_person_households_pct\n",
       "1 0.000000                66.66667                \n",
       "2 0.000000                42.85714                \n",
       "3 0.000000               100.00000                \n",
       "4 9.090909                57.14286                \n",
       "5 0.000000               100.00000                \n",
       "6 0.000000                66.66667                "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "### Domain:Social Network ###\n",
    "\n",
    "# Primary school age children\n",
    "primary_school_age_fields <- c(\n",
    "    'P15' # Primary School Age Children (Age 5-9) (Indicator P15)\n",
    ")\n",
    "primary_school_age_data <- census_data[, primary_school_age_fields, drop = FALSE]\n",
    "primary_school_age <- rowSums(primary_school_age_data, na.rm=TRUE)\n",
    "primary_school_age_pct <- (primary_school_age / population_total) * 100.0\n",
    "names(primary_school_age_pct)[1] <- 'primary_school_age_pct'\n",
    "\n",
    "# One person households\n",
    "one_person_households_fields<- c(\n",
    "    'PF3' # Households with one person (Indicator PF3)\n",
    ")\n",
    "one_person_households_data <- census_data[, one_person_households_fields, drop = FALSE]\n",
    "one_person_households <- rowSums(one_person_households_data, na.rm=TRUE)\n",
    "one_person_households_pct <- (one_person_households / households_total) * 100.0\n",
    "names(one_person_households_pct)[1] <- 'one_person_households_pct'\n",
    "\n",
    "# Combine all these indicators into an array for this domain\n",
    "social_network_domain_pct <- cbind(primary_school_age_pct,\n",
    "                                   one_person_households_pct)\n",
    "\n",
    "# Print the first six rows of the data to visually check it looks OK\n",
    "head(social_network_domain_pct)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Combine all data into one table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"dataframe\">\n",
       "<caption>A data.frame: 6 × 11</caption>\n",
       "<thead>\n",
       "\t<tr><th></th><th scope=col>SEZ2011</th><th scope=col>early_childhood_boy_pct</th><th scope=col>early_childhood_girl_pct</th><th scope=col>age_middle_to_oldest_old_male_pct</th><th scope=col>age_middle_to_oldest_old_female_pct</th><th scope=col>dependants_pct</th><th scope=col>unemployment_pct</th><th scope=col>no_higher_education_pct</th><th scope=col>foreign_nationals_pct</th><th scope=col>primary_school_age_pct</th><th scope=col>one_person_households_pct</th></tr>\n",
       "\t<tr><th></th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;dbl&gt;</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "\t<tr><th scope=row>1</th><td>1.5146e+11</td><td>0</td><td>0</td><td> 15.38462</td><td>0.00</td><td> 7.692308</td><td>15.384615</td><td> 7.692308</td><td>23.07692</td><td>0.000000</td><td> 66.66667</td></tr>\n",
       "\t<tr><th scope=row>2</th><td>1.5146e+11</td><td>0</td><td>0</td><td>  6.25000</td><td>6.25</td><td>12.500000</td><td>31.250000</td><td>18.750000</td><td>31.25000</td><td>0.000000</td><td> 42.85714</td></tr>\n",
       "\t<tr><th scope=row>3</th><td>1.5146e+11</td><td>0</td><td>0</td><td> 23.52941</td><td>0.00</td><td> 0.000000</td><td>17.647059</td><td> 0.000000</td><td> 0.00000</td><td>0.000000</td><td>100.00000</td></tr>\n",
       "\t<tr><th scope=row>4</th><td>1.5146e+11</td><td>0</td><td>0</td><td> 18.18182</td><td>0.00</td><td> 9.090909</td><td> 9.090909</td><td> 0.000000</td><td>18.18182</td><td>9.090909</td><td> 57.14286</td></tr>\n",
       "\t<tr><th scope=row>5</th><td>1.5146e+11</td><td>0</td><td>0</td><td>100.00000</td><td>0.00</td><td> 0.000000</td><td> 0.000000</td><td> 0.000000</td><td> 0.00000</td><td>0.000000</td><td>100.00000</td></tr>\n",
       "\t<tr><th scope=row>6</th><td>1.5146e+11</td><td>0</td><td>0</td><td>  0.00000</td><td>0.00</td><td> 0.000000</td><td> 0.000000</td><td> 0.000000</td><td>66.66667</td><td>0.000000</td><td> 66.66667</td></tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/latex": [
       "A data.frame: 6 × 11\n",
       "\\begin{tabular}{r|lllllllllll}\n",
       "  & SEZ2011 & early\\_childhood\\_boy\\_pct & early\\_childhood\\_girl\\_pct & age\\_middle\\_to\\_oldest\\_old\\_male\\_pct & age\\_middle\\_to\\_oldest\\_old\\_female\\_pct & dependants\\_pct & unemployment\\_pct & no\\_higher\\_education\\_pct & foreign\\_nationals\\_pct & primary\\_school\\_age\\_pct & one\\_person\\_households\\_pct\\\\\n",
       "  & <dbl> & <dbl> & <dbl> & <dbl> & <dbl> & <dbl> & <dbl> & <dbl> & <dbl> & <dbl> & <dbl>\\\\\n",
       "\\hline\n",
       "\t1 & 1.5146e+11 & 0 & 0 &  15.38462 & 0.00 &  7.692308 & 15.384615 &  7.692308 & 23.07692 & 0.000000 &  66.66667\\\\\n",
       "\t2 & 1.5146e+11 & 0 & 0 &   6.25000 & 6.25 & 12.500000 & 31.250000 & 18.750000 & 31.25000 & 0.000000 &  42.85714\\\\\n",
       "\t3 & 1.5146e+11 & 0 & 0 &  23.52941 & 0.00 &  0.000000 & 17.647059 &  0.000000 &  0.00000 & 0.000000 & 100.00000\\\\\n",
       "\t4 & 1.5146e+11 & 0 & 0 &  18.18182 & 0.00 &  9.090909 &  9.090909 &  0.000000 & 18.18182 & 9.090909 &  57.14286\\\\\n",
       "\t5 & 1.5146e+11 & 0 & 0 & 100.00000 & 0.00 &  0.000000 &  0.000000 &  0.000000 &  0.00000 & 0.000000 & 100.00000\\\\\n",
       "\t6 & 1.5146e+11 & 0 & 0 &   0.00000 & 0.00 &  0.000000 &  0.000000 &  0.000000 & 66.66667 & 0.000000 &  66.66667\\\\\n",
       "\\end{tabular}\n"
      ],
      "text/markdown": [
       "\n",
       "A data.frame: 6 × 11\n",
       "\n",
       "| <!--/--> | SEZ2011 &lt;dbl&gt; | early_childhood_boy_pct &lt;dbl&gt; | early_childhood_girl_pct &lt;dbl&gt; | age_middle_to_oldest_old_male_pct &lt;dbl&gt; | age_middle_to_oldest_old_female_pct &lt;dbl&gt; | dependants_pct &lt;dbl&gt; | unemployment_pct &lt;dbl&gt; | no_higher_education_pct &lt;dbl&gt; | foreign_nationals_pct &lt;dbl&gt; | primary_school_age_pct &lt;dbl&gt; | one_person_households_pct &lt;dbl&gt; |\n",
       "|---|---|---|---|---|---|---|---|---|---|---|---|\n",
       "| 1 | 1.5146e+11 | 0 | 0 |  15.38462 | 0.00 |  7.692308 | 15.384615 |  7.692308 | 23.07692 | 0.000000 |  66.66667 |\n",
       "| 2 | 1.5146e+11 | 0 | 0 |   6.25000 | 6.25 | 12.500000 | 31.250000 | 18.750000 | 31.25000 | 0.000000 |  42.85714 |\n",
       "| 3 | 1.5146e+11 | 0 | 0 |  23.52941 | 0.00 |  0.000000 | 17.647059 |  0.000000 |  0.00000 | 0.000000 | 100.00000 |\n",
       "| 4 | 1.5146e+11 | 0 | 0 |  18.18182 | 0.00 |  9.090909 |  9.090909 |  0.000000 | 18.18182 | 9.090909 |  57.14286 |\n",
       "| 5 | 1.5146e+11 | 0 | 0 | 100.00000 | 0.00 |  0.000000 |  0.000000 |  0.000000 |  0.00000 | 0.000000 | 100.00000 |\n",
       "| 6 | 1.5146e+11 | 0 | 0 |   0.00000 | 0.00 |  0.000000 |  0.000000 |  0.000000 | 66.66667 | 0.000000 |  66.66667 |\n",
       "\n"
      ],
      "text/plain": [
       "  SEZ2011    early_childhood_boy_pct early_childhood_girl_pct\n",
       "1 1.5146e+11 0                       0                       \n",
       "2 1.5146e+11 0                       0                       \n",
       "3 1.5146e+11 0                       0                       \n",
       "4 1.5146e+11 0                       0                       \n",
       "5 1.5146e+11 0                       0                       \n",
       "6 1.5146e+11 0                       0                       \n",
       "  age_middle_to_oldest_old_male_pct age_middle_to_oldest_old_female_pct\n",
       "1  15.38462                         0.00                               \n",
       "2   6.25000                         6.25                               \n",
       "3  23.52941                         0.00                               \n",
       "4  18.18182                         0.00                               \n",
       "5 100.00000                         0.00                               \n",
       "6   0.00000                         0.00                               \n",
       "  dependants_pct unemployment_pct no_higher_education_pct foreign_nationals_pct\n",
       "1  7.692308      15.384615         7.692308               23.07692             \n",
       "2 12.500000      31.250000        18.750000               31.25000             \n",
       "3  0.000000      17.647059         0.000000                0.00000             \n",
       "4  9.090909       9.090909         0.000000               18.18182             \n",
       "5  0.000000       0.000000         0.000000                0.00000             \n",
       "6  0.000000       0.000000         0.000000               66.66667             \n",
       "  primary_school_age_pct one_person_households_pct\n",
       "1 0.000000                66.66667                \n",
       "2 0.000000                42.85714                \n",
       "3 0.000000               100.00000                \n",
       "4 9.090909                57.14286                \n",
       "5 0.000000               100.00000                \n",
       "6 0.000000                66.66667                "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Combine all data into one table\n",
    "indicator_domains_pct <- cbind(census_area_id,\n",
    "                               age_domain_pct,\n",
    "                               income_domain_pct,\n",
    "                               info_domain_pct,\n",
    "                               local_knowledge_domain_pct,\n",
    "                               social_network_domain_pct)\n",
    "head(indicator_domains_pct)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Calculate Z-Score\n",
    "The raw data is not suitable for use within the vulnerabiltiy assessment. It needs to be standardised. Therefore, the data is converted to z-scores. Z-scores are:\n",
    "\n",
    ">\"A statistical measurement of a score's relationship to the mean (average value) in a group of scores. A Z-score of 0 means the score is the same as the mean (average value). A Z-score can be positive or negative, indicating whether it is above or below the mean and by how many standard deviations. Z-score standardisation represents the deviation of a raw score from its mean in standard deviation units.\" (Kazmierczak et al., 2015)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Calculate the Z-score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"dataframe\">\n",
       "<caption>A data.frame: 6 × 11</caption>\n",
       "<thead>\n",
       "\t<tr><th></th><th scope=col>SEZ2011</th><th scope=col>early_childhood_boy</th><th scope=col>early_childhood_girl</th><th scope=col>age_middle_to_oldest_old_male</th><th scope=col>age_middle_to_oldest_old_female</th><th scope=col>dependants</th><th scope=col>unemployment</th><th scope=col>no_higher_education</th><th scope=col>foreign_nationals</th><th scope=col>primary_school_age</th><th scope=col>one_person_households</th></tr>\n",
       "\t<tr><th></th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;dbl[,1]&gt;</th><th scope=col>&lt;dbl[,1]&gt;</th><th scope=col>&lt;dbl[,1]&gt;</th><th scope=col>&lt;dbl[,1]&gt;</th><th scope=col>&lt;dbl[,1]&gt;</th><th scope=col>&lt;dbl[,1]&gt;</th><th scope=col>&lt;dbl[,1]&gt;</th><th scope=col>&lt;dbl[,1]&gt;</th><th scope=col>&lt;dbl[,1]&gt;</th><th scope=col>&lt;dbl[,1]&gt;</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "\t<tr><th scope=row>1</th><td>1.5146e+11</td><td>-1.230411</td><td>-1.064144</td><td> 2.1196292</td><td>-1.3062152</td><td>-0.7801407</td><td>-0.5060886</td><td>-0.6075338</td><td> 0.284861877</td><td>-1.458348</td><td> 0.9333249</td></tr>\n",
       "\t<tr><th scope=row>2</th><td>1.5146e+11</td><td>-1.230411</td><td>-1.064144</td><td> 0.2302043</td><td>-0.3189938</td><td> 0.1197864</td><td> 1.3419462</td><td> 0.8849880</td><td> 0.768744204</td><td>-1.458348</td><td>-0.7026211</td></tr>\n",
       "\t<tr><th scope=row>3</th><td>1.5146e+11</td><td>-1.230411</td><td>-1.064144</td><td> 3.8043177</td><td>-1.3062152</td><td>-2.2200240</td><td>-0.2425543</td><td>-1.6458099</td><td>-1.081394102</td><td>-1.458348</td><td> 3.2236494</td></tr>\n",
       "\t<tr><th scope=row>4</th><td>1.5146e+11</td><td>-1.230411</td><td>-1.064144</td><td> 2.6982091</td><td>-1.3062152</td><td>-0.5183437</td><td>-1.2391934</td><td>-1.6458099</td><td>-0.004949997</td><td> 1.865648</td><td> 0.2789465</td></tr>\n",
       "\t<tr><th scope=row>5</th><td>1.5146e+11</td><td>-1.230411</td><td>-1.064144</td><td>19.6216703</td><td>-1.3062152</td><td>-2.2200240</td><td>-2.2981224</td><td>-1.6458099</td><td>-1.081394102</td><td>-1.458348</td><td> 3.2236494</td></tr>\n",
       "\t<tr><th scope=row>6</th><td>1.5146e+11</td><td>-1.230411</td><td>-1.064144</td><td>-1.0625601</td><td>-1.3062152</td><td>-2.2200240</td><td>-2.2981224</td><td>-1.6458099</td><td> 2.865567617</td><td>-1.458348</td><td> 0.9333249</td></tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/latex": [
       "A data.frame: 6 × 11\n",
       "\\begin{tabular}{r|lllllllllll}\n",
       "  & SEZ2011 & early\\_childhood\\_boy & early\\_childhood\\_girl & age\\_middle\\_to\\_oldest\\_old\\_male & age\\_middle\\_to\\_oldest\\_old\\_female & dependants & unemployment & no\\_higher\\_education & foreign\\_nationals & primary\\_school\\_age & one\\_person\\_households\\\\\n",
       "  & <dbl> & <dbl{[},1{]}> & <dbl{[},1{]}> & <dbl{[},1{]}> & <dbl{[},1{]}> & <dbl{[},1{]}> & <dbl{[},1{]}> & <dbl{[},1{]}> & <dbl{[},1{]}> & <dbl{[},1{]}> & <dbl{[},1{]}>\\\\\n",
       "\\hline\n",
       "\t1 & 1.5146e+11 & -1.230411 & -1.064144 &  2.1196292 & -1.3062152 & -0.7801407 & -0.5060886 & -0.6075338 &  0.284861877 & -1.458348 &  0.9333249\\\\\n",
       "\t2 & 1.5146e+11 & -1.230411 & -1.064144 &  0.2302043 & -0.3189938 &  0.1197864 &  1.3419462 &  0.8849880 &  0.768744204 & -1.458348 & -0.7026211\\\\\n",
       "\t3 & 1.5146e+11 & -1.230411 & -1.064144 &  3.8043177 & -1.3062152 & -2.2200240 & -0.2425543 & -1.6458099 & -1.081394102 & -1.458348 &  3.2236494\\\\\n",
       "\t4 & 1.5146e+11 & -1.230411 & -1.064144 &  2.6982091 & -1.3062152 & -0.5183437 & -1.2391934 & -1.6458099 & -0.004949997 &  1.865648 &  0.2789465\\\\\n",
       "\t5 & 1.5146e+11 & -1.230411 & -1.064144 & 19.6216703 & -1.3062152 & -2.2200240 & -2.2981224 & -1.6458099 & -1.081394102 & -1.458348 &  3.2236494\\\\\n",
       "\t6 & 1.5146e+11 & -1.230411 & -1.064144 & -1.0625601 & -1.3062152 & -2.2200240 & -2.2981224 & -1.6458099 &  2.865567617 & -1.458348 &  0.9333249\\\\\n",
       "\\end{tabular}\n"
      ],
      "text/markdown": [
       "\n",
       "A data.frame: 6 × 11\n",
       "\n",
       "| <!--/--> | SEZ2011 &lt;dbl&gt; | early_childhood_boy &lt;dbl[,1]&gt; | early_childhood_girl &lt;dbl[,1]&gt; | age_middle_to_oldest_old_male &lt;dbl[,1]&gt; | age_middle_to_oldest_old_female &lt;dbl[,1]&gt; | dependants &lt;dbl[,1]&gt; | unemployment &lt;dbl[,1]&gt; | no_higher_education &lt;dbl[,1]&gt; | foreign_nationals &lt;dbl[,1]&gt; | primary_school_age &lt;dbl[,1]&gt; | one_person_households &lt;dbl[,1]&gt; |\n",
       "|---|---|---|---|---|---|---|---|---|---|---|---|\n",
       "| 1 | 1.5146e+11 | -1.230411 | -1.064144 |  2.1196292 | -1.3062152 | -0.7801407 | -0.5060886 | -0.6075338 |  0.284861877 | -1.458348 |  0.9333249 |\n",
       "| 2 | 1.5146e+11 | -1.230411 | -1.064144 |  0.2302043 | -0.3189938 |  0.1197864 |  1.3419462 |  0.8849880 |  0.768744204 | -1.458348 | -0.7026211 |\n",
       "| 3 | 1.5146e+11 | -1.230411 | -1.064144 |  3.8043177 | -1.3062152 | -2.2200240 | -0.2425543 | -1.6458099 | -1.081394102 | -1.458348 |  3.2236494 |\n",
       "| 4 | 1.5146e+11 | -1.230411 | -1.064144 |  2.6982091 | -1.3062152 | -0.5183437 | -1.2391934 | -1.6458099 | -0.004949997 |  1.865648 |  0.2789465 |\n",
       "| 5 | 1.5146e+11 | -1.230411 | -1.064144 | 19.6216703 | -1.3062152 | -2.2200240 | -2.2981224 | -1.6458099 | -1.081394102 | -1.458348 |  3.2236494 |\n",
       "| 6 | 1.5146e+11 | -1.230411 | -1.064144 | -1.0625601 | -1.3062152 | -2.2200240 | -2.2981224 | -1.6458099 |  2.865567617 | -1.458348 |  0.9333249 |\n",
       "\n"
      ],
      "text/plain": [
       "  SEZ2011    early_childhood_boy early_childhood_girl\n",
       "1 1.5146e+11 -1.230411           -1.064144           \n",
       "2 1.5146e+11 -1.230411           -1.064144           \n",
       "3 1.5146e+11 -1.230411           -1.064144           \n",
       "4 1.5146e+11 -1.230411           -1.064144           \n",
       "5 1.5146e+11 -1.230411           -1.064144           \n",
       "6 1.5146e+11 -1.230411           -1.064144           \n",
       "  age_middle_to_oldest_old_male age_middle_to_oldest_old_female dependants\n",
       "1  2.1196292                    -1.3062152                      -0.7801407\n",
       "2  0.2302043                    -0.3189938                       0.1197864\n",
       "3  3.8043177                    -1.3062152                      -2.2200240\n",
       "4  2.6982091                    -1.3062152                      -0.5183437\n",
       "5 19.6216703                    -1.3062152                      -2.2200240\n",
       "6 -1.0625601                    -1.3062152                      -2.2200240\n",
       "  unemployment no_higher_education foreign_nationals primary_school_age\n",
       "1 -0.5060886   -0.6075338           0.284861877      -1.458348         \n",
       "2  1.3419462    0.8849880           0.768744204      -1.458348         \n",
       "3 -0.2425543   -1.6458099          -1.081394102      -1.458348         \n",
       "4 -1.2391934   -1.6458099          -0.004949997       1.865648         \n",
       "5 -2.2981224   -1.6458099          -1.081394102      -1.458348         \n",
       "6 -2.2981224   -1.6458099           2.865567617      -1.458348         \n",
       "  one_person_households\n",
       "1  0.9333249           \n",
       "2 -0.7026211           \n",
       "3  3.2236494           \n",
       "4  0.2789465           \n",
       "5  3.2236494           \n",
       "6  0.9333249           "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Copy the data\n",
    "indicator_z_scores <- indicator_domains_pct\n",
    "\n",
    "# Get the number of columns in the data\n",
    "num_cols = ncol(indicator_z_scores)\n",
    "\n",
    "# Calculate the z scores for each of the relevant columns - starting at the 2nd column\n",
    "for(col in names(indicator_z_scores)[2:num_cols]) {\n",
    "  # rename column (remove the '_pct' in the name)\n",
    "  new_col_name <- gsub(\"_pct\", \"\", col)\n",
    "  indicator_z_scores[new_col_name] = scale(indicator_z_scores[col])\n",
    "}\n",
    "\n",
    "# Remove the original data to leave only the area identifier and the z scores\n",
    "indicator_z_scores <- indicator_z_scores[-c(2:num_cols)]\n",
    "\n",
    "head(indicator_z_scores)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Output the Z-score data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Output the z-score data as a csv file\n",
    "output_file <- file.path(pipeline_dir, \"censusDataZ.csv\")\n",
    "write.csv(indicator_z_scores, output_file, row.names = FALSE)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**END**"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "R",
   "language": "R",
   "name": "ir"
  },
  "language_info": {
   "codemirror_mode": "r",
   "file_extension": ".r",
   "mimetype": "text/x-r-source",
   "name": "R",
   "pygments_lexer": "r",
   "version": "4.3.3"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}