import altair as alt
import pandas as pd
import numpy as np
!ls USA/*
USA/InputDB: USAbirth.txt USAdeath.txt USAref.pdf USAbirthbymonth.txt USAnote.pdf USAtadj.txt USAcom.pdf USApop.txt USAwarn.txt USA/STATS: Births.txt Mx_1x5.txt cMx_1x10.txt Deaths_1x1.txt Mx_5x1.txt cMx_1x5.txt Deaths_1x10.txt Mx_5x10.txt cMx_5x1.txt Deaths_1x5.txt Mx_5x5.txt cMx_5x10.txt Deaths_5x1.txt Population.txt cMx_5x5.txt Deaths_5x10.txt Population5.txt fltper_1x1.csv Deaths_5x5.txt bltper_1x1.csv fltper_1x1.txt Deaths_lexis.txt bltper_1x1.txt fltper_1x10.txt E0per.txt bltper_1x10.txt fltper_1x5.txt E0per_1x10.txt bltper_1x5.txt fltper_5x1.txt E0per_1x5.txt bltper_5x1.txt fltper_5x10.txt Exposures_1x1.txt bltper_5x10.txt fltper_5x5.txt Exposures_1x10.txt bltper_5x5.txt hmd_period_summary.csv Exposures_1x5.txt cExposures_1x1.txt mltper_1x1.csv Exposures_5x1.txt cExposures_1x10.txt mltper_1x1.txt Exposures_5x10.txt cExposures_1x5.txt mltper_1x10.txt Exposures_5x5.txt cExposures_5x1.txt mltper_1x5.txt Exposures_lexis.txt cExposures_5x10.txt mltper_5x1.txt Mx_1x1.txt cExposures_5x5.txt mltper_5x10.txt Mx_1x10.txt cMx_1x1.txt mltper_5x5.txt
!head USA/STATS/Births.txt
The United States of America, Births (1-year) Last modified: 05 Sep 2024; Methods Protocol: v6 (2017) Year Female Male Total 1933 1122180 1184820 2307000 1934 1166072 1229928 2396000 1935 1158000 1219000 2377000 1936 1148000 1207000 2355000 1937 1175000 1238000 2413000 1938 1217000 1280000 2497000 1939 1201000 1265000 2466000
!head USA/STATS/Deaths_1x1.txt
The United States of America, Deaths (period 1x1), Last modified: 05 Sep 2024; Methods Protocol: v6 (2017) Year Age Female Male Total 1933 0 52615.77 68438.11 121053.88 1933 1 8917.13 10329.16 19246.29 1933 2 4336.92 5140.05 9476.97 1933 3 3161.59 3759.88 6921.47 1933 4 2493.84 2932.59 5426.43 1933 5 2139.87 2537.53 4677.40 1933 6 1939.70 2337.76 4277.46
!head -n 50 USA/STATS/Deaths_5x10.txt
The United States of America, Deaths (period 5x10), Last modified: 05 Sep 2024; Methods Protocol: v6 (2017) Year Age Female Male Total 1933-1939 0 362056.18 477977.01 840033.19 1933-1939 1-4 115143.59 135489.04 250632.63 1933-1939 5-9 53258.44 67564.33 120822.77 1933-1939 10-14 46694.50 62828.23 109522.73 1933-1939 15-19 81279.71 99734.97 181014.68 1933-1939 20-24 112947.56 129306.90 242254.46 1933-1939 25-29 124815.57 136924.68 261740.25 1933-1939 30-34 129395.90 151004.95 280400.85 1933-1939 35-39 153069.29 190604.31 343673.60 1933-1939 40-44 176225.26 243295.89 419521.15 1933-1939 45-49 213136.49 312163.99 525300.48 1933-1939 50-54 256553.94 382604.02 639157.96 1933-1939 55-59 290219.72 428887.96 719107.68 1933-1939 60-64 352944.85 492107.37 845052.22 1933-1939 65-69 414869.15 541491.24 956360.39 1933-1939 70-74 445865.48 538023.23 983888.71 1933-1939 75-79 438060.37 488250.91 926311.28 1933-1939 80-84 332338.39 333798.86 666137.25 1933-1939 85-89 190727.20 166439.33 357166.53 1933-1939 90-94 72551.42 52714.64 125266.06 1933-1939 95-99 17839.85 11080.82 28920.67 1933-1939 100-104 4683.89 2615.18 7299.07 1933-1939 105-109 682.78 343.77 1026.55 1933-1939 110+ 65.69 27.36 93.05 1940-1949 0 481556.53 647196.64 1128753.17 1940-1949 1-4 96777.46 117620.76 214398.22 1940-1949 5-9 41153.31 58200.03 99353.34 1940-1949 10-14 36212.73 55474.45 91687.18 1940-1949 15-19 67627.86 100348.40 167976.26 1940-1949 20-24 97106.04 140549.51 237655.55 1940-1949 25-29 109934.72 140989.54 250924.26 1940-1949 30-34 130650.19 161566.35 292216.54 1940-1949 35-39 164460.26 212052.48 376512.74 1940-1949 40-44 206538.92 290262.25 496801.17 1940-1949 45-49 267391.42 407085.98 674477.40 1940-1949 50-54 347278.86 565503.79 912782.65 1940-1949 55-59 421471.74 698098.93 1119570.67 1940-1949 60-64 518150.04 805480.61 1323630.65 1940-1949 65-69 641749.80 890759.96 1532509.76 1940-1949 70-74 730003.55 901430.09 1631433.64 1940-1949 75-79 716457.87 793951.81 1510409.68 1940-1949 80-84 593110.08 
580661.73 1173771.81 1940-1949 85-89 360719.61 308121.43 668841.04 1940-1949 90-94 135287.52 97819.23 233106.75 1940-1949 95-99 32194.95 20340.94 52535.89 1940-1949 100-104 6930.45 4075.04 11005.49 1940-1949 105-109 771.50 429.90 1201.40
!cat USA/STATS/E0per_1x10.txt
The United States of America, Life expectancy at birth (period, 1x10) Last modified: 05 Sep 2024; Methods Protocol: v6 (2017) Year Female Male Total 1933-1939 63.42 59.34 61.26 1940-1949 68.19 63.10 65.53 1950-1959 72.35 66.19 69.13 1960-1969 73.87 66.76 70.19 1970-1979 76.12 68.45 72.24 1980-1989 78.14 70.97 74.59 1990-1999 79.15 72.78 76.02 2000-2009 80.08 74.97 77.58 2010-2019 81.22 76.34 78.80 2020-2022 79.85 74.28 77.01
!head USA/STATS/Exposures_1x1.txt
The United States of America, Exposure to risk (period 1x1), Last modified: 05 Sep 2024; Methods Protocol: v6 (2017) Year Age Female Male Total 1933 0 971181.32 1003854.39 1975035.71 1933 1 1005773.86 1028926.84 2034700.70 1933 2 1077549.59 1100519.76 2178069.35 1933 3 1101794.00 1128155.35 2229949.34 1933 4 1118327.09 1156106.87 2274433.95 1933 5 1155442.55 1212990.41 2368432.96 1933 6 1179393.29 1231712.01 2411105.29
!head USA/STATS/cExposures_1x1.txt
The United States of America, Exposure to risk (cohort 1x1), Last modified: 05 Sep 2024; Methods Protocol: v6 (2017) Year Age Female Male Total 1852 0 . . . 1852 1 . . . 1852 2 . . . 1852 3 . . . 1852 4 . . . 1852 5 . . . 1852 6 . . .
!head USA/STATS/Mx_1x1.txt
The United States of America, Death rates (period 1x1), Last modified: 05 Sep 2024; Methods Protocol: v6 (2017) Year Age Female Male Total 1933 0 0.054177 0.068175 0.061292 1933 1 0.008866 0.010039 0.009459 1933 2 0.004025 0.004671 0.004351 1933 3 0.002869 0.003333 0.003104 1933 4 0.002230 0.002537 0.002386 1933 5 0.001852 0.002092 0.001975 1933 6 0.001645 0.001898 0.001774
!head USA/STATS/Mx_5x10.txt
The United States of America, Death rates (period 5x10), Last modified: 05 Sep 2024; Methods Protocol: v6 (2017) Year Age Female Male Total 1933-1939 0 0.053437 0.067995 0.060850 1933-1939 1-4 0.004004 0.004598 0.004304 1933-1939 5-9 0.001343 0.001645 0.001497 1933-1939 10-14 0.001100 0.001452 0.001278 1933-1939 15-19 0.001938 0.002388 0.002162 1933-1939 20-24 0.002807 0.003296 0.003049 1933-1939 25-29 0.003297 0.003721 0.003506
!head USA/STATS/cMx_5x10.txt
The United States of America, Death rates (cohort 5x10), Last modified: 05 Sep 2024; Methods Protocol: v6 (2017) Year Age Female Male Total 1852-1859 0 . . . 1852-1859 1-4 . . . 1852-1859 5-9 . . . 1852-1859 10-14 . . . 1852-1859 15-19 . . . 1852-1859 20-24 . . . 1852-1859 25-29 . . .
!head USA/STATS/bltper_5x10.txt
The United States of America, Life tables (period 5x10), Total Last modified: 05 Sep 2024; Methods Protocol: v6 (2017) Year Age mx qx ax lx dx Lx Tx ex 1933-1939 0 0.06085 0.05820 0.25 100000 5820 95645 6126226 61.26 1933-1939 1-4 0.00440 0.01741 1.41 94180 1640 372477 6030581 64.03 1933-1939 5-9 0.00150 0.00748 2.30 92540 692 460835 5658104 61.14 1933-1939 10-14 0.00128 0.00637 2.62 91848 585 457850 5197268 56.59 1933-1939 15-19 0.00217 0.01078 2.70 91263 984 454051 4739419 51.93 1933-1939 20-24 0.00305 0.01514 2.58 90280 1367 448084 4285368 47.47 1933-1939 25-29 0.00351 0.01739 2.54 88913 1546 440758 3837284 43.16
!head USA/STATS/fltper_5x10.txt
The United States of America, Life tables (period 5x10), Females Last modified: 05 Sep 2024; Methods Protocol: v6 (2017) Year Age mx qx ax lx dx Lx Tx ex 1933-1939 0 0.05344 0.05139 0.25 100000 5139 96167 6341625 63.42 1933-1939 1-4 0.00410 0.01621 1.41 94861 1538 375461 6245458 65.84 1933-1939 5-9 0.00135 0.00672 2.27 93323 627 464905 5869997 62.90 1933-1939 10-14 0.00110 0.00548 2.63 92696 508 462279 5405092 58.31 1933-1939 15-19 0.00194 0.00966 2.71 92188 891 458901 4942814 53.62 1933-1939 20-24 0.00281 0.01395 2.59 91298 1274 453416 4483913 49.11 1933-1939 25-29 0.00330 0.01636 2.54 90024 1473 446493 4030497 44.77
!head -n 30 USA/STATS/mltper_5x10.txt
The United States of America, Life tables (period 5x10), Males Last modified: 05 Sep 2024; Methods Protocol: v6 (2017) Year Age mx qx ax lx dx Lx Tx ex 1933-1939 0 0.06799 0.06470 0.25 100000 6470 95148 5933542 59.34 1933-1939 1-4 0.00470 0.01858 1.42 93530 1738 369630 5838395 62.42 1933-1939 5-9 0.00165 0.00821 2.33 91793 754 456951 5468765 59.58 1933-1939 10-14 0.00145 0.00723 2.62 91039 659 453622 5011813 55.05 1933-1939 15-19 0.00239 0.01190 2.69 90380 1076 449413 4558191 50.43 1933-1939 20-24 0.00330 0.01636 2.56 89304 1461 442964 4108778 46.01 1933-1939 25-29 0.00372 0.01844 2.54 87843 1620 435230 3665815 41.73 1933-1939 30-34 0.00444 0.02194 2.59 86223 1892 426560 3230584 37.47 1933-1939 35-39 0.00596 0.02938 2.60 84331 2477 415721 2804024 33.25 1933-1939 40-44 0.00787 0.03862 2.60 81854 3161 401692 2388303 29.18 1933-1939 45-49 0.01107 0.05391 2.62 78692 4242 383386 1986611 25.25 1933-1939 50-54 0.01582 0.07622 2.60 74450 5675 358629 1603226 21.53 1933-1939 55-59 0.02163 0.10280 2.59 68775 7070 326843 1244597 18.10 1933-1939 60-64 0.03134 0.14572 2.59 61705 8992 286861 917754 14.87 1933-1939 65-69 0.04588 0.20630 2.56 52714 10875 237046 630892 11.97 1933-1939 70-74 0.06813 0.29161 2.53 41839 12201 179086 393847 9.41 1933-1939 75-79 0.10227 0.40591 2.46 29638 12031 117632 214761 7.25 1933-1939 80-84 0.14960 0.53617 2.36 17608 9441 63106 97128 5.52 1933-1939 85-89 0.21888 0.67843 2.20 8167 5541 25314 34023 4.17 1933-1939 90-94 0.28259 0.76224 1.98 2626 2002 7084 8709 3.32 1933-1939 95-99 0.37235 0.85877 1.86 624 536 1440 1625 2.60 1933-1939 100-104 0.46932 0.91699 1.68 88 81 172 185 2.10 1933-1939 105-109 0.56895 0.95216 1.51 7 7 12 13 1.75 1933-1939 110+ 0.64856 1.00000 1.54 0 0 1 1 1.54 1940-1949 0 0.04779 0.04599 0.18 100000 4599 96247 6310348 63.10 1940-1949 1-4 0.00225 0.00897 1.52 95401 855 379478 6214100 65.14 1940-1949 5-9 0.00103 0.00512 2.39 94545 484 471462 5834623 61.71
Year: Year or range of years (for both period & cohort data)
Age: Age group for n-year interval from exact age x to just before exact age x+n, where n=1, 4, 5, or ∞ (open age interval)
m(x): Central death rate between ages x and x+n
q(x): Probability of death between ages x and x+n
a(x): Average length of survival between ages x and x+n for persons dying in the interval
l(x): Number of survivors at exact age x, assuming l(0) = 100,000
d(x): Number of deaths between ages x and x+n
L(x): Number of person-years lived between ages x and x+n
T(x): Number of person-years remaining after exact age x
e(x): Life expectancy at exact age x (in years)
def read_life_table(filepath):
    """
    Read a life table text file laid out like the ``*ltper_*.txt`` files.

    The expected layout is: one metadata line, one blank line, a header row
    (``Year Age mx qx ax lx dx Lx Tx ex``) and then whitespace-separated
    data rows.

    Parameters:
        filepath (str): Path to the whitespace-delimited life table file

    Returns:
        pandas.DataFrame: Life table with the statistic columns renamed to
            the ``m(x)``, ``q(x)``, ... notation and converted to numbers.
            Values that cannot be parsed as numbers (for example the ``.``
            placeholder) become NaN. ``Year`` and ``Age`` are left as read.
        str: Metadata from the first line, stripped of surrounding whitespace

    Raises:
        KeyError: If one of the expected statistic columns is missing.
    """
    # The first line is free-text metadata rather than data.
    with open(filepath, 'r') as file:
        metadata = file.readline().strip()

    # sep=r'\s+' is the supported spelling of the deprecated
    # delim_whitespace=True; with the python engine both split on runs of
    # whitespace, so the parsed result is the same.
    df = pd.read_csv(
        filepath,
        sep=r'\s+',
        skiprows=2,        # metadata line + the blank line that follows it
        na_values=[''],    # treat empty fields as missing
        engine='python',
    )

    df.columns = df.columns.str.strip()

    # File header name -> name used in the column documentation.
    column_mapping = {
        'mx': 'm(x)',  # Central death rate
        'qx': 'q(x)',  # Probability of death
        'ax': 'a(x)',  # Average survival length
        'lx': 'l(x)',  # Number of survivors
        'dx': 'd(x)',  # Number of deaths
        'Lx': 'L(x)',  # Person-years lived
        'Tx': 'T(x)',  # Person-years remaining
        'ex': 'e(x)',  # Life expectancy
    }
    df = df.rename(columns=column_mapping)

    # Force every statistic column to numeric; unparseable cells become NaN.
    for col in column_mapping.values():
        df[col] = pd.to_numeric(df[col], errors='coerce')

    return df, metadata
# Example usage:
# df, metadata = read_life_table('USA/STATS/bltper_5x10.txt')
# print("Metadata:", metadata)
# print("\nFirst few rows of the data:")
# print(df.head())
# Example of basic analysis:
def analyze_life_table(df):
    """
    Perform basic analysis on the life table data.

    Parameters:
        df (pandas.DataFrame): Life table with at least the columns
            ``Year``, ``Age``, ``q(x)`` and ``e(x)``

    Returns:
        dict: with the keys
            - ``years_covered``: distinct ``Year`` values, in order of appearance
            - ``age_groups``: number of distinct ``Age`` values
            - ``max_life_expectancy`` / ``min_life_expectancy``: extremes of ``e(x)``
            - ``infant_mortality``: ``q(x)`` of the first row whose age is ``0``
            - ``adult_mortality``: ``q(x)`` of the first row whose age is ``15-19``
          The two mortality entries are NaN when the table has no such age
          group (e.g. a single-year-of-age table has no ``15-19`` row).
    """
    # Compare ages as text so that both raw labels ('0') and already-cleaned
    # integer ages (0) are matched.
    age_labels = df['Age'].astype(str)

    def first_q(age_label):
        # q(x) of the first row for this age label, or NaN if there is none.
        matches = df.loc[age_labels == age_label, 'q(x)']
        return matches.iloc[0] if not matches.empty else float('nan')

    return {
        'years_covered': df['Year'].unique().tolist(),
        'age_groups': len(df['Age'].unique()),
        'max_life_expectancy': df['e(x)'].max(),
        'min_life_expectancy': df['e(x)'].min(),
        'infant_mortality': first_q('0'),
        'adult_mortality': first_q('15-19'),
    }
# Load the 1x1 life table file (one row per year and single age) and keep
# its metadata line; the bare name below displays the frame in a notebook.
d, meta = read_life_table('USA/STATS/bltper_1x1.txt')
d
Year | Age | m(x) | q(x) | a(x) | l(x) | d(x) | L(x) | T(x) | e(x) | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 1933 | 0 | 0.06129 | 0.05861 | 0.25 | 100000 | 5861 | 95624 | 6089609 | 60.90 |
1 | 1933 | 1 | 0.00946 | 0.00941 | 0.50 | 94139 | 886 | 93696 | 5993985 | 63.67 |
2 | 1933 | 2 | 0.00435 | 0.00434 | 0.50 | 93253 | 405 | 93050 | 5900289 | 63.27 |
3 | 1933 | 3 | 0.00310 | 0.00310 | 0.50 | 92848 | 288 | 92704 | 5807239 | 62.55 |
4 | 1933 | 4 | 0.00239 | 0.00238 | 0.50 | 92560 | 221 | 92450 | 5714535 | 61.74 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
9985 | 2022 | 106 | 0.58815 | 0.45449 | 0.50 | 95 | 43 | 74 | 154 | 1.62 |
9986 | 2022 | 107 | 0.61915 | 0.47279 | 0.50 | 52 | 25 | 40 | 81 | 1.55 |
9987 | 2022 | 108 | 0.64929 | 0.49016 | 0.50 | 27 | 13 | 21 | 41 | 1.49 |
9988 | 2022 | 109 | 0.67834 | 0.50654 | 0.50 | 14 | 7 | 10 | 20 | 1.45 |
9989 | 2022 | 110+ | 0.70614 | 1.00000 | 1.42 | 7 | 7 | 10 | 10 | 1.42 |
9990 rows × 10 columns
# Load the 5x1 life table file (grouped ages such as '1-4', '5-9'); the
# metadata line is discarded. The bare name displays the frame in a notebook.
df, _ = read_life_table('USA/STATS/bltper_5x1.txt')
df
Year | Age | m(x) | q(x) | a(x) | l(x) | d(x) | L(x) | T(x) | e(x) | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 1933 | 0 | 0.06129 | 0.05861 | 0.25 | 100000 | 5861 | 95624 | 6089609 | 60.90 |
1 | 1933 | 1-4 | 0.00484 | 0.01911 | 1.41 | 94139 | 1799 | 371900 | 5993985 | 63.67 |
2 | 1933 | 5-9 | 0.00164 | 0.00815 | 2.31 | 92340 | 752 | 459672 | 5622085 | 60.88 |
3 | 1933 | 10-14 | 0.00135 | 0.00674 | 2.61 | 91587 | 617 | 456463 | 5162413 | 56.37 |
4 | 1933 | 15-19 | 0.00229 | 0.01137 | 2.70 | 90970 | 1034 | 452468 | 4705950 | 51.73 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2155 | 2022 | 90-94 | 0.18691 | 0.62371 | 2.33 | 22650 | 14127 | 75583 | 102348 | 4.52 |
2156 | 2022 | 95-99 | 0.29637 | 0.79294 | 2.07 | 8523 | 6758 | 22803 | 26765 | 3.14 |
2157 | 2022 | 100-104 | 0.43427 | 0.90435 | 1.77 | 1765 | 1596 | 3675 | 3961 | 2.24 |
2158 | 2022 | 105-109 | 0.58547 | 0.95915 | 1.50 | 169 | 162 | 277 | 286 | 1.70 |
2159 | 2022 | 110+ | 0.70614 | 1.00000 | 1.42 | 7 | 7 | 10 | 10 | 1.42 |
2160 rows × 10 columns
# Function to extract first number from age range for sorting
def extract_first_age(age_str):
    """
    Return the lower bound of an age label as an int.

    Handles ranges (``'15-19'`` -> 15), open intervals (``'110+'`` -> 110),
    plain numbers (``'0'`` -> 0) and values that are already integers.

    Raises:
        ValueError: If the leading part of the label is not an integer.
    """
    # Coerce once so integer ages work too; the original only applied str()
    # in the membership test and then called string methods on the raw value.
    label = str(age_str).strip()
    return int(label.split('-')[0].rstrip('+'))
# Distinct age labels, ordered by the lower bound of each interval
# (used below to order the colour legend).
age_order = list(df['Age'].unique())
age_order.sort(key=extract_first_age)
# Starting age of the interval plus the remaining life expectancy at that age.
df['x+e(x)'] = df['e(x)'] + df['Age'].map(extract_first_age)
# Line chart of remaining life expectancy e(x) per year, one line per age group.
chart = alt.Chart(
    df,
    title='Remaining Life Expectancy by Age Group Over Time',
    width=400,
    height=300,
).mark_line().encode(
    x=alt.X(
        'Year:O',
        title='Year',
        axis=alt.Axis(
            labelAngle=-45,
            # Label every fifth year: 1935, 1940, ..., 2020.
            values=list(range(1935, 2023, 5)),
            labelOverlap=False,
            labelPadding=10,
        ),
    ),
    y=alt.Y(
        'e(x):Q',
        title='Remaining Life Expectancy (years)',
        # Do not force the y axis to start at zero.
        scale=alt.Scale(zero=False),
    ),
    color=alt.Color(
        'Age:N',
        title='Age Group',
        # Legend follows numeric age order rather than label order.
        sort=age_order,
        scale=alt.Scale(scheme='viridis'),
        legend=alt.Legend(
            orient='right',
            title='Age Group',
            symbolLimit=50,
        ),
    ),
    tooltip=['Year', 'Age', 'e(x)'],
)
chart
# Line chart of x + e(x) (interval start age plus remaining life expectancy)
# per year, one line per age group.
chart = alt.Chart(df).mark_line().encode(
    x=alt.X('Year:O',
            title='Year',
            axis=alt.Axis(
                labelAngle=-45,
                # Label every fifth year: 1935, 1940, ..., 2020.
                values=list(range(1935, 2023, 5)),
                labelOverlap=False,
                labelPadding=10
            )),
    y=alt.Y('x+e(x):Q',
            title='Life Expectancy (years)',
            scale=alt.Scale(zero=False)
            ),
    color=alt.Color('Age:N',
                    title='Age Group',
                    sort=age_order,
                    scale=alt.Scale(scheme='viridis'),
                    legend=alt.Legend(
                        orient='right',
                        title='Age Group',
                        symbolLimit=50)),
    # Include the plotted quantity itself so the hover value matches the
    # line; e(x) is kept for reference.
    tooltip=['Year', 'Age', 'x+e(x)', 'e(x)']
).properties(
    width=400,
    height=300,
    title='Life Expectancy by Age Group Over Time'
)
chart
def clean_age(age_str):
    """
    Convert an age label to an integer.

    Open-ended labels such as ``'110+'`` map to their lower bound (110);
    any ``'N+'`` label is accepted, not only ``'110+'``. Values that are not
    strings are passed straight to ``int()``.

    Raises:
        ValueError: If the label is not an integer optionally followed by '+'
            (for example a range such as ``'1-4'``).
    """
    if isinstance(age_str, str):
        return int(age_str.strip().rstrip('+'))
    return int(age_str)
def calculate_survival_distribution(df, current_age, current_year):
    """
    Calculate survival probabilities for all future ages given current age.

    Parameters:
        df: DataFrame with life table data; needs the columns ``Year``,
            ``Age``, ``q(x)`` and ``e(x)``, with rows in ascending age order
            within a year
        current_age: int, current age of the person
        current_year: int, year whose mortality rates are used

    Returns:
        (result, stats) where

        result: DataFrame with one row per age >= ``current_age``:
            ``Age``, ``Survival_Probability`` (probability of reaching that
            age, 1.0 at the first row) and ``Death_Probability`` (numerical
            derivative of ``-Survival_Probability`` with respect to age; NaN
            when only a single age remains and no derivative can be formed)
        stats: dict with ``mean_age`` (``current_age`` + ``e(x)`` at that
            age), ``current_age`` and ``median_age`` (first age whose
            survival probability is <= 0.5, NaN if there is none)

    Raises:
        ValueError: If the table has no rows for ``current_year`` or no ages
            at or above ``current_age``.
        IndexError: If ``current_age`` itself is not an age in the table.
    """
    year_data = df[df['Year'] == current_year].copy()
    if year_data.empty:
        raise ValueError(f"No life table rows found for year {current_year!r}")

    # Age labels such as '110+' need parsing; skip that when the column is
    # already integer (e.g. after the ages were cleaned for export).
    age_column = year_data['Age']
    if not pd.api.types.is_integer_dtype(age_column):
        age_column = age_column.apply(clean_age)
    year_data['Age'] = age_column.astype(int)

    future_ages = year_data[year_data['Age'] >= current_age]
    if future_ages.empty:
        raise ValueError(
            f"No ages >= {current_age!r} in the life table for year {current_year!r}"
        )

    ages = future_ages['Age'].to_numpy()
    death_prob = future_ages['q(x)'].to_numpy(dtype=float)

    # Survival to each age is the running product of (1 - q) over all
    # *earlier* ages, so it starts at 1.0 and the last q is not applied.
    survival = np.concatenate(([1.0], np.cumprod(1.0 - death_prob)[:-1]))

    # np.gradient needs at least two points; with a single remaining age the
    # derivative is undefined, so report NaN instead of failing.
    if len(ages) > 1:
        death_density = np.gradient(-survival, ages)
    else:
        death_density = np.full(len(ages), np.nan)

    result = pd.DataFrame({
        'Age': ages,
        'Survival_Probability': survival,
        'Death_Probability': death_density,
    })

    remaining = year_data.loc[year_data['Age'] == current_age, 'e(x)']
    reached_half = result.loc[result['Survival_Probability'] <= 0.5, 'Age']

    stats = {
        'mean_age': current_age + remaining.iloc[0],
        'current_age': current_age,
        'median_age': reached_half.iloc[0] if not reached_half.empty else np.nan,
    }
    return result, stats
def plot_survival_distribution(result_df, stats):
    """
    Create an Altair visualization of the survival distribution.

    Parameters:
        result_df: DataFrame with the columns ``Age``,
            ``Survival_Probability`` and ``Death_Probability``
        stats: dict with the keys ``current_age``, ``mean_age`` and
            ``median_age``

    Returns:
        Layered Altair chart: survival line (blue), death-probability area
        (red), a green rule at the mean age and an orange rule at the median
        age. The median rule is left out when ``median_age`` is missing (NaN).
    """
    # Shared data, size and title for the layers built from result_df.
    base = alt.Chart(result_df).properties(
        width=800,
        height=400,
        title=f"Survival and Death Probability Distribution (Current Age: {stats['current_age']})"
    )

    # Survival probability line
    survival = base.mark_line(color='blue').encode(
        x=alt.X('Age:Q', title='Age'),
        y=alt.Y('Survival_Probability:Q',
                title='Probability',
                axis=alt.Axis(format='%')),
        tooltip=[
            alt.Tooltip('Age:Q', title='Age'),
            alt.Tooltip('Survival_Probability:Q', title='Survival Probability', format='.1%')
        ]
    )

    # Death probability density
    death = base.mark_area(
        color='red',
        opacity=0.3
    ).encode(
        x=alt.X('Age:Q'),
        y=alt.Y('Death_Probability:Q',
                title='Death Probability Density',
                axis=alt.Axis(format='%')),
        tooltip=[
            alt.Tooltip('Age:Q', title='Age'),
            alt.Tooltip('Death_Probability:Q', title='Death Probability Density', format='.2%')
        ]
    )

    # Vertical rule at the mean age; the value is injected as a constant
    # calculate expression.
    mean_rule = base.mark_rule(color='green').encode(
        x=alt.X('mean_age:Q', title='Age'),
        size=alt.value(2)
    ).transform_calculate(
        mean_age=str(stats['mean_age'])
    )

    chart = survival + death + mean_rule

    # A missing median would be rendered as the expression text 'nan', which
    # is not a constant in Vega's expression language (its constant is NaN),
    # so the rule is only added when a median exists.
    if not pd.isna(stats['median_age']):
        median_rule = base.mark_rule(color='orange').encode(
            x=alt.X('median_age:Q', title='Age'),
            size=alt.value(2)
        ).transform_calculate(
            median_age=str(stats['median_age'])
        )
        chart = chart + median_rule

    # Survival and density use different magnitudes, so give each layer its
    # own y scale.
    return chart.resolve_scale(
        y='independent'
    ).configure_axis(
        grid=True
    )
# Reload the 1x1 life table file; this rebinds df, which held the 5x1 table
# used for the charts above. The bare name displays the frame in a notebook.
df, _ = read_life_table('USA/STATS/bltper_1x1.txt')
df
# Scenario to evaluate: a person of this age, using this year's rows.
current_age = 35
current_year = 2022
# result: per-age survival / death probabilities; stats: mean, current and
# median age (see calculate_survival_distribution).
result, stats = calculate_survival_distribution(df, current_age, current_year)
result, stats
plot_survival_distribution(result, stats)
def plot_survival_distribution_labels(result_df, stats, label=False):
    """
    Create an Altair visualization of the survival distribution with labels.

    Parameters:
        result_df: DataFrame with the columns ``Age``,
            ``Survival_Probability`` and ``Death_Probability``
        stats: dict with the keys ``current_age``, ``mean_age`` and
            ``median_age``
        label: when True, also draw the fixed-position captions
            'Survival Probability' and 'Death Probability Density'

    Returns:
        Layered Altair chart: survival line (blue), death-probability area
        (red), rules with text labels at the mean (green) and median
        (orange) age, plus the optional curve captions. The median rule and
        its label are left out when ``median_age`` is missing (NaN).
    """
    # Shared data, size and title for the layers built from result_df.
    base = alt.Chart(result_df).properties(
        width=800,
        height=400,
        title=f"Survival and Death Probability Distribution (Current Age: {stats['current_age']})"
    )

    def curve_caption(text, colour, y_pixel):
        # One text mark at a fixed pixel position; backed by a one-row frame
        # so the caption is drawn once rather than once per data row.
        return alt.Chart(pd.DataFrame([{'label': text}])).mark_text(
            align='left',
            baseline='middle',
            dx=5,
            dy=-10,
            color=colour
        ).encode(
            x=alt.value(50),
            y=alt.value(y_pixel),
            text='label:N'
        )

    # Survival probability line
    survival = base.mark_line(color='blue').encode(
        x=alt.X('Age:Q', title='Age'),
        y=alt.Y('Survival_Probability:Q',
                title='Probability',
                axis=alt.Axis(format='%')),
        tooltip=[
            alt.Tooltip('Age:Q', title='Age'),
            alt.Tooltip('Survival_Probability:Q', title='Survival Probability', format='.1%')
        ]
    )

    # Death probability density
    death = base.mark_area(
        color='red',
        opacity=0.3,
    ).encode(
        x=alt.X('Age:Q'),
        y=alt.Y('Death_Probability:Q',
                title='Death Probability Density',
                axis=alt.Axis(format='%')),
        tooltip=[
            alt.Tooltip('Age:Q', title='Age'),
            alt.Tooltip('Death_Probability:Q', title='Death Probability Density', format='.2%')
        ]
    )

    # Vertical rule and text label at the mean age.
    mean_rule = base.mark_rule(color='green').encode(
        x=alt.X('mean_age:Q', title='Age'),
        size=alt.value(2)
    ).transform_calculate(
        mean_age=str(stats['mean_age'])
    )
    mean_label = alt.Chart(pd.DataFrame([{
        'x': stats['mean_age'],
        'y': 1.0,
        'label': f"Mean Age: {stats['mean_age']:.1f}"
    }])).mark_text(
        align='left',
        baseline='bottom',
        dy=-5,
        color='green'
    ).encode(
        x='x:Q',
        y=alt.value(0),
        text='label:N'
    )

    # A missing median would be rendered as the expression text 'nan', which
    # is not a constant in Vega's expression language (its constant is NaN),
    # so the median rule and label are only built when a median exists.
    has_median = not pd.isna(stats['median_age'])

    layers = [survival, death, mean_rule]
    if has_median:
        median_rule = base.mark_rule(color='orange').encode(
            x=alt.X('median_age:Q', title='Age'),
            size=alt.value(2)
        ).transform_calculate(
            median_age=str(stats['median_age'])
        )
        layers.append(median_rule)
    layers.append(mean_label)
    if has_median:
        median_label = alt.Chart(pd.DataFrame([{
            'x': stats['median_age'],
            'y': 1,
            'label': f"Median Age: {stats['median_age']:.1f}"
        }])).mark_text(
            align='left',
            baseline='top',
            dy=5,
            dx=5,
            color='orange'
        ).encode(
            x='x:Q',
            y=alt.value(0),
            text='label:N'
        )
        layers.append(median_label)

    # The captions were previously built but never added to the chart, so
    # label=True had no visible effect.
    if label:
        layers.append(curve_caption('Survival Probability', 'blue', 350))
        layers.append(curve_caption('Death Probability Density', 'red', 50))

    # Combine all elements, left to right, in drawing order.
    chart = layers[0]
    for layer in layers[1:]:
        chart = chart + layer

    return chart.resolve_scale(
        y='independent'
    ).configure_axis(
        grid=True
    )
# Build the labelled chart for the scenario computed above (label is left at
# its default of False); the bare name displays the chart in a notebook.
chart = plot_survival_distribution_labels(result, stats)
chart
# Load the three 1x1 life table files and tag each frame with the population
# it describes.
combined, _ = read_life_table('USA/STATS/bltper_1x1.txt')
combined['Gender'] = 'Combined'
male, _ = read_life_table('USA/STATS/mltper_1x1.txt')
male['Gender'] = 'Male'
female, _ = read_life_table('USA/STATS/fltper_1x1.txt')
female['Gender'] = 'Female'

# Stack the three tables. The result is named all_tables rather than `all`
# so that the builtin all() is not shadowed for the rest of the session.
all_tables = pd.concat([combined, male, female])
# Turn age labels (e.g. '110+') into plain integers before exporting.
all_tables['Age'] = all_tables['Age'].apply(clean_age)
# One JSON object per row.
all_tables.to_json('all.json', orient='records')