Skip to contents

Sex ratio across decades

Sex ratio in India is conventionally expressed as females per 1000 males. A ratio below 1000 indicates a male-skewed population.

years <- c(1961, 1971, 1981, 1991, 2001, 2011)

pop_geo <- lapply(years, function(y) {
  census_population_time_series |>
    filter(geography == "state", year == y) |>
    mutate(sex_ratio = 1000 * females / males) |>
    attach_geometry(year = y, geography = "state")
}) |> bind_rows()
ggplot(pop_geo) +
  geom_sf(aes(fill = sex_ratio), color = "white", linewidth = 0.1) +
  scale_fill_gradientn(
    colors = get_palette("blue_red"),
    name = "Females per\n1000 males",
    limits = c(850, 1100)
  ) +
  facet_wrap(~year, nrow = 2) +
  theme_void() +
  theme(strip.text = element_text(face = "bold"), legend.position = "bottom")

National trend

national <- census_population_time_series |>
  filter(geography == "state", year %in% years) |>
  group_by(year) |>
  summarise(males = sum(males, na.rm = TRUE), females = sum(females, na.rm = TRUE)) |>
  mutate(sex_ratio = 1000 * females / males)

ggplot(national, aes(year, sex_ratio)) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  geom_hline(yintercept = 1000, linetype = "dashed", color = "gray50") +
  scale_y_continuous(limits = c(920, 950)) +
  labs(x = "Year", y = "Females per 1000 males", title = "National sex ratio trend") +
  theme_minimal()

state_trends <- census_population_time_series |>
  filter(geography == "state", year %in% years) |>
  mutate(sex_ratio = 1000 * females / males)

# Identify top 5 and bottom 5 by 2011 sex ratio
extremes_2011 <- state_trends |>
  filter(year == 2011) |>
  arrange(sex_ratio)

bottom_5 <- extremes_2011 |>
  slice_head(n = 5) |>
  pull(state_name_harmonized)
top_5 <- extremes_2011 |>
  slice_tail(n = 5) |>
  pull(state_name_harmonized)

# Color palette: blues/teals for high ratios (positive), reds/oranges for low (concerning)
state_colors <- c(
  # Bottom 5 (low sex ratio) - warm colors indicating concern
  setNames(c("#d73027", "#f46d43", "#fdae61", "#fee090", "#ffffbf"), bottom_5),
  # Top 5 (high sex ratio) - cool colors indicating balance
  setNames(c("#e0f3f8", "#abd9e9", "#74add1", "#4575b4", "#313695"), top_5)
)

state_trends <- state_trends |>
  mutate(
    highlight = state_name_harmonized %in% c(bottom_5, top_5),
    label = ifelse(year == 2011 & highlight, state_name_harmonized, NA)
  )

ggplot(state_trends, aes(year, sex_ratio, group = state_name_harmonized)) +
  geom_line(data = filter(state_trends, !highlight), alpha = 0.2, color = "gray50") +
  geom_line(data = filter(state_trends, highlight), aes(color = state_name_harmonized), linewidth = 1) +
  geom_text_repel(
    aes(label = label, color = state_name_harmonized),
    direction = "y",
    xlim = c(2012, NA),
    hjust = 0,
    segment.size = 0.3,
    na.rm = TRUE
  ) +
  geom_hline(yintercept = 1000, linetype = "dashed", color = "black") +
  scale_color_manual(values = state_colors) +
  scale_x_continuous(limits = c(1961, 2025)) +
  labs(x = "Year", y = "Females per 1000 males", title = "Sex ratio by state (top 5 and bottom 5 highlighted)") +
  theme_minimal() +
  theme(legend.position = "none")

Highest and lowest sex ratios (2011)

pop_2011 <- census_population_time_series |>
  filter(geography == "state", year == 2011) |>
  mutate(sex_ratio = round(1000 * females / males)) |>
  arrange(desc(sex_ratio))

cat("Highest sex ratio (2011):\n")
#> Highest sex ratio (2011):
head(pop_2011 |> select(state_name_harmonized, sex_ratio), 5)
#> # A tibble: 5 × 2
#>   state_name_harmonized sex_ratio
#>   <chr>                     <dbl>
#> 1 Kerala                     1084
#> 2 Puducherry                 1037
#> 3 Tamil Nadu                  996
#> 4 Andhra Pradesh              993
#> 5 Chhattisgarh                991

cat("\nLowest sex ratio (2011):\n")
#> 
#> Lowest sex ratio (2011):
tail(pop_2011 |> select(state_name_harmonized, sex_ratio), 5)
#> # A tibble: 5 × 2
#>   state_name_harmonized     sex_ratio
#>   <chr>                         <dbl>
#> 1 Haryana                         879
#> 2 Andaman & Nicobar Islands       876
#> 3 Nct Of Delhi                    868
#> 4 Chandigarh                      818
#> 5 Dadra & Nagar Haveli            774

Change from 1961 to 2011

change_data <- census_population_time_series |>
  filter(geography == "state", year %in% c(1961, 2011)) |>
  mutate(sex_ratio = 1000 * females / males) |>
  select(state_name_harmonized, state_name_harmonized, year, sex_ratio) |>
  tidyr::pivot_wider(names_from = year, values_from = sex_ratio, names_prefix = "y") |>
  mutate(change = y2011 - y1961) |>
  attach_geometry(year = 2011, geography = "state")

ggplot(change_data) +
  geom_sf(aes(fill = change), color = "white", linewidth = 0.2) +
  scale_fill_gradientn(
    colors = get_palette("blue_red"),
    name = "Change in\nsex ratio",
    limits = c(-100, 100),
    na.value = "grey80"
  ) +
  labs(
    title = "Change in sex ratio (1961 to 2011)",
    caption = "Grey: data not available for 1961"
  ) +
  theme_void() +
  theme(legend.position = "right")