Historical AQI Data Analysis¶
from vayuayan import CPCBHistorical
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Set style for better-looking plots
sns.set_style("whitegrid")
plt.rcParams["figure.figsize"] = (12, 6)
1. Downloading Historical Data¶
# Initialize AQI client
client = CPCBHistorical()
# Download city-level data for Delhi in 2024
# Note: Uncomment the following lines to actually download data
city = "Delhi"
year = "2024"
output_file = f"{city.lower()}_aqi_{year}.csv"
df = client.download_past_year_aqi_data_city_level(city, year, output_file)
print(f"Data downloaded to {output_file}")
# Display the first few rows of the dataframe
print(df)
Data downloaded to delhi_aqi_2024.csv
Day January February March April May June July August \
0 1 346.0 176.0 208.0 133.0 200.0 245.0 105.0 64.0
1 2 340.0 215.0 117.0 144.0 197.0 173.0 118.0 76.0
2 3 341.0 199.0 126.0 167.0 264.0 155.0 108.0 68.0
3 4 377.0 274.0 141.0 173.0 282.0 211.0 61.0 64.0
4 5 333.0 177.0 125.0 174.0 292.0 251.0 77.0 59.0
September October November December
0 101.0 149.0 339.0 285.0
1 87.0 173.0 318.0 280.0
2 89.0 162.0 381.0 268.0
3 69.0 184.0 380.0 178.0
4 83.0 145.0 372.0 165.0
2. Loading and Exploring the Data¶
# Load the data
df = pd.read_csv(output_file)
# For demonstration purposes, let's create sample data structure
# Uncomment the above line when you have actual data
print("Dataset Information:")
print(f"Shape: {df.shape}")
print(f"\nColumns: {df.columns.tolist()}")
print(f"\nData types:\n{df.dtypes}")
print(f"\nFirst few rows:")
print(df.sample())
Dataset Information:
Shape: (31, 13)
Columns: ['Day', 'January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']
Data types:
Day int64
January float64
February float64
March float64
April float64
May float64
June float64
July float64
August float64
September float64
October float64
November float64
December float64
dtype: object
First few rows:
Day January February March April May June July August \
13 14 447.0 340.0 195.0 141.0 235.0 189.0 107.0 79.0
September October November December
13 62.0 234.0 424.0 193.0
3. Statistical Summary¶
# Statistical summary
print("AQI Statistics:")
print(df.describe())
AQI Statistics:
Day January February March April May \
count 31.000000 31.000000 29.000000 31.000000 30.000000 31.000000
mean 16.000000 354.838710 217.586207 175.903226 183.400000 224.032258
std 9.092121 32.498304 63.680855 33.640605 30.723012 37.689507
min 1.000000 273.000000 140.000000 117.000000 129.000000 156.000000
25% 8.500000 336.500000 161.000000 151.000000 165.500000 192.500000
50% 16.000000 348.000000 199.000000 178.000000 182.500000 230.000000
75% 23.500000 368.000000 270.000000 196.000000 202.250000 244.500000
max 31.000000 447.000000 341.000000 243.000000 240.000000 303.000000
June July August September October November \
count 30.000000 31.000000 31.000000 30.000000 31.000000 30.000000
mean 179.133333 96.064516 72.161290 105.100000 234.000000 374.233333
std 51.481321 20.325576 12.772619 43.441082 74.317338 45.848840
min 64.000000 56.000000 54.000000 52.000000 126.000000 303.000000
25% 147.500000 84.000000 63.000000 71.750000 163.000000 340.000000
50% 182.500000 97.000000 70.000000 94.500000 234.000000 371.500000
75% 207.500000 108.500000 78.000000 115.750000 298.000000 408.000000
max 306.000000 138.000000 105.000000 233.000000 364.000000 494.000000
December
count 31.000000
mean 293.838710
std 93.198389
min 139.000000
25% 212.000000
50% 285.000000
75% 369.500000
max 451.000000
4. Time Series Visualization¶
# Plot all months' AQI data for all days in a single graph
months = df.columns[1:] # Exclude 'Day' column
plt.figure(figsize=(14, 7))
for month in months:
plt.plot(df["Day"], df[month], marker="o", label=month, alpha=0.7)
plt.xlabel("Day of Month")
plt.ylabel("AQI Value")
plt.title(f"AQI Values for Each Month - {city} ({year})")
plt.legend(title="Month", bbox_to_anchor=(1.05, 1), loc="upper left")
plt.tight_layout()
plt.show()
# Plot each month's AQI data in a separate graph
# for month in months:
# plt.figure(figsize=(10, 4))
# plt.plot(df['Day'], df[month], marker='o', color='steelblue', alpha=0.8)
# plt.xlabel('Day of Month')
# plt.ylabel('AQI Value')
# plt.title(f'AQI Values in {month} - {city} ({year})')
# plt.grid(True, alpha=0.3)
# plt.tight_layout()
# plt.show()

5. Monthly Analysis¶
# Calculate mean and standard deviation of AQI for each month
aqi_means = df[months].mean()
aqi_stds = df[months].std()
# Plot mean AQI with error bars representing standard deviation
plt.figure(figsize=(10, 6))
plt.bar(months, aqi_means, yerr=aqi_stds, capsize=5, color="skyblue", alpha=0.8)
plt.ylabel("Mean AQI")
plt.xlabel("Month")
plt.title(f"Monthly Mean AQI with Variation (Std Dev) - {city} ({year})")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
