(Source: WHO)
Coronavirus disease (COVID-19) is an infectious disease caused by the SARS-CoV-2 virus.
Most people infected with the virus will experience mild to moderate respiratory illness and recover without requiring special treatment. However, some will become seriously ill and require medical attention. Older people and those with underlying medical conditions like cardiovascular disease, diabetes, chronic respiratory disease, or cancer are more likely to develop serious illness. Anyone can get sick with COVID-19 and become seriously ill or die at any age.
The best way to prevent and slow down transmission is to be well informed about the disease and how the virus spreads. Protect yourself and others from infection by staying at least 1 metre apart from others, wearing a properly fitted mask, and washing your hands or using an alcohol-based rub frequently. Get vaccinated when it’s your turn and follow local guidance.
The virus can spread from an infected person’s mouth or nose in small liquid particles when they cough, sneeze, speak, sing or breathe. These particles range from larger respiratory droplets to smaller aerosols. It is important to practice respiratory etiquette, for example by coughing into a flexed elbow, and to stay home and self-isolate until you recover if you feel unwell.
# This Python 3 environment comes with many helpful analytics libraries installed
# For example, here are several helpful packages to load
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
#Data Processing
import pandas as pd
import numpy as np
#Data Visualisation
import plotly.express as px
#Machine Learning Libraries
import sklearn
from sklearn import linear_model
from sklearn.utils import shuffle
#Miscellaneous
import os
import warnings
warnings.filterwarnings('ignore')
! aws s3 ls s3://databeanstalk-community/data/covid-19/
2021-11-09 15:48:26 902 README.md 2021-11-09 16:02:14 622938 StatewiseTestingDetails.csv 2021-11-09 15:48:26 50596 api.ipynb 2021-11-09 15:48:26 286814 complete.csv 2021-11-09 16:02:14 11613491 country_vaccinations.csv 2021-11-09 16:04:14 880263 country_vaccinations_by_manufacturer.csv 2021-11-09 01:23:00 14784 country_wise_latest.csv 2021-11-09 15:48:26 864307 covid-info-and-analysis.ipynb 2021-11-09 01:22:59 3305202 covid_19_clean_complete.csv 2021-11-09 15:59:53 1005449 covid_19_india.csv 2021-11-09 16:05:45 1108819 covid_vaccine_statewise.csv 2021-11-09 01:22:59 14432 day_wise.csv 2021-11-09 15:48:26 61505 district_level_latest.csv 2021-11-09 01:22:59 1857039 full_grouped.csv 2021-11-09 15:48:26 6964 nation_level_daily.csv 2021-11-09 15:48:26 37458526 patients_data.csv 2021-11-09 15:48:26 213323 state_level_daily.csv 2021-11-09 15:48:26 5137 state_level_latest.csv 2021-11-09 15:48:26 21548 tests_day_wise.csv 2021-11-09 15:48:26 800270 tests_state_wise.csv 2021-11-09 01:22:58 69839185 usa_county_wise.csv 2021-11-09 15:48:26 248958 web_scraping.ipynb 2021-11-09 01:22:58 16462 worldometer_data.csv
def read_data(path, file):
    """Load a CSV file located under ``path`` into a pandas DataFrame.

    Args:
        path: Directory or URL prefix as a string (for example an
            ``s3://bucket/prefix`` location). Trailing ``/`` characters
            are tolerated.
        file: Name of the CSV file inside ``path``.

    Returns:
        The DataFrame produced by ``pd.read_csv`` for ``<path>/<file>``.
    """
    # Strip trailing separators so a prefix such as "bucket/data/" does not
    # become "bucket/data//file"; on object stores like S3 a doubled slash
    # names a different key than the intended one.
    return pd.read_csv(path.rstrip("/") + "/" + file)
# Location of the shared COVID-19 CSV files.
path = 's3://databeanstalk-community/data/covid-19'

# Load each CSV used by the analysis through read_data.
world = read_data(path, 'worldometer_data.csv')
india = read_data(path, 'covid_19_india.csv')
state = read_data(path, 'StatewiseTestingDetails.csv')
daily = read_data(path, 'nation_level_daily.csv')
vac_data = read_data(path, 'country_vaccinations.csv')
# pop_data comes from the same file as `world`; take an independent copy
# instead of downloading and parsing 'worldometer_data.csv' a second time.
pop_data = world.copy()
vac_manu = read_data(path, 'country_vaccinations_by_manufacturer.csv')
state_vac = read_data(path, 'covid_vaccine_statewise.csv')
daily_records = read_data(path, 'day_wise.csv')
# For world Vaccination Dataset
# Build one DataFrame per country from vac_data, keeping only the rows that
# carry a value in the 'daily_vaccinations' column.
def _country_vaccinations(country):
    """Return vac_data rows for ``country`` with non-null 'daily_vaccinations'.

    Args:
        country: Value to match exactly against the 'country' column.

    Returns:
        A new DataFrame (original index labels preserved) holding the
        matching rows whose 'daily_vaccinations' is not null.
    """
    country_rows = vac_data[vac_data['country'] == country]
    # dropna returns a new frame, which avoids the in-place drop on a
    # filtered slice (a chained-assignment pattern pandas warns about).
    return country_rows.dropna(subset=['daily_vaccinations'])


usa_vac = _country_vaccinations('United States')
uk_vac = _country_vaccinations('United Kingdom')
ger_vac = _country_vaccinations('Germany')
ita_vac = _country_vaccinations('Italy')
fra_vac = _country_vaccinations('France')
chi_vac = _country_vaccinations('China')
rus_vac = _country_vaccinations('Russia')
isr_vac = _country_vaccinations('Israel')
uae_vac = _country_vaccinations('United Arab Emirates')
can_vac = _country_vaccinations('Canada')
jpn_vac = _country_vaccinations('Japan')
ind_vac = _country_vaccinations('India')
ino_vac = _country_vaccinations('Indonesia')
mal_vac = _country_vaccinations('Malaysia')
ban_vac = _country_vaccinations('Bangladesh')
nig_vac = _country_vaccinations('Nigeria')
# Spelling fixed from 'Phillipines', which matched no rows.
# NOTE(review): confirm 'Philippines' is the exact value used in the CSV.
phi_vac = _country_vaccinations('Philippines')
vie_vac = _country_vaccinations('Vietnam')
egy_vac = _country_vaccinations('Egypt')
pak_vac = _country_vaccinations('Pakistan')
#For Indian Vaccination Dataset
# Short aliases for the frames loaded earlier.
df1 = state
df3 = india

# Rename the verbose state-wise vaccination columns to short names. The keys
# must match the CSV headers exactly (note the leading space in the Covaxin
# header). rename returns a new frame, so state_vac itself is not modified.
df2 = state_vac.rename(columns={
    'Updated On': 'Date',
    'Total Doses Administered': 'TotalDoses',
    'Male(Individuals Vaccinated)': 'Male',
    'Female(Individuals Vaccinated)': 'Female',
    'Total Individuals Vaccinated': 'TotalVaccinated',
    ' Covaxin (Doses Administered)': 'Covaxin',
    'CoviShield (Doses Administered)': 'CoviShield',
    'Sputnik V (Doses Administered)': 'Sputnik',
})
# Parse the day/month/year strings once; the original repeated the rename
# and this conversion a second time, which had no further effect.
df2['Date'] = pd.to_datetime(df2['Date'], format="%d/%m/%Y")

# Drop the rows whose State is 'India', keeping the remaining rows as an
# independent frame.
df2 = df2[df2['State'] != 'India'].copy()

# Snapshot of the vaccination rows dated 2021-08-09.
df2_2 = df2[df2['Date'] == "2021-08-9"]
# NOTE(review): the result of dropna() is discarded, so df2_2 is unchanged;
# assign it back if rows with missing values are meant to be removed.
df2_2.dropna()
# Rows of the India case table whose 'Date' equals '2021-08-11'.
df2_1 = df3[df3['Date'] == '2021-08-11']
# Render one dark-themed treemap per metric, each built from the first
# 25 rows of the `world` table and grouped by 'Country/Region'.
features = ['TotalCases', 'TotalDeaths', 'TotalRecovered', 'ActiveCases']
first_rows = world.iloc[0:25]
for metric in features:
    chart_title = "Tree Map depicting Impact of Covid-19 w.r.t {}".format(metric)
    fig = px.treemap(
        first_rows,
        path=['Country/Region'],
        values=metric,
        title=chart_title,
        template='plotly_dark',
    )
    fig.show()