Source code for comod.data

import os
from urllib.error import HTTPError
from ssl import SSLEOFError

import pandas as pd

from .common import is_notebook

# Pick the progress-bar flavour that matches the runtime (notebook widget or
# plain console). If tqdm is not installed at all, fall back to a no-op.
try:
    if is_notebook():
        from tqdm.notebook import tqdm
    else:
        from tqdm import tqdm
except ImportError:

    def tqdm(arg, *args, **kwargs):
        """Stand-in used when tqdm is unavailable: return ``arg`` unchanged.

        Any further positional or keyword arguments are accepted and ignored.
        """
        return arg
# Absolute path of the "data" directory located next to this module; the
# loader functions below build their CSV paths from it.
_data_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
def get_population_data():
    """Get a series mapping countries to their population.

    Reads ``population/population.csv`` from the package data directory.

    Returns:
        pd.Series: The ``PopTotal`` column multiplied by 1000, indexed by the
        ``Location`` column (presumably the file stores thousands of
        inhabitants -- confirm against the data source).
    """
    csv_file = os.path.join(_data_path, "population", "population.csv")
    table = pd.read_csv(csv_file)
    by_location = table.set_index("Location")
    return by_location["PopTotal"] * 1000
def get_spain_population_data():
    """Get a series mapping Spanish Autonomous communities to their population.

    Reads ``spain-population/poblacion-ccaa.csv`` from the package data
    directory.

    Returns:
        pd.Series: The ``Population`` column, indexed by the ``AC`` column.
    """
    csv_file = os.path.join(_data_path, "spain-population", "poblacion-ccaa.csv")
    table = pd.read_csv(csv_file)
    by_community = table.set_index("AC")
    return by_community["Population"]
def iter_JHU_daily(date_start, date_end, countries=None, columns=None):
    """
    Iterate over the JHU CSSE COVID-19 daily reports, one dataframe per day.

    For every day in the inclusive range the corresponding daily report CSV is
    downloaded from the CSSEGISandData/COVID-19 GitHub repository. Days whose
    download fails with an HTTP error or an SSL EOF error are reported with
    ``print`` and skipped; any other error propagates to the caller.

    Args:
        date_start (str): Start date (anything ``pd.date_range`` accepts).
        date_end (str): End date, inclusive.
        countries (None, str or iterable of str): If provided, only rows whose
            ``Country_Region`` value is one of these are kept. The report must
            then contain a ``Country_Region`` column.
        columns (None or list of str): If provided, passed to ``pd.read_csv``
            as ``usecols``.

    Yields:
        pd.DataFrame: The (optionally filtered) report of one day, with an
        added ``date`` column holding that day's timestamp.
    """
    if isinstance(countries, str):
        countries = [countries]
    elif countries is not None:
        # Materialise once so a one-shot iterable is not exhausted after the
        # first day.
        countries = list(countries)

    url_template = (
        "http://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/"
        "csse_covid_19_data/csse_covid_19_daily_reports/%s.csv"
    )

    for date in pd.date_range(date_start, date_end):
        d = date.strftime("%m-%d-%Y")  # American format
        # Only the download is guarded: errors raised while filtering or by
        # the consumer of the generator must not be mistaken for a failed
        # download.
        try:
            df = pd.read_csv(url_template % d, usecols=columns)
        except HTTPError:
            print("Ignoring error in %s" % str(date.date()))
            continue
        except SSLEOFError:
            print("Ignoring ssl error in %s" % str(date.date()))
            continue

        df["date"] = date
        if countries is not None:
            # Vectorised membership test instead of a row-wise Python lambda.
            df = df[df["Country_Region"].isin(countries)]
        yield df
def fetch_JHU_daily(date_start, date_end, countries=None, columns=None, progress=True):
    """
    Fetch a Dataframe with data from JHU CSSE COVID-19 Data.

    The full repository name is COVID-19 Data Repository by the Center for
    Systems Science and Engineering (CSSE) at Johns Hopkins University.
    Cf. https://github.com/CSSEGISandData/COVID-19 for license and terms of
    use.

    The daily reports produced by ``iter_JHU_daily`` are concatenated into a
    single dataframe with a fresh integer index.

    Args:
        date_start (str): Start date in ISO format (e.g., 2021-01-31).
        date_end (str): End date in ISO format.
        countries (None, str or list of str): If provided, list of countries
            to keep.
        columns (None or list of str): If provided, lists of columns to use.
        progress (bool): Whether to display a progress bar.

    Returns:
        pd.DataFrame: A dataframe with the information.

    Raises:
        ValueError: Raised by ``pd.concat`` when no daily report could be
            loaded (nothing to concatenate).
    """
    # The progress bar total is the number of days requested, even if some of
    # them end up being skipped by the iterator.
    n_days = len(pd.date_range(date_start, date_end))
    daily_frames = iter_JHU_daily(
        date_start, date_end, countries=countries, columns=columns
    )
    tracked_frames = tqdm(daily_frames, total=n_days, disable=not progress)
    return pd.concat(tracked_frames, ignore_index=True)