From 4532604e6f7efa19602294f3c84da8063c694e0c Mon Sep 17 00:00:00 2001 From: cgoyal23 <167020791+cgoyal23@users.noreply.github.com> Date: Fri, 24 May 2024 22:34:00 +0530 Subject: [PATCH 1/3] Created using Colab --- 230335.ipynb | 761 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 761 insertions(+) create mode 100644 230335.ipynb diff --git a/230335.ipynb b/230335.ipynb new file mode 100644 index 0000000..d92281b --- /dev/null +++ b/230335.ipynb @@ -0,0 +1,761 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3eJmduQENaek" + }, + "source": [ + "# Assignment 1.1 - Pandas Data Analysis Practice\n", + "\n", + "*This assignment is a part of the project [\"Customer Churn Analysis: From Data To Strategy\"]*\n", + "\n", + "In this assignment, you'll get to practice some of the concepts and skills covered in pandas.\n", + "\n", + "As you go through this notebook, you will find a **???** in certain places. To complete this assignment, you must replace all the **???** with appropriate values, expressions or statements to ensure that the notebook runs properly end-to-end.\n", + "\n", + "Some things to keep in mind:\n", + "\n", + "* Do not change variable names, delete cells or disturb other existing code. It may cause problems during evaluation.\n", + "* In some cases, you may need to add some code cells or new statements before or after the line of code containing the **???**.\n", + "* Questions marked **(Optional)** will not be considered for evaluation, and can be skipped. They are for your learning.\n", + "\n", + "\n", + "If you are stuck, you can ask for help on the Whatsapp group. 
Please do not use **ChatGPT or any other LLM to get solutions to the questions.**\n", + "\n", + "\n", + "Deadline : **22 May, 11:59 pm**\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "nzCoPd_zNae4" + }, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yKwMduIXNae5" + }, + "source": [ + "In this assignment, we're going to analyze an operate on data from a CSV file. Let's begin by downloading the CSV file." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Xa18DHWvNae8" + }, + "source": [ + "Let's load the data from the CSV file into a Pandas data frame." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "2QPUhz19Nae-", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 287 + }, + "outputId": "9aa4e1b9-a78a-4ab6-f09c-3ad9fd4bb91d" + }, + "outputs": [ + { + "output_type": "error", + "ename": "FileNotFoundError", + "evalue": "[Errno 2] No such file or directory: 'countries.csv'", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mcountries_df\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'countries.csv'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pandas/io/parsers/readers.py\u001b[0m in \u001b[0;36mread_csv\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, 
parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[0m\n\u001b[1;32m 910\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkwds_defaults\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 911\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 912\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_read\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 913\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 914\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pandas/io/parsers/readers.py\u001b[0m in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m 575\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 576\u001b[0m \u001b[0;31m# Create the parser.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 577\u001b[0;31m \u001b[0mparser\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mTextFileReader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 578\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 579\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mchunksize\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0miterator\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + 
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pandas/io/parsers/readers.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[1;32m 1405\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1406\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhandles\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mIOHandles\u001b[0m \u001b[0;34m|\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1407\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_make_engine\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mengine\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1408\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1409\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pandas/io/parsers/readers.py\u001b[0m in \u001b[0;36m_make_engine\u001b[0;34m(self, f, engine)\u001b[0m\n\u001b[1;32m 1659\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m\"b\"\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1660\u001b[0m \u001b[0mmode\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;34m\"b\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1661\u001b[0;31m self.handles = get_handle(\n\u001b[0m\u001b[1;32m 1662\u001b[0m 
\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1663\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pandas/io/common.py\u001b[0m in \u001b[0;36mget_handle\u001b[0;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[1;32m 857\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mioargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mencoding\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;34m\"b\"\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mioargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmode\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 858\u001b[0m \u001b[0;31m# Encoding\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 859\u001b[0;31m handle = open(\n\u001b[0m\u001b[1;32m 860\u001b[0m \u001b[0mhandle\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 861\u001b[0m \u001b[0mioargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmode\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'countries.csv'" + ] + } + ], + "source": [ + "countries_df = pd.read_csv('countries.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7_kR3k1wNae_" + }, + "outputs": [], + "source": [ + "countries_df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KE8FlOjINafA" + }, + "source": [ + "**Q1: How many countries does the dataframe contain?**\n", + "\n", + "Hint: Use the `.shape` method." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZD4-AM-CNafB" + }, + "outputs": [], + "source": [ + "num_countries = countries_df.shape[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Iy8SMfkwNafC" + }, + "outputs": [], + "source": [ + "print('There are {} countries in the dataset'.format(num_countries))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "m9eCXKMCNafE" + }, + "source": [ + "**Q2: Retrieve a list of continents from the dataframe.**\n", + "\n", + "*Hint: Use the `.unique` method of a series.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xL8zFetJNafF" + }, + "outputs": [], + "source": [ + "continents = countries_df['continent'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "40j7HkWZNafG" + }, + "outputs": [], + "source": [ + "continents" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yr9rdD5ZNafK" + }, + "source": [ + "**Q3: What is the total population of all the countries listed in this dataset?**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IrGeG0vwNafL" + }, + "outputs": [], + "source": [ + "total_population = countries_df['population'].sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FUJRPPMcNafM" + }, + "outputs": [], + "source": [ + "print('The total population is {}.'.format(int(total_population)))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nvs9htXuNafO" + }, + "source": [ + "**Q: (Optional) What is the overall life expectancy across the world?**\n", + "\n", + "*Hint: You'll need to take a weighted average of life expectancy using populations as weights.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8TXTdZ0ENafP" + }, + "outputs": [], + "source": [] + }, + { + "cell_type": 
"code", + "execution_count": null, + "metadata": { + "id": "97C4RlcPNafP" + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "H1H_0zo8NafR" + }, + "source": [ + "**Q4: Create a dataframe containing 10 countries with the highest population.**\n", + "\n", + "*Hint: Chain the `sort_values` and `head` methods.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3BBebkbyNafR" + }, + "outputs": [], + "source": [ + "most_populous_df = countries_df.sort_values('population', ascending=False).head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "L83PYtCaNafS" + }, + "outputs": [], + "source": [ + "most_populous_df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RldAR4WJNafT" + }, + "source": [ + "**Q5: Add a new column in `countries_df` to record the overall GDP per country (product of population & per capita GDP).**\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "R_mdT6GMNafU" + }, + "outputs": [], + "source": [ + "countries_df['gdp'] = countries_df['population'] * countries_df['gdp_per_capita']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FLydP8pHNafV" + }, + "outputs": [], + "source": [ + "countries_df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RXtXfNfBNafX" + }, + "source": [ + "**Q: (Optional) Create a dataframe containing 10 countries with the lowest GDP per capita, among the counties with population greater than 100 million.**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EkCt9Wl7NafX" + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nM5flE-QNafY" + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_NA6oQnVNafZ" + }, + "source": [ + "**Q6: Create a 
data frame that counts the number of countries in each continent.**\n", + "\n", + "*Hint: Use `groupby`, select the `location` column and aggregate using `count`.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Co-tfE7BNafv" + }, + "outputs": [], + "source": [ + "country_counts_df = countries_df.groupby('continent')['location'].count()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6fVwjKFFNafw" + }, + "outputs": [], + "source": [ + "country_counts_df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ut6cNp0yNafy" + }, + "source": [ + "**Q7: Create a data frame showing the total population of each continent.**\n", + "\n", + "*Hint: Use `groupby`, select the population column and aggregate using `sum`.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Hqnwb5V0Nafy" + }, + "outputs": [], + "source": [ + "continent_populations_df = countries_df.groupby('continent')['population'].sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "D5gAu5IANafz" + }, + "outputs": [], + "source": [ + "continent_populations_df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Am9h8-R4Naf1" + }, + "source": [ + "Let's download another CSV file containing overall Covid-19 stats for various countries, and read the data into another Pandas data frame." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "sOybiAeVNaf6" + }, + "outputs": [], + "source": [ + "covid_data_df = pd.read_csv('covid-countries-data.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true, + "id": "k9PuPoYUNaf7" + }, + "outputs": [], + "source": [ + "covid_data_df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "weKZowz0Naf8" + }, + "source": [ + "**Q8: Count the number of countries for which the `total_tests` data is missing.**\n", + "\n", + "*Hint: Use the `.isna` method.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UybZmvdFNaf9" + }, + "outputs": [], + "source": [ + "total_tests_missing = covid_data_df['total_tests'].isna().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0_jz32NxNaf-" + }, + "outputs": [], + "source": [ + "print(\"The data for total tests is missing for {} countries.\".format(int(total_tests_missing)))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bK9yyH3GNagA" + }, + "source": [ + "Let's merge the two data frames, and compute some more metrics.\n", + "\n", + "**Q9: Merge `countries_df` with `covid_data_df` on the `location` column.**\n", + "\n", + "*Hint: Use the `.merge` method on `countries_df`.*" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6qu0tGtJNagB" + }, + "outputs": [], + "source": [ + "combined_df = countries_df.merge(covid_data_df, on='location')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yNR9WKaENagC" + }, + "outputs": [], + "source": [ + "combined_df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DMFIKAXvNagG" + }, + "source": [ + "**Q10: Add columns `tests_per_million`, `cases_per_million` and `deaths_per_million` into `combined_df`.**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_KdWa_juNagH" + }, + "outputs": [], + "source": [ + "combined_df['tests_per_million'] = combined_df['total_tests'] * 1e6 / combined_df['population']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IDdn-5CONagI" + }, + "outputs": [], + "source": [ + "combined_df['cases_per_million'] = combined_df['total_cases'] * 1e6 / combined_df['population']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "93n1py5BNagK" + }, + "outputs": [], + "source": [ + "combined_df['deaths_per_million'] = combined_df['total_deaths'] * 1e6 / combined_df['population']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IRMupBBUNagL" + }, + "outputs": [], + "source": [ + "combined_df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VNjlGq-uNagO" + }, + "source": [ + "**Q11: Create a dataframe with 10 countries that have the highest number of tests per million people.**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HBcHEMJZNagO" + }, + "outputs": [], + "source": [ + "highest_tests_df = combined_df.sort_values('tests_per_million', ascending=False).head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "R57UrLhxNagR" + }, + "outputs": [], + 
"source": [ + "highest_tests_df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ug-VTfalNagT" + }, + "source": [ + "**Q12: Create a dataframe with 10 countires that have highest number of positive cases per million people.**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HxjUJq8oNagU" + }, + "outputs": [], + "source": [ + "highest_cases_df = ???" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7EuO7nPENagY" + }, + "outputs": [], + "source": [ + "highest_cases_df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GX-WdUFHNagb" + }, + "source": [ + "**Q13: Create a dataframe with 10 countires that have highest number of deaths cases per million people?**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hH19qct0Nagc" + }, + "outputs": [], + "source": [ + "highest_deaths_df = ???" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lWhydo7ENagd" + }, + "outputs": [], + "source": [ + "highest_deaths_df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nUTDc8q4Nage" + }, + "source": [ + "**(Optional) Q: Count number of countries that feature in both the lists of \"highest number of tests per million\" and \"highest number of cases per million\".**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nXat9wCrNagf" + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Z7y8D9OtNagg" + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aJXjCbJnNagg" + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uHWghQHUNagh" + }, + "source": [ + "**(Optional) Q: Count number of countries that feature in both the lists \"20 countries with lowest GDP per capita\" and \"20 
countries with the lowest number of hospital beds per thousand population\". Only consider countries with a population higher than 10 million while creating the list.**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5C7khtW3Nagi" + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RKk0r3umNagj" + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XH7POtliNagj" + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wvjafOzENagk" + }, + "source": [ + "## Submission\n", + "\n", + "Congratulations on making it this far! You've reached the end of this assignment, and you just completed your first real-world data analysis problem.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LBOb5DhHNagl" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.6" + }, + "colab": { + "provenance": [], + "include_colab_link": true + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file From 89d6f35b9e8dae04d53ab1bc34fe1ad16d07d239 Mon Sep 17 00:00:00 2001 From: cgoyal23 <167020791+cgoyal23@users.noreply.github.com> Date: Sat, 25 May 2024 06:47:19 +0530 Subject: [PATCH 2/3] Created using Colab --- Copy_of_Assignment_1_1_by_chirag.ipynb | 5709 ++++++++++++++++++++++++ 1 file changed, 5709 insertions(+) create mode 100644 Copy_of_Assignment_1_1_by_chirag.ipynb diff --git a/Copy_of_Assignment_1_1_by_chirag.ipynb b/Copy_of_Assignment_1_1_by_chirag.ipynb new file 
mode 100644 index 0000000..900e1db --- /dev/null +++ b/Copy_of_Assignment_1_1_by_chirag.ipynb @@ -0,0 +1,5709 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3eJmduQENaek" + }, + "source": [ + "# Assignment 1.1 - Pandas Data Analysis Practice\n", + "\n", + "*This assignment is a part of the project [\"Customer Churn Analysis: From Data To Strategy\"]*\n", + "\n", + "In this assignment, you'll get to practice some of the concepts and skills covered in pandas.\n", + "\n", + "As you go through this notebook, you will find a **???** in certain places. To complete this assignment, you must replace all the **???** with appropriate values, expressions or statements to ensure that the notebook runs properly end-to-end.\n", + "\n", + "Some things to keep in mind:\n", + "\n", + "* Do not change variable names, delete cells or disturb other existing code. It may cause problems during evaluation.\n", + "* In some cases, you may need to add some code cells or new statements before or after the line of code containing the **???**.\n", + "* Questions marked **(Optional)** will not be considered for evaluation, and can be skipped. They are for your learning.\n", + "\n", + "\n", + "If you are stuck, you can ask for help on the Whatsapp group. 
Please do not use **ChatGPT or any other LLM to get solutions to the questions.**\n", + "\n", + "\n", + "Deadline : **22 May, 11:59 pm**\n" + ] + }, + { + "cell_type": "code", + "source": [ + "from google.colab import drive\n", + "drive.mount('/content/drive')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "qmBvIMj63uUa", + "outputId": "1394f8fc-0d5b-4791-d9ea-8f8e39920e78" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Mounted at /content/drive\n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nzCoPd_zNae4" + }, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yKwMduIXNae5" + }, + "source": [ + "In this assignment, we're going to analyze and operate on data from a CSV file. Let's begin by downloading the CSV file." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Xa18DHWvNae8" + }, + "source": [ + "Let's load the data from the CSV file into a Pandas data frame." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2QPUhz19Nae-" + }, + "outputs": [], + "source": [ + "countries_df = pd.read_csv('countries.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7_kR3k1wNae_", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 424 + }, + "outputId": "199cfd5f-aba7-4404-9415-69194ed8cf81" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " location continent population life_expectancy \\\n", + "0 Afghanistan Asia 38928341 64.83 \n", + "1 Albania Europe 2877800 78.57 \n", + "2 Algeria Africa 43851043 76.88 \n", + "3 Andorra Europe 77265 83.73 \n", + "4 Angola Africa 32866268 61.15 \n", + ".. ... ... ... ... 
\n", + "205 Vietnam Asia 97338583 75.40 \n", + "206 Western Sahara Africa 597330 70.26 \n", + "207 Yemen Asia 29825968 66.12 \n", + "208 Zambia Africa 18383956 63.89 \n", + "209 Zimbabwe Africa 14862927 61.49 \n", + "\n", + " hospital_beds_per_thousand gdp_per_capita \n", + "0 0.50 1803.987 \n", + "1 2.89 11803.431 \n", + "2 1.90 13913.839 \n", + "3 NaN NaN \n", + "4 NaN 5819.495 \n", + ".. ... ... \n", + "205 2.60 6171.884 \n", + "206 NaN NaN \n", + "207 0.70 1479.147 \n", + "208 2.00 3689.251 \n", + "209 1.70 1899.775 \n", + "\n", + "[210 rows x 6 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
locationcontinentpopulationlife_expectancyhospital_beds_per_thousandgdp_per_capita
0AfghanistanAsia3892834164.830.501803.987
1AlbaniaEurope287780078.572.8911803.431
2AlgeriaAfrica4385104376.881.9013913.839
3AndorraEurope7726583.73NaNNaN
4AngolaAfrica3286626861.15NaN5819.495
.....................
205VietnamAsia9733858375.402.606171.884
206Western SaharaAfrica59733070.26NaNNaN
207YemenAsia2982596866.120.701479.147
208ZambiaAfrica1838395663.892.003689.251
209ZimbabweAfrica1486292761.491.701899.775
\n", + "

210 rows × 6 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "countries_df", + "summary": "{\n \"name\": \"countries_df\",\n \"rows\": 210,\n \"fields\": [\n {\n \"column\": \"location\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 210,\n \"samples\": [\n \"Bulgaria\",\n \"Slovenia\",\n \"Guyana\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"continent\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"Asia\",\n \"Europe\",\n \"Oceania\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"population\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 142509205,\n \"min\": 809,\n \"max\": 1439323774,\n \"num_unique_values\": 210,\n \"samples\": [\n 6948445,\n 2078932,\n 786559\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"life_expectancy\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 7.560056151115564,\n \"min\": 53.28,\n \"max\": 86.75,\n \"num_unique_values\": 197,\n \"samples\": [\n 74.25,\n 76.16,\n 79.19\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"hospital_beds_per_thousand\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2.4649168855927868,\n \"min\": 0.1,\n \"max\": 13.8,\n \"num_unique_values\": 99,\n \"samples\": [\n 2.397,\n 0.3,\n 2.54\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"gdp_per_capita\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 19820.802488409416,\n \"min\": 661.24,\n \"max\": 116935.6,\n \"num_unique_values\": 183,\n \"samples\": [\n 50669.315,\n 3601.006,\n 1569.888\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 4 + } + ], + "source": [ + "countries_df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": 
"KE8FlOjINafA" + }, + "source": [ + "**Q1: How many countries does the dataframe contain?**\n", + "\n", + "Hint: Use the `.shape` method." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZD4-AM-CNafB" + }, + "outputs": [], + "source": [ + "num_countries = countries_df.shape[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Iy8SMfkwNafC", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "6f00f29d-84f6-46d2-99dc-00bad9eb4e61" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "There are 210 countries in the dataset\n" + ] + } + ], + "source": [ + "print('There are {} countries in the dataset'.format(num_countries))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "m9eCXKMCNafE" + }, + "source": [ + "**Q2: Retrieve a list of continents from the dataframe?**\n", + "\n", + "*Hint: Use the `.unique` method of a series.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xL8zFetJNafF" + }, + "outputs": [], + "source": [ + "continents = countries_df[\"continent\"].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "40j7HkWZNafG", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "c5fe26e1-f9f9-4b04-c106-d2282616d1ef" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array(['Asia', 'Europe', 'Africa', 'North America', 'South America',\n", + " 'Oceania'], dtype=object)" + ] + }, + "metadata": {}, + "execution_count": 8 + } + ], + "source": [ + "continents" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yr9rdD5ZNafK" + }, + "source": [ + "**Q3: What is the total population of all the countries listed in this dataset?**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IrGeG0vwNafL" + }, + "outputs": [], + "source": [ + 
"total_population = countries_df[\"population\"].sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FUJRPPMcNafM", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "ccb6d9df-ab33-4e53-fcbd-4494a3adc11e" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "The total population is 7757980095.\n" + ] + } + ], + "source": [ + "print('The total population is {}.'.format(int(total_population)))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nvs9htXuNafO" + }, + "source": [ + "**Q: (Optional) What is the overall life expectancy across in the world?**\n", + "\n", + "*Hint: You'll need to take a weighted average of life expectancy using populations as weights.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8TXTdZ0ENafP" + }, + "outputs": [], + "source": [ + "x= countries_df[\"population\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "97C4RlcPNafP" + }, + "outputs": [], + "source": [ + "y= countries_df[\"life_expectancy\"]" + ] + }, + { + "cell_type": "code", + "source": [ + "mean= ((x*y).sum())/x.sum()" + ], + "metadata": { + "id": "Cp6-hlNS7kvg" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "mean" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "9xesg93-_Ypv", + "outputId": "f99e34c8-9d36-4c19-8af6-6244a01463d1" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "72.72165193409664" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "H1H_0zo8NafR" + }, + "source": [ + "**Q4: Create a dataframe containing 10 countries with the highest population.**\n", + "\n", + "*Hint: Chain the `sort_values` and `head` methods.*" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": { + "id": "3BBebkbyNafR" + }, + "outputs": [], + "source": [ + "most_populous_df = countries_df.sort_values(by=\"population\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "L83PYtCaNafS" + }, + "outputs": [], + "source": [ + "a=most_populous_df.tail(10)" + ] + }, + { + "cell_type": "code", + "source": [ + "a.iloc[::-1]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "AXsWwb3nAmlg", + "outputId": "d7b17b4d-b566-444c-8188-3dd6908e6744" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " location continent population life_expectancy \\\n", + "41 China Asia 1439323774 76.91 \n", + "90 India Asia 1380004385 69.66 \n", + "199 United States North America 331002647 78.86 \n", + "91 Indonesia Asia 273523621 71.72 \n", + "145 Pakistan Asia 220892331 67.27 \n", + "27 Brazil South America 212559409 75.88 \n", + "141 Nigeria Africa 206139587 54.69 \n", + "15 Bangladesh Asia 164689383 72.59 \n", + "157 Russia Europe 145934460 72.58 \n", + "125 Mexico North America 128932753 75.05 \n", + "\n", + " hospital_beds_per_thousand gdp_per_capita \n", + "41 4.34 15308.712 \n", + "90 0.53 6426.674 \n", + "199 2.77 54225.446 \n", + "91 1.04 11188.744 \n", + "145 0.60 5034.708 \n", + "27 2.20 14103.452 \n", + "141 NaN 5338.454 \n", + "15 0.80 3523.984 \n", + "157 8.05 24765.954 \n", + "125 1.38 17336.469 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
locationcontinentpopulationlife_expectancyhospital_beds_per_thousandgdp_per_capita
41ChinaAsia143932377476.914.3415308.712
90IndiaAsia138000438569.660.536426.674
199United StatesNorth America33100264778.862.7754225.446
91IndonesiaAsia27352362171.721.0411188.744
145PakistanAsia22089233167.270.605034.708
27BrazilSouth America21255940975.882.2014103.452
141NigeriaAfrica20613958754.69NaN5338.454
15BangladeshAsia16468938372.590.803523.984
157RussiaEurope14593446072.588.0524765.954
125MexicoNorth America12893275375.051.3817336.469
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"a\",\n \"rows\": 10,\n \"fields\": [\n {\n \"column\": \"location\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 10,\n \"samples\": [\n \"Russia\",\n \"India\",\n \"Brazil\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"continent\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"North America\",\n \"Europe\",\n \"South America\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"population\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 509275248,\n \"min\": 128932753,\n \"max\": 1439323774,\n \"num_unique_values\": 10,\n \"samples\": [\n 145934460,\n 1380004385,\n 212559409\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"life_expectancy\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 6.8433072738584855,\n \"min\": 54.69,\n \"max\": 78.86,\n \"num_unique_values\": 10,\n \"samples\": [\n 72.58,\n 69.66,\n 75.88\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"hospital_beds_per_thousand\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2.4507283497859254,\n \"min\": 0.53,\n \"max\": 8.05,\n \"num_unique_values\": 9,\n \"samples\": [\n 8.05,\n 0.53,\n 2.2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"gdp_per_capita\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 15083.054118576796,\n \"min\": 3523.984,\n \"max\": 54225.446,\n \"num_unique_values\": 10,\n \"samples\": [\n 24765.954,\n 6426.674,\n 14103.452\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 26 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RldAR4WJNafT" + }, + "source": [ + "**Q5: Add a new column in 
`countries_df` to record the overall GDP per country (product of population & per capita GDP).**\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "R_mdT6GMNafU" + }, + "outputs": [], + "source": [ + "countries_df['gdp'] = countries_df[\"gdp_per_capita\"]*countries_df['population']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FLydP8pHNafV", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 424 + }, + "outputId": "330fc9ec-8a86-4d92-88dd-f90be7865b54" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " location continent population life_expectancy \\\n", + "0 Afghanistan Asia 38928341 64.83 \n", + "1 Albania Europe 2877800 78.57 \n", + "2 Algeria Africa 43851043 76.88 \n", + "3 Andorra Europe 77265 83.73 \n", + "4 Angola Africa 32866268 61.15 \n", + ".. ... ... ... ... \n", + "205 Vietnam Asia 97338583 75.40 \n", + "206 Western Sahara Africa 597330 70.26 \n", + "207 Yemen Asia 29825968 66.12 \n", + "208 Zambia Africa 18383956 63.89 \n", + "209 Zimbabwe Africa 14862927 61.49 \n", + "\n", + " hospital_beds_per_thousand gdp_per_capita gdp \n", + "0 0.50 1803.987 7.022622e+10 \n", + "1 2.89 11803.431 3.396791e+10 \n", + "2 1.90 13913.839 6.101364e+11 \n", + "3 NaN NaN NaN \n", + "4 NaN 5819.495 1.912651e+11 \n", + ".. ... ... ... \n", + "205 2.60 6171.884 6.007624e+11 \n", + "206 NaN NaN NaN \n", + "207 0.70 1479.147 4.411699e+10 \n", + "208 2.00 3689.251 6.782303e+10 \n", + "209 1.70 1899.775 2.823622e+10 \n", + "\n", + "[210 rows x 7 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
locationcontinentpopulationlife_expectancyhospital_beds_per_thousandgdp_per_capitagdp
0AfghanistanAsia3892834164.830.501803.9877.022622e+10
1AlbaniaEurope287780078.572.8911803.4313.396791e+10
2AlgeriaAfrica4385104376.881.9013913.8396.101364e+11
3AndorraEurope7726583.73NaNNaNNaN
4AngolaAfrica3286626861.15NaN5819.4951.912651e+11
........................
205VietnamAsia9733858375.402.606171.8846.007624e+11
206Western SaharaAfrica59733070.26NaNNaNNaN
207YemenAsia2982596866.120.701479.1474.411699e+10
208ZambiaAfrica1838395663.892.003689.2516.782303e+10
209ZimbabweAfrica1486292761.491.701899.7752.823622e+10
\n", + "

210 rows × 7 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "countries_df", + "summary": "{\n \"name\": \"countries_df\",\n \"rows\": 210,\n \"fields\": [\n {\n \"column\": \"location\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 210,\n \"samples\": [\n \"Bulgaria\",\n \"Slovenia\",\n \"Guyana\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"continent\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"Asia\",\n \"Europe\",\n \"Oceania\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"population\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 142509205,\n \"min\": 809,\n \"max\": 1439323774,\n \"num_unique_values\": 210,\n \"samples\": [\n 6948445,\n 2078932,\n 786559\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"life_expectancy\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 7.560056151115564,\n \"min\": 53.28,\n \"max\": 86.75,\n \"num_unique_values\": 197,\n \"samples\": [\n 74.25,\n 76.16,\n 79.19\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"hospital_beds_per_thousand\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2.4649168855927868,\n \"min\": 0.1,\n \"max\": 13.8,\n \"num_unique_values\": 99,\n \"samples\": [\n 2.397,\n 0.3,\n 2.54\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"gdp_per_capita\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 19820.802488409416,\n \"min\": 661.24,\n \"max\": 116935.6,\n \"num_unique_values\": 183,\n \"samples\": [\n 50669.315,\n 3601.006,\n 1569.888\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"gdp\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2272769090054.96,\n \"min\": 669035852.954,\n \"max\": 22034193130919.086,\n 
\"num_unique_values\": 183,\n \"samples\": [\n 3155330252.9950004,\n 94988326544.65,\n 17572900832.351997\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 29 + } + ], + "source": [ + "countries_df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RXtXfNfBNafX" + }, + "source": [ + "** ***bold text***Q: (Optional) Create a dataframe containing 10 countries with the lowest GDP per capita, among the counties with population greater than 100 million.**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EkCt9Wl7NafX" + }, + "outputs": [], + "source": [ + "drop_down= countries_df[countries_df[\"population\"]>100000000]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nM5flE-QNafY" + }, + "outputs": [], + "source": [ + "cc= drop_down.sort_values(by=\"gdp_per_capita\")" + ] + }, + { + "cell_type": "code", + "source": [ + "cc.head(10)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "jawAmGewHXZr", + "outputId": "a4ed4b6b-d1d3-4aaa-fd3e-0352b12e3bff" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " location continent population life_expectancy \\\n", + "63 Ethiopia Africa 114963583 66.60 \n", + "15 Bangladesh Asia 164689383 72.59 \n", + "145 Pakistan Asia 220892331 67.27 \n", + "141 Nigeria Africa 206139587 54.69 \n", + "90 India Asia 1380004385 69.66 \n", + "151 Philippines Asia 109581085 71.23 \n", + "58 Egypt Africa 102334403 71.99 \n", + "91 Indonesia Asia 273523621 71.72 \n", + "27 Brazil South America 212559409 75.88 \n", + "41 China Asia 1439323774 76.91 \n", + "\n", + " hospital_beds_per_thousand gdp_per_capita gdp \n", + "63 0.30 1729.927 1.988786e+11 \n", + "15 0.80 3523.984 5.803628e+11 \n", + "145 0.60 5034.708 1.112128e+12 \n", + "141 NaN 5338.454 1.100467e+12 \n", + "90 
0.53 6426.674 8.868838e+12 \n", + "151 1.00 7599.188 8.327273e+11 \n", + "58 1.60 10550.206 1.079649e+12 \n", + "91 1.04 11188.744 3.060386e+12 \n", + "27 2.20 14103.452 2.997821e+12 \n", + "41 4.34 15308.712 2.203419e+13 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
locationcontinentpopulationlife_expectancyhospital_beds_per_thousandgdp_per_capitagdp
63EthiopiaAfrica11496358366.600.301729.9271.988786e+11
15BangladeshAsia16468938372.590.803523.9845.803628e+11
145PakistanAsia22089233167.270.605034.7081.112128e+12
141NigeriaAfrica20613958754.69NaN5338.4541.100467e+12
90IndiaAsia138000438569.660.536426.6748.868838e+12
151PhilippinesAsia10958108571.231.007599.1888.327273e+11
58EgyptAfrica10233440371.991.6010550.2061.079649e+12
91IndonesiaAsia27352362171.721.0411188.7443.060386e+12
27BrazilSouth America21255940975.882.2014103.4522.997821e+12
41ChinaAsia143932377476.914.3415308.7122.203419e+13
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "cc", + "summary": "{\n \"name\": \"cc\",\n \"rows\": 14,\n \"fields\": [\n {\n \"column\": \"location\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 14,\n \"samples\": [\n \"China\",\n \"Russia\",\n \"Ethiopia\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"continent\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"Asia\",\n \"Europe\",\n \"South America\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"population\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 452256768,\n \"min\": 102334403,\n \"max\": 1439323774,\n \"num_unique_values\": 14,\n \"samples\": [\n 1439323774,\n 145934460,\n 114963583\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"life_expectancy\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 6.865998562753727,\n \"min\": 54.69,\n \"max\": 84.63,\n \"num_unique_values\": 14,\n \"samples\": [\n 76.91,\n 72.58,\n 66.6\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"hospital_beds_per_thousand\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3.7111574668275322,\n \"min\": 0.3,\n \"max\": 13.05,\n \"num_unique_values\": 13,\n \"samples\": [\n 13.05,\n 1.38,\n 0.3\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"gdp_per_capita\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 14875.982836683967,\n \"min\": 1729.927,\n \"max\": 54225.446,\n \"num_unique_values\": 14,\n \"samples\": [\n 15308.712,\n 24765.954,\n 1729.927\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"gdp\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 6769601807621.744,\n \"min\": 198878606248.44098,\n \"max\": 22034193130919.086,\n 
\"num_unique_values\": 14,\n \"samples\": [\n 22034193130919.086,\n 3614206123374.8403,\n 198878606248.44098\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 41 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_NA6oQnVNafZ" + }, + "source": [ + "**Q6: Create a data frame that counts the number countries in each continent?**\n", + "\n", + "*Hint: Use `groupby`, select the `location` column and aggregate using `count`.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Co-tfE7BNafv" + }, + "outputs": [], + "source": [ + "country_counts_df = countries_df.groupby([\"continent\"])[\"continent\"].count()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6fVwjKFFNafw", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "9a5f513c-2b5d-4a7a-d251-4fbc6d58459e" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "continent\n", + "Africa 55\n", + "Asia 47\n", + "Europe 51\n", + "North America 36\n", + "Oceania 8\n", + "South America 13\n", + "Name: continent, dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 45 + } + ], + "source": [ + "country_counts_df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ut6cNp0yNafy" + }, + "source": [ + "**Q7: Create a data frame showing the total population of each continent.**\n", + "\n", + "*Hint: Use `groupby`, select the population column and aggregate using `sum`.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Hqnwb5V0Nafy" + }, + "outputs": [], + "source": [ + "continent_populations_df = countries_df.groupby([\"continent\"])[\"population\"].sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "D5gAu5IANafz", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": 
"f5c1aaf6-3097-4507-dc09-8ddbb56b9144" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "continent\n", + "Africa 1339423921\n", + "Asia 4607388081\n", + "Europe 748506210\n", + "North America 591242473\n", + "Oceania 40958320\n", + "South America 430461090\n", + "Name: population, dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 51 + } + ], + "source": [ + "continent_populations_df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Am9h8-R4Naf1" + }, + "source": [ + "Let's download another CSV file containing overall Covid-19 stats for various countires, and read the data into another Pandas data frame." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "sOybiAeVNaf6" + }, + "outputs": [], + "source": [ + "covid_data_df = pd.read_csv('covid-countries-data.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true, + "id": "k9PuPoYUNaf7", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 424 + }, + "outputId": "56598118-2557-45fb-e20e-44633f2633b5" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " location total_cases total_deaths total_tests\n", + "0 Afghanistan 38243.0 1409.0 NaN\n", + "1 Albania 9728.0 296.0 NaN\n", + "2 Algeria 45158.0 1525.0 NaN\n", + "3 Andorra 1199.0 53.0 NaN\n", + "4 Angola 2729.0 109.0 NaN\n", + ".. ... ... ... ...\n", + "207 Western Sahara 766.0 1.0 NaN\n", + "208 World 26059065.0 863535.0 NaN\n", + "209 Yemen 1976.0 571.0 NaN\n", + "210 Zambia 12415.0 292.0 NaN\n", + "211 Zimbabwe 6638.0 206.0 97272.0\n", + "\n", + "[212 rows x 4 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
locationtotal_casestotal_deathstotal_tests
0Afghanistan38243.01409.0NaN
1Albania9728.0296.0NaN
2Algeria45158.01525.0NaN
3Andorra1199.053.0NaN
4Angola2729.0109.0NaN
...............
207Western Sahara766.01.0NaN
208World26059065.0863535.0NaN
209Yemen1976.0571.0NaN
210Zambia12415.0292.0NaN
211Zimbabwe6638.0206.097272.0
\n", + "

212 rows × 4 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "covid_data_df", + "summary": "{\n \"name\": \"covid_data_df\",\n \"rows\": 212,\n \"fields\": [\n {\n \"column\": \"location\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 212,\n \"samples\": [\n \"Bulgaria\",\n \"Slovenia\",\n \"Nicaragua\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_cases\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1875762.2640246067,\n \"min\": 3.0,\n \"max\": 26059065.0,\n \"num_unique_values\": 210,\n \"samples\": [\n 16454.0,\n 3310.0,\n 1382.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_deaths\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 61654.54433404689,\n \"min\": 0.0,\n \"max\": 863535.0,\n \"num_unique_values\": 157,\n \"samples\": [\n 42.0,\n 626.0,\n 329.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_tests\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 10616500.238255894,\n \"min\": 8408.0,\n \"max\": 83898416.0,\n \"num_unique_values\": 90,\n \"samples\": [\n 445722.0,\n 869430.0,\n 711225.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 53 + } + ], + "source": [ + "covid_data_df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "weKZowz0Naf8" + }, + "source": [ + "**Q8: Count the number of countries for which the `total_tests` data is missing.**\n", + "\n", + "*Hint: Use the `.isna` method.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UybZmvdFNaf9" + }, + "outputs": [], + "source": [ + "total_tests_missing = covid_data_df[\"total_tests\"].isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0_jz32NxNaf-", + "colab": { + "base_uri": 
"https://localhost:8080/" + }, + "outputId": "51a5f900-be50-435e-aea4-ee168acfd662" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "The data for total tests is missing for 122 countries.\n" + ] + } + ], + "source": [ + "print(\"The data for total tests is missing for {} countries.\".format(int(total_tests_missing)))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bK9yyH3GNagA" + }, + "source": [ + "Let's merge the two data frames, and compute some more metrics.\n", + "\n", + "**Q9: Merge `countries_df` with `covid_data_df` on the `location` column.**\n", + "\n", + "*Hint: Use the `.merge` method on `countries_df`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6qu0tGtJNagB" + }, + "outputs": [], + "source": [ + "combined_df = pd.merge(countries_df,covid_data_df,on=\"location\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yNR9WKaENagC", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 441 + }, + "outputId": "9f3cb0f7-dd54-49cc-cd2d-f1bef7aa6e2e" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " location continent population life_expectancy \\\n", + "0 Afghanistan Asia 38928341 64.83 \n", + "1 Albania Europe 2877800 78.57 \n", + "2 Algeria Africa 43851043 76.88 \n", + "3 Andorra Europe 77265 83.73 \n", + "4 Angola Africa 32866268 61.15 \n", + ".. ... ... ... ... \n", + "205 Vietnam Asia 97338583 75.40 \n", + "206 Western Sahara Africa 597330 70.26 \n", + "207 Yemen Asia 29825968 66.12 \n", + "208 Zambia Africa 18383956 63.89 \n", + "209 Zimbabwe Africa 14862927 61.49 \n", + "\n", + " hospital_beds_per_thousand gdp_per_capita gdp total_cases \\\n", + "0 0.50 1803.987 7.022622e+10 38243.0 \n", + "1 2.89 11803.431 3.396791e+10 9728.0 \n", + "2 1.90 13913.839 6.101364e+11 45158.0 \n", + "3 NaN NaN NaN 1199.0 \n", + "4 NaN 5819.495 1.912651e+11 2729.0 \n", + ".. ... ... 
... ... \n", + "205 2.60 6171.884 6.007624e+11 1046.0 \n", + "206 NaN NaN NaN 766.0 \n", + "207 0.70 1479.147 4.411699e+10 1976.0 \n", + "208 2.00 3689.251 6.782303e+10 12415.0 \n", + "209 1.70 1899.775 2.823622e+10 6638.0 \n", + "\n", + " total_deaths total_tests \n", + "0 1409.0 NaN \n", + "1 296.0 NaN \n", + "2 1525.0 NaN \n", + "3 53.0 NaN \n", + "4 109.0 NaN \n", + ".. ... ... \n", + "205 35.0 261004.0 \n", + "206 1.0 NaN \n", + "207 571.0 NaN \n", + "208 292.0 NaN \n", + "209 206.0 97272.0 \n", + "\n", + "[210 rows x 10 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
locationcontinentpopulationlife_expectancyhospital_beds_per_thousandgdp_per_capitagdptotal_casestotal_deathstotal_tests
0AfghanistanAsia3892834164.830.501803.9877.022622e+1038243.01409.0NaN
1AlbaniaEurope287780078.572.8911803.4313.396791e+109728.0296.0NaN
2AlgeriaAfrica4385104376.881.9013913.8396.101364e+1145158.01525.0NaN
3AndorraEurope7726583.73NaNNaNNaN1199.053.0NaN
4AngolaAfrica3286626861.15NaN5819.4951.912651e+112729.0109.0NaN
.................................
205VietnamAsia9733858375.402.606171.8846.007624e+111046.035.0261004.0
206Western SaharaAfrica59733070.26NaNNaNNaN766.01.0NaN
207YemenAsia2982596866.120.701479.1474.411699e+101976.0571.0NaN
208ZambiaAfrica1838395663.892.003689.2516.782303e+1012415.0292.0NaN
209ZimbabweAfrica1486292761.491.701899.7752.823622e+106638.0206.097272.0
\n", + "

210 rows × 10 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "combined_df", + "summary": "{\n \"name\": \"combined_df\",\n \"rows\": 210,\n \"fields\": [\n {\n \"column\": \"location\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 210,\n \"samples\": [\n \"Bulgaria\",\n \"Slovenia\",\n \"Guyana\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"continent\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"Asia\",\n \"Europe\",\n \"Oceania\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"population\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 142509205,\n \"min\": 809,\n \"max\": 1439323774,\n \"num_unique_values\": 210,\n \"samples\": [\n 6948445,\n 2078932,\n 786559\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"life_expectancy\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 7.560056151115564,\n \"min\": 53.28,\n \"max\": 86.75,\n \"num_unique_values\": 197,\n \"samples\": [\n 74.25,\n 76.16,\n 79.19\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"hospital_beds_per_thousand\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2.4649168855927868,\n \"min\": 0.1,\n \"max\": 13.8,\n \"num_unique_values\": 99,\n \"samples\": [\n 2.397,\n 0.3,\n 2.54\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"gdp_per_capita\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 19820.802488409416,\n \"min\": 661.24,\n \"max\": 116935.6,\n \"num_unique_values\": 183,\n \"samples\": [\n 50669.315,\n 3601.006,\n 1569.888\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"gdp\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2272769090054.96,\n \"min\": 669035852.954,\n \"max\": 22034193130919.086,\n 
\"num_unique_values\": 183,\n \"samples\": [\n 3155330252.9950004,\n 94988326544.65,\n 17572900832.351997\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_cases\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 577759.0509830201,\n \"min\": 3.0,\n \"max\": 6114406.0,\n \"num_unique_values\": 208,\n \"samples\": [\n 896.0,\n 317528.0,\n 246116.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_deaths\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 17424.11835293718,\n \"min\": 0.0,\n \"max\": 185744.0,\n \"num_unique_values\": 156,\n \"samples\": [\n 29.0,\n 67376.0,\n 577.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_tests\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 10616500.238255894,\n \"min\": 8408.0,\n \"max\": 83898416.0,\n \"num_unique_values\": 90,\n \"samples\": [\n 445722.0,\n 869430.0,\n 711225.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 61 + } + ], + "source": [ + "combined_df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DMFIKAXvNagG" + }, + "source": [ + "**Q10: Add columns `tests_per_million`, `cases_per_million` and `deaths_per_million` into `combined_df`.**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_KdWa_juNagH" + }, + "outputs": [], + "source": [ + "combined_df['tests_per_million'] = combined_df['total_tests'].mul(1e6).div(combined_df['population'])  # tests per one million inhabitants" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IDdn-5CONagI" + }, + "outputs": [], + "source": [ + "combined_df['cases_per_million'] = combined_df['total_cases'].mul(1e6).div(combined_df['population'])  # confirmed cases per one million inhabitants" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "93n1py5BNagK" + }, + "outputs": [], + "source": [ +
"combined_df['deaths_per_million'] = combined_df['total_deaths'].mul(1e6).div(combined_df['population'])  # deaths per one million inhabitants" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IRMupBBUNagL", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 461 + }, + "outputId": "b48b5032-ae77-4d99-8831-8ae62db8143a" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " location continent population life_expectancy \\\n", + "0 Afghanistan Asia 38928341 64.83 \n", + "1 Albania Europe 2877800 78.57 \n", + "2 Algeria Africa 43851043 76.88 \n", + "3 Andorra Europe 77265 83.73 \n", + "4 Angola Africa 32866268 61.15 \n", + ".. ... ... ... ... \n", + "205 Vietnam Asia 97338583 75.40 \n", + "206 Western Sahara Africa 597330 70.26 \n", + "207 Yemen Asia 29825968 66.12 \n", + "208 Zambia Africa 18383956 63.89 \n", + "209 Zimbabwe Africa 14862927 61.49 \n", + "\n", + " hospital_beds_per_thousand gdp_per_capita gdp total_cases \\\n", + "0 0.50 1803.987 7.022622e+10 38243.0 \n", + "1 2.89 11803.431 3.396791e+10 9728.0 \n", + "2 1.90 13913.839 6.101364e+11 45158.0 \n", + "3 NaN NaN NaN 1199.0 \n", + "4 NaN 5819.495 1.912651e+11 2729.0 \n", + ".. ... ... ... ... \n", + "205 2.60 6171.884 6.007624e+11 1046.0 \n", + "206 NaN NaN NaN 766.0 \n", + "207 0.70 1479.147 4.411699e+10 1976.0 \n", + "208 2.00 3689.251 6.782303e+10 12415.0 \n", + "209 1.70 1899.775 2.823622e+10 6638.0 \n", + "\n", + " total_deaths total_tests tests_per_million cases_per_million \\\n", + "0 1409.0 NaN NaN 982.394806 \n", + "1 296.0 NaN NaN 3380.359997 \n", + "2 1525.0 NaN NaN 1029.804468 \n", + "3 53.0 NaN NaN 15518.022390 \n", + "4 109.0 NaN NaN 83.033462 \n", + ".. ... ... ... ... 
\n", + "205 35.0 261004.0 2681.403324 10.745996 \n", + "206 1.0 NaN NaN 1282.373228 \n", + "207 571.0 NaN NaN 66.250993 \n", + "208 292.0 NaN NaN 675.317108 \n", + "209 206.0 97272.0 6544.605918 446.614587 \n", + "\n", + " deaths_per_million \n", + "0 36.194710 \n", + "1 102.856349 \n", + "2 34.776824 \n", + "3 685.950948 \n", + "4 3.316470 \n", + ".. ... \n", + "205 0.359570 \n", + "206 1.674116 \n", + "207 19.144391 \n", + "208 15.883415 \n", + "209 13.859989 \n", + "\n", + "[210 rows x 13 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
locationcontinentpopulationlife_expectancyhospital_beds_per_thousandgdp_per_capitagdptotal_casestotal_deathstotal_teststests_per_millioncases_per_milliondeaths_per_million
0AfghanistanAsia3892834164.830.501803.9877.022622e+1038243.01409.0NaNNaN982.39480636.194710
1AlbaniaEurope287780078.572.8911803.4313.396791e+109728.0296.0NaNNaN3380.359997102.856349
2AlgeriaAfrica4385104376.881.9013913.8396.101364e+1145158.01525.0NaNNaN1029.80446834.776824
3AndorraEurope7726583.73NaNNaNNaN1199.053.0NaNNaN15518.022390685.950948
4AngolaAfrica3286626861.15NaN5819.4951.912651e+112729.0109.0NaNNaN83.0334623.316470
..........................................
205VietnamAsia9733858375.402.606171.8846.007624e+111046.035.0261004.02681.40332410.7459960.359570
206Western SaharaAfrica59733070.26NaNNaNNaN766.01.0NaNNaN1282.3732281.674116
207YemenAsia2982596866.120.701479.1474.411699e+101976.0571.0NaNNaN66.25099319.144391
208ZambiaAfrica1838395663.892.003689.2516.782303e+1012415.0292.0NaNNaN675.31710815.883415
209ZimbabweAfrica1486292761.491.701899.7752.823622e+106638.0206.097272.06544.605918446.61458713.859989
\n", + "

210 rows × 13 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "combined_df", + "summary": "{\n \"name\": \"combined_df\",\n \"rows\": 210,\n \"fields\": [\n {\n \"column\": \"location\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 210,\n \"samples\": [\n \"Bulgaria\",\n \"Slovenia\",\n \"Guyana\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"continent\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"Asia\",\n \"Europe\",\n \"Oceania\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"population\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 142509205,\n \"min\": 809,\n \"max\": 1439323774,\n \"num_unique_values\": 210,\n \"samples\": [\n 6948445,\n 2078932,\n 786559\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"life_expectancy\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 7.560056151115564,\n \"min\": 53.28,\n \"max\": 86.75,\n \"num_unique_values\": 197,\n \"samples\": [\n 74.25,\n 76.16,\n 79.19\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"hospital_beds_per_thousand\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2.4649168855927868,\n \"min\": 0.1,\n \"max\": 13.8,\n \"num_unique_values\": 99,\n \"samples\": [\n 2.397,\n 0.3,\n 2.54\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"gdp_per_capita\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 19820.802488409416,\n \"min\": 661.24,\n \"max\": 116935.6,\n \"num_unique_values\": 183,\n \"samples\": [\n 50669.315,\n 3601.006,\n 1569.888\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"gdp\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2272769090054.96,\n \"min\": 669035852.954,\n \"max\": 22034193130919.086,\n 
\"num_unique_values\": 183,\n \"samples\": [\n 3155330252.9950004,\n 94988326544.65,\n 17572900832.351997\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_cases\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 577759.0509830201,\n \"min\": 3.0,\n \"max\": 6114406.0,\n \"num_unique_values\": 208,\n \"samples\": [\n 896.0,\n 317528.0,\n 246116.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_deaths\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 17424.11835293718,\n \"min\": 0.0,\n \"max\": 185744.0,\n \"num_unique_values\": 156,\n \"samples\": [\n 29.0,\n 67376.0,\n 577.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_tests\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 10616500.238255894,\n \"min\": 8408.0,\n \"max\": 83898416.0,\n \"num_unique_values\": 90,\n \"samples\": [\n 445722.0,\n 869430.0,\n 711225.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"tests_per_million\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 137501.5426053333,\n \"min\": 1969.1317223799424,\n \"max\": 725696.6351209254,\n \"num_unique_values\": 90,\n \"samples\": [\n 8289.217482188455,\n 7562.655732467907,\n 131192.26184700849\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"cases_per_million\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 5920.784473156789,\n \"min\": 3.0238238837004348,\n \"max\": 41375.74364990663,\n \"num_unique_values\": 209,\n \"samples\": [\n 2368.011835741666,\n 1437.2764477144995,\n 1757.0201345353623\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"deaths_per_million\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 181.9372798469054,\n \"min\": 0.0,\n \"max\": 1237.5508279804349,\n \"num_unique_values\": 189,\n \"samples\": [\n 0.3595696477315681,\n 
576.2792756308129,\n 854.0403754533369\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 65 + } + ], + "source": [ + "combined_df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VNjlGq-uNagO" + }, + "source": [ + "**Q11: Create a dataframe with 10 countries that have highest number of tests per million people.**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HBcHEMJZNagO" + }, + "outputs": [], + "source": [ + "highest_tests_df = combined_df.sort_values(by=\"tests_per_million\", ascending=False).head(10)  # descending + head: NaN rows sort last, so they can no longer fill the top 10" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "R57UrLhxNagR", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 539 + }, + "outputId": "54cf7dcd-37f8-4adc-cdc6-e0a9bd288dde" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " location continent population life_expectancy \\\n", + "189 Timor Asia 1318442 69.50 \n", + "191 Trinidad and Tobago North America 1399491 73.51 \n", + "194 Turks and Caicos Islands North America 38718 80.22 \n", + "200 United States Virgin Islands North America 104423 80.58 \n", + "202 Uzbekistan Asia 33469199 71.72 \n", + "203 Vatican Europe 809 75.12 \n", + "204 Venezuela South America 28435943 72.06 \n", + "206 Western Sahara Africa 597330 70.26 \n", + "207 Yemen Asia 29825968 66.12 \n", + "208 Zambia Africa 18383956 63.89 \n", + "\n", + " hospital_beds_per_thousand gdp_per_capita gdp total_cases \\\n", + "189 5.9 6570.102 8.662298e+09 27.0 \n", + "191 3.0 28763.071 4.025366e+10 1920.0 \n", + "194 NaN NaN NaN 555.0 \n", + "200 NaN NaN NaN 1144.0 \n", + "202 4.0 6253.104 2.092864e+11 42540.0 \n", + "203 NaN NaN NaN 12.0 \n", + "204 0.8 16745.022 4.761605e+11 48883.0 \n", + "206 NaN NaN NaN 766.0 \n", + "207 0.7 1479.147 4.411699e+10 1976.0 \n", + "208 2.0 3689.251 6.782303e+10 12415.0 \n", + "\n", + " total_deaths total_tests 
tests_per_million cases_per_million \\\n", + "189 0.0 NaN NaN 20.478717 \n", + "191 28.0 NaN NaN 1371.927365 \n", + "194 4.0 NaN NaN 14334.418100 \n", + "200 15.0 NaN NaN 10955.440851 \n", + "202 329.0 NaN NaN 1271.019363 \n", + "203 0.0 NaN NaN 14833.127318 \n", + "204 398.0 NaN NaN 1719.056759 \n", + "206 1.0 NaN NaN 1282.373228 \n", + "207 571.0 NaN NaN 66.250993 \n", + "208 292.0 NaN NaN 675.317108 \n", + "\n", + " deaths_per_million \n", + "189 0.000000 \n", + "191 20.007274 \n", + "194 103.311121 \n", + "200 143.646515 \n", + "202 9.829933 \n", + "203 0.000000 \n", + "204 13.996371 \n", + "206 1.674116 \n", + "207 19.144391 \n", + "208 15.883415 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
locationcontinentpopulationlife_expectancyhospital_beds_per_thousandgdp_per_capitagdptotal_casestotal_deathstotal_teststests_per_millioncases_per_milliondeaths_per_million
189TimorAsia131844269.505.96570.1028.662298e+0927.00.0NaNNaN20.4787170.000000
191Trinidad and TobagoNorth America139949173.513.028763.0714.025366e+101920.028.0NaNNaN1371.92736520.007274
194Turks and Caicos IslandsNorth America3871880.22NaNNaNNaN555.04.0NaNNaN14334.418100103.311121
200United States Virgin IslandsNorth America10442380.58NaNNaNNaN1144.015.0NaNNaN10955.440851143.646515
202UzbekistanAsia3346919971.724.06253.1042.092864e+1142540.0329.0NaNNaN1271.0193639.829933
203VaticanEurope80975.12NaNNaNNaN12.00.0NaNNaN14833.1273180.000000
204VenezuelaSouth America2843594372.060.816745.0224.761605e+1148883.0398.0NaNNaN1719.05675913.996371
206Western SaharaAfrica59733070.26NaNNaNNaN766.01.0NaNNaN1282.3732281.674116
207YemenAsia2982596866.120.71479.1474.411699e+101976.0571.0NaNNaN66.25099319.144391
208ZambiaAfrica1838395663.892.03689.2516.782303e+1012415.0292.0NaNNaN675.31710815.883415
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "highest_tests_df", + "summary": "{\n \"name\": \"highest_tests_df\",\n \"rows\": 10,\n \"fields\": [\n {\n \"column\": \"location\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 10,\n \"samples\": [\n \"Yemen\",\n \"Trinidad and Tobago\",\n \"Vatican\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"continent\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"North America\",\n \"Africa\",\n \"Europe\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"population\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 14416436,\n \"min\": 809,\n \"max\": 33469199,\n \"num_unique_values\": 10,\n \"samples\": [\n 29825968,\n 1399491,\n 809\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"life_expectancy\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 5.39565009984895,\n \"min\": 63.89,\n \"max\": 80.58,\n \"num_unique_values\": 10,\n \"samples\": [\n 66.12,\n 73.51,\n 75.12\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"hospital_beds_per_thousand\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2.0056586615540213,\n \"min\": 0.7,\n \"max\": 5.9,\n \"num_unique_values\": 6,\n \"samples\": [\n 5.9,\n 3.0,\n 2.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"gdp_per_capita\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 10331.899734931489,\n \"min\": 1479.147,\n \"max\": 28763.071,\n \"num_unique_values\": 6,\n \"samples\": [\n 6570.102,\n 28763.071,\n 3689.251\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"gdp\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 178549628190.39688,\n \"min\": 8662298421.084,\n \"max\": 
476160491125.74603,\n \"num_unique_values\": 6,\n \"samples\": [\n 8662298421.084,\n 40253658996.861,\n 67823028056.956\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_cases\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 18702.00588053473,\n \"min\": 12.0,\n \"max\": 48883.0,\n \"num_unique_values\": 10,\n \"samples\": [\n 1976.0,\n 1920.0,\n 12.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_deaths\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 213.61221355010994,\n \"min\": 0.0,\n \"max\": 571.0,\n \"num_unique_values\": 9,\n \"samples\": [\n 571.0,\n 28.0,\n 398.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_tests\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"tests_per_million\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": null,\n \"max\": null,\n \"num_unique_values\": 0,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"cases_per_million\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 6124.436199092139,\n \"min\": 20.47871654574111,\n \"max\": 14833.127317676144,\n \"num_unique_values\": 10,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"deaths_per_million\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 49.32516121758338,\n \"min\": 0.0,\n \"max\": 143.6465146567327,\n \"num_unique_values\": 9,\n \"samples\": [],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 67 + } + ], + "source": [ + "highest_tests_df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ug-VTfalNagT" + }, + "source": [ + "**Q12: 
Create a dataframe with 10 countries that have highest number of positive cases per million people.**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HxjUJq8oNagU" + }, + "outputs": [], + "source": [ + "highest_cases_df = combined_df.sort_values(by=\"cases_per_million\", ascending=False).head(10)  # rank by the per-million rate the question asks for, not raw total_cases; descending + head keeps NaN rows out" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7EuO7nPENagY", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 504 + }, + "outputId": "09665709-b020-42b9-d77a-125a4490ce62" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " location continent population life_expectancy \\\n", + "177 Spain Europe 46754783 83.56 \n", + "125 Mexico North America 128932753 75.05 \n", + "174 South Africa Africa 59308690 64.13 \n", + "42 Colombia South America 50882884 77.29 \n", + "150 Peru South America 32971846 76.74 \n", + "157 Russia Europe 145934460 72.58 \n", + "90 India Asia 1380004385 69.66 \n", + "27 Brazil South America 212559409 75.88 \n", + "199 United States North America 331002647 78.86 \n", + "87 Hong Kong Asia 7496988 84.86 \n", + "\n", + " hospital_beds_per_thousand gdp_per_capita gdp total_cases \\\n", + "177 2.97 34272.360 1.602397e+12 479554.0 \n", + "125 1.38 17336.469 2.235239e+12 610957.0 \n", + "174 2.32 12294.876 7.291930e+11 630595.0 \n", + "42 1.71 13254.949 6.744500e+11 633339.0 \n", + "150 1.60 12236.706 4.034668e+11 663437.0 \n", + "157 8.05 24765.954 3.614206e+12 1005000.0 \n", + "90 0.53 6426.674 8.868838e+12 3853406.0 \n", + "27 2.20 14103.452 2.997821e+12 3997865.0 \n", + "199 2.77 54225.446 1.794877e+13 6114406.0 \n", + "87 NaN 56054.920 4.202431e+11 NaN \n", + "\n", + " total_deaths total_tests tests_per_million cases_per_million \\\n", + "177 29194.0 6416533.0 137238.001939 10256.790198 \n", + "125 65816.0 1271295.0 9860.140037 4738.570967 \n", + "174 14389.0 3705408.0 62476.645497 10632.421657 \n", + "42 20348.0 2647702.0 52035.218758 
12446.994946 \n", + "150 29259.0 584232.0 17719.117092 20121.318048 \n", + "157 17414.0 37176827.0 254750.159763 6886.653091 \n", + "90 67376.0 44337201.0 32128.304433 2792.314316 \n", + "27 123780.0 4797948.0 22572.268255 18808.224105 \n", + "199 185744.0 83898416.0 253467.507769 18472.377957 \n", + "87 NaN 780410.0 104096.471810 NaN \n", + "\n", + " deaths_per_million \n", + "177 624.406705 \n", + "125 510.467654 \n", + "174 242.612002 \n", + "42 399.898716 \n", + "150 887.393445 \n", + "157 119.327539 \n", + "90 48.823033 \n", + "27 582.331314 \n", + "199 561.155633 \n", + "87 NaN " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
locationcontinentpopulationlife_expectancyhospital_beds_per_thousandgdp_per_capitagdptotal_casestotal_deathstotal_teststests_per_millioncases_per_milliondeaths_per_million
177SpainEurope4675478383.562.9734272.3601.602397e+12479554.029194.06416533.0137238.00193910256.790198624.406705
125MexicoNorth America12893275375.051.3817336.4692.235239e+12610957.065816.01271295.09860.1400374738.570967510.467654
174South AfricaAfrica5930869064.132.3212294.8767.291930e+11630595.014389.03705408.062476.64549710632.421657242.612002
42ColombiaSouth America5088288477.291.7113254.9496.744500e+11633339.020348.02647702.052035.21875812446.994946399.898716
150PeruSouth America3297184676.741.6012236.7064.034668e+11663437.029259.0584232.017719.11709220121.318048887.393445
157RussiaEurope14593446072.588.0524765.9543.614206e+121005000.017414.037176827.0254750.1597636886.653091119.327539
90IndiaAsia138000438569.660.536426.6748.868838e+123853406.067376.044337201.032128.3044332792.31431648.823033
27BrazilSouth America21255940975.882.2014103.4522.997821e+123997865.0123780.04797948.022572.26825518808.224105582.331314
199United StatesNorth America33100264778.862.7754225.4461.794877e+136114406.0185744.083898416.0253467.50776918472.377957561.155633
87Hong KongAsia749698884.86NaN56054.9204.202431e+11NaNNaN780410.0104096.471810NaNNaN
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "highest_cases_df", + "summary": "{\n \"name\": \"highest_cases_df\",\n \"rows\": 10,\n \"fields\": [\n {\n \"column\": \"location\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 10,\n \"samples\": [\n \"United States\",\n \"Mexico\",\n \"Russia\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"continent\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"North America\",\n \"Asia\",\n \"Africa\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"population\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 412647772,\n \"min\": 7496988,\n \"max\": 1380004385,\n \"num_unique_values\": 10,\n \"samples\": [\n 331002647,\n 128932753,\n 145934460\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"life_expectancy\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 6.14235830569624,\n \"min\": 64.13,\n \"max\": 84.86,\n \"num_unique_values\": 10,\n \"samples\": [\n 78.86,\n 75.05,\n 72.58\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"hospital_beds_per_thousand\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2.1699718380149036,\n \"min\": 0.53,\n \"max\": 8.05,\n \"num_unique_values\": 9,\n \"samples\": [\n 2.2,\n 1.38,\n 8.05\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"gdp_per_capita\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 17909.46022854285,\n \"min\": 6426.674,\n \"max\": 56054.92,\n \"num_unique_values\": 10,\n \"samples\": [\n 54225.446,\n 17336.469,\n 24765.954\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"gdp\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 5535455191534.749,\n \"min\": 403466785779.276,\n \"max\": 
17948766160755.562,\n \"num_unique_values\": 10,\n \"samples\": [\n 17948766160755.562,\n 2235238675469.157,\n 3614206123374.8403\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_cases\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2095100.088257228,\n \"min\": 479554.0,\n \"max\": 6114406.0,\n \"num_unique_values\": 9,\n \"samples\": [\n 3997865.0,\n 610957.0,\n 1005000.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_deaths\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 58425.36917512803,\n \"min\": 14389.0,\n \"max\": 185744.0,\n \"num_unique_values\": 9,\n \"samples\": [\n 123780.0,\n 65816.0,\n 17414.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_tests\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 27946413.588854726,\n \"min\": 584232.0,\n \"max\": 83898416.0,\n \"num_unique_values\": 10,\n \"samples\": [\n 83898416.0,\n 1271295.0,\n 37176827.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"tests_per_million\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 92988.064035842,\n \"min\": 9860.14003749691,\n \"max\": 254750.1597634993,\n \"num_unique_values\": 10,\n \"samples\": [\n 253467.50776890313,\n 9860.14003749691,\n 254750.1597634993\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"cases_per_million\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 6344.933830014123,\n \"min\": 2792.3143157258883,\n \"max\": 20121.318048131125,\n \"num_unique_values\": 9,\n \"samples\": [\n 18808.224104537287,\n 4738.570966525473,\n 6886.65309070935\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"deaths_per_million\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 267.034582574439,\n \"min\": 48.823033268839936,\n \"max\": 887.3934446982435,\n \"num_unique_values\": 9,\n 
\"samples\": [\n 582.3313142538893,\n 510.46765440585915,\n 119.3275392254852\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 71 + } + ], + "source": [ + "highest_cases_df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GX-WdUFHNagb" + }, + "source": [ + "**Q13: Create a dataframe with 10 countries that have highest number of deaths per million people.**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hH19qct0Nagc" + }, + "outputs": [], + "source": [ + "highest_deaths_df = combined_df.sort_values(by=\"deaths_per_million\", ascending=False).head(10)  # descending + head: rows with NaN deaths_per_million sort last and are excluded from the top 10" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lWhydo7ENagd", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 487 + }, + "outputId": "fd8ac747-80fa-499b-adb0-347dfa8ee55d" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " location continent population life_expectancy \\\n", + "27 Brazil South America 212559409 75.88 \n", + "97 Italy Europe 60461828 83.51 \n", + "40 Chile South America 19116209 80.18 \n", + "198 United Kingdom Europe 67886004 81.32 \n", + "177 Spain Europe 46754783 83.56 \n", + "3 Andorra Europe 77265 83.73 \n", + "18 Belgium Europe 11589616 81.63 \n", + "150 Peru South America 32971846 76.74 \n", + "162 San Marino Europe 33938 84.97 \n", + "87 Hong Kong Asia 7496988 84.86 \n", + "\n", + " hospital_beds_per_thousand gdp_per_capita gdp total_cases \\\n", + "27 2.20 14103.452 2.997821e+12 3997865.0 \n", + "97 3.18 35220.084 2.129471e+12 271515.0 \n", + "40 2.11 22767.037 4.352194e+11 414739.0 \n", + "198 2.54 39753.244 2.698689e+12 338676.0 \n", + "177 2.97 34272.360 1.602397e+12 479554.0 \n", + "3 NaN NaN NaN 1199.0 \n", + "18 5.64 42658.576 4.943965e+11 85817.0 \n", + "150 1.60 12236.706 4.034668e+11 663437.0 \n", + "162 3.80 56861.470 1.929765e+09 735.0 \n", + "87 NaN 56054.920 
4.202431e+11 NaN \n", + "\n", + " total_deaths total_tests tests_per_million cases_per_million \\\n", + "27 123780.0 4797948.0 22572.268255 18808.224105 \n", + "97 35497.0 5214766.0 86248.897403 4490.684602 \n", + "40 11344.0 2458762.0 128621.841287 21695.671982 \n", + "198 41514.0 13447568.0 198090.434075 4988.892850 \n", + "177 29194.0 6416533.0 137238.001939 10256.790198 \n", + "3 53.0 NaN NaN 15518.022390 \n", + "18 9898.0 2281853.0 196887.713967 7404.645676 \n", + "150 29259.0 584232.0 17719.117092 20121.318048 \n", + "162 42.0 NaN NaN 21657.139490 \n", + "87 NaN 780410.0 104096.471810 NaN \n", + "\n", + " deaths_per_million \n", + "27 582.331314 \n", + "97 587.097697 \n", + "40 593.423100 \n", + "198 611.525168 \n", + "177 624.406705 \n", + "3 685.950948 \n", + "18 854.040375 \n", + "150 887.393445 \n", + "162 1237.550828 \n", + "87 NaN " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
locationcontinentpopulationlife_expectancyhospital_beds_per_thousandgdp_per_capitagdptotal_casestotal_deathstotal_teststests_per_millioncases_per_milliondeaths_per_million
27BrazilSouth America21255940975.882.2014103.4522.997821e+123997865.0123780.04797948.022572.26825518808.224105582.331314
97ItalyEurope6046182883.513.1835220.0842.129471e+12271515.035497.05214766.086248.8974034490.684602587.097697
40ChileSouth America1911620980.182.1122767.0374.352194e+11414739.011344.02458762.0128621.84128721695.671982593.423100
198United KingdomEurope6788600481.322.5439753.2442.698689e+12338676.041514.013447568.0198090.4340754988.892850611.525168
177SpainEurope4675478383.562.9734272.3601.602397e+12479554.029194.06416533.0137238.00193910256.790198624.406705
3AndorraEurope7726583.73NaNNaNNaN1199.053.0NaNNaN15518.022390685.950948
18BelgiumEurope1158961681.635.6442658.5764.943965e+1185817.09898.02281853.0196887.7139677404.645676854.040375
150PeruSouth America3297184676.741.6012236.7064.034668e+11663437.029259.0584232.017719.11709220121.318048887.393445
162San MarinoEurope3393884.973.8056861.4701.929765e+09735.042.0NaNNaN21657.1394901237.550828
87Hong KongAsia749698884.86NaN56054.9204.202431e+11NaNNaN780410.0104096.471810NaNNaN
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "highest_deaths_df", + "summary": "{\n \"name\": \"highest_deaths_df\",\n \"rows\": 10,\n \"fields\": [\n {\n \"column\": \"location\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 10,\n \"samples\": [\n \"San Marino\",\n \"Italy\",\n \"Andorra\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"continent\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"South America\",\n \"Europe\",\n \"Asia\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"population\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 63431013,\n \"min\": 33938,\n \"max\": 212559409,\n \"num_unique_values\": 10,\n \"samples\": [\n 33938,\n 60461828,\n 77265\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"life_expectancy\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3.204690312651133,\n \"min\": 75.88,\n \"max\": 84.97,\n \"num_unique_values\": 10,\n \"samples\": [\n 84.97,\n 83.51,\n 83.73\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"hospital_beds_per_thousand\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.2675285288420814,\n \"min\": 1.6,\n \"max\": 5.64,\n \"num_unique_values\": 8,\n \"samples\": [\n 3.18,\n 5.64,\n 2.2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"gdp_per_capita\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 16230.409234274475,\n \"min\": 12236.706,\n \"max\": 56861.47,\n \"num_unique_values\": 9,\n \"samples\": [\n 56861.47,\n 35220.084,\n 42658.576\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"gdp\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1131831495984.7166,\n \"min\": 1929764568.8600001,\n \"max\": 
2997821421979.8677,\n \"num_unique_values\": 9,\n \"samples\": [\n 1929764568.8600001,\n 2129470660953.5522,\n 494396514946.81604\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_cases\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1258751.8810278014,\n \"min\": 735.0,\n \"max\": 3997865.0,\n \"num_unique_values\": 9,\n \"samples\": [\n 663437.0,\n 271515.0,\n 1199.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_deaths\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 37902.305183853925,\n \"min\": 42.0,\n \"max\": 123780.0,\n \"num_unique_values\": 9,\n \"samples\": [\n 29259.0,\n 35497.0,\n 53.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_tests\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 4184787.289485947,\n \"min\": 584232.0,\n \"max\": 13447568.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 5214766.0,\n 2281853.0,\n 4797948.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"tests_per_million\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 68806.1028862353,\n \"min\": 17719.117091593842,\n \"max\": 198090.43407533606,\n \"num_unique_values\": 8,\n \"samples\": [\n 86248.89740349895,\n 196887.7139673998,\n 22572.26825466004\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"cases_per_million\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 7156.832665095871,\n \"min\": 4490.684601861525,\n \"max\": 21695.67198182443,\n \"num_unique_values\": 9,\n \"samples\": [\n 20121.318048131125,\n 4490.684601861525,\n 15518.022390474342\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"deaths_per_million\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 219.31641882952675,\n \"min\": 582.3313142538893,\n \"max\": 1237.5508279804349,\n \"num_unique_values\": 9,\n 
\"samples\": [\n 887.3934446982435,\n 587.0976974100089,\n 685.9509480359801\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 73 + } + ], + "source": [ + "highest_deaths_df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nUTDc8q4Nage" + }, + "source": [ + "**(Optional) Q: Count number of countries that feature in both the lists of \"highest number of tests per million\" and \"highest number of cases per million\".**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nXat9wCrNagf", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "dfe4275b-5628-43b5-8bd3-422fb75e27d1" + }, + "outputs": [], + "source": [ + "pd.merge(highest_tests_df, highest_cases_df, on=\"location\").shape[0]  # inner join on country name: rows present in BOTH top-10 lists (tests and cases)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uHWghQHUNagh" + }, + "source": [ + "**(Optional) Q: Count number of countries that feature in both the lists \"20 countries with lowest GDP per capita\" and \"20 countries with the lowest number of hospital beds per thousand population\". 
Only consider countries with a population higher than 10 million while creating the list.**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5C7khtW3Nagi" + }, + "outputs": [], + "source": [ + "cc = combined_df[combined_df['population'] > 10_000_000]  # keep only countries with population above 10 million, as the question requires" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RKk0r3umNagj" + }, + "outputs": [], + "source": [ + "cc1 = cc.sort_values(by=\"gdp_per_capita\").head(20)  # 20 lowest GDP per capita among the filtered countries (NaN sorts last)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XH7POtliNagj" + }, + "outputs": [], + "source": [ + "cc2 = cc.sort_values(by=\"hospital_beds_per_thousand\").head(20)  # 20 lowest hospital beds per thousand among the filtered countries" + ] + }, + { + "cell_type": "code", + "source": [ + "pd.merge(cc1, cc2, on=\"location\").shape[0]  # inner join on country name: count of countries present in both lists" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "pReXGGslS9is", + "outputId": "1e9d56e3-d09e-437c-caf6-371ebe71d64d" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wvjafOzENagk" + }, + "source": [ + "## Submission\n", + "\n", + "Congratulations on making it this far! 
You've reached the end of this assignment, and you just completed your first real-world data analysis problem.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LBOb5DhHNagl", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "cbac7b96-0022-4e4c-b63d-bcf945c5dd5a" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Thank you so much\n" + ] + } + ], + "source": [ + "print(\"Thank you so much\")" + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "NX9I-esQTSao" + }, + "execution_count": null, + "outputs": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.6" + }, + "colab": { + "provenance": [], + "include_colab_link": true + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file From 6ef7225b10bb032329fa412772c6e4b67125e069 Mon Sep 17 00:00:00 2001 From: cgoyal23 <167020791+cgoyal23@users.noreply.github.com> Date: Sat, 25 May 2024 06:52:27 +0530 Subject: [PATCH 3/3] Update 230335.ipynb dscdc --- 230335.ipynb | 762 +-------------------------------------------------- 1 file changed, 1 insertion(+), 761 deletions(-) diff --git a/230335.ipynb b/230335.ipynb index d92281b..8b13789 100644 --- a/230335.ipynb +++ b/230335.ipynb @@ -1,761 +1 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "view-in-github", - "colab_type": "text" - }, - "source": [ - "\"Open" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "3eJmduQENaek" - }, - "source": [ - "# Assignment 1.1 - Pandas Data Analysis Practice\n", - "\n", - "*This assignment is a part of the project [\"Customer Churn Analysis: From 
Data To Strategy\"]*\n", - "\n", - "In this assignment, you'll get to practice some of the concepts and skills covered in pandas.\n", - "\n", - "As you go through this notebook, you will find a **???** in certain places. To complete this assignment, you must replace all the **???** with appropriate values, expressions or statements to ensure that the notebook runs properly end-to-end.\n", - "\n", - "Some things to keep in mind:\n", - "\n", - "* Do not change variable names, delete cells or disturb other existing code. It may cause problems during evaluation.\n", - "* In some cases, you may need to add some code cells or new statements before or after the line of code containing the **???**.\n", - "* Questions marked **(Optional)** will not be considered for evaluation, and can be skipped. They are for your learning.\n", - "\n", - "\n", - "If you are stuck, you can ask for help on the Whatsapp group. Please do not use **ChatGPT or any other LLM to get solutions to the questions.**\n", - "\n", - "\n", - "Deadline : **22 May, 11:59 pm**\n" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "id": "nzCoPd_zNae4" - }, - "outputs": [], - "source": [ - "import pandas as pd" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "yKwMduIXNae5" - }, - "source": [ - "In this assignment, we're going to analyze an operate on data from a CSV file. Let's begin by downloading the CSV file." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Xa18DHWvNae8" - }, - "source": [ - "Let's load the data from the CSV file into a Pandas data frame." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "id": "2QPUhz19Nae-", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 287 - }, - "outputId": "9aa4e1b9-a78a-4ab6-f09c-3ad9fd4bb91d" - }, - "outputs": [ - { - "output_type": "error", - "ename": "FileNotFoundError", - "evalue": "[Errno 2] No such file or directory: 'countries.csv'", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mcountries_df\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'countries.csv'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pandas/io/parsers/readers.py\u001b[0m in \u001b[0;36mread_csv\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[0m\n\u001b[1;32m 910\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkwds_defaults\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 911\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 912\u001b[0;31m 
\u001b[0;32mreturn\u001b[0m \u001b[0m_read\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 913\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 914\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pandas/io/parsers/readers.py\u001b[0m in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m 575\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 576\u001b[0m \u001b[0;31m# Create the parser.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 577\u001b[0;31m \u001b[0mparser\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mTextFileReader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 578\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 579\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mchunksize\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0miterator\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pandas/io/parsers/readers.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[1;32m 1405\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1406\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhandles\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mIOHandles\u001b[0m \u001b[0;34m|\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1407\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_make_engine\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mengine\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1408\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1409\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pandas/io/parsers/readers.py\u001b[0m in \u001b[0;36m_make_engine\u001b[0;34m(self, f, engine)\u001b[0m\n\u001b[1;32m 1659\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m\"b\"\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1660\u001b[0m \u001b[0mmode\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;34m\"b\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1661\u001b[0;31m self.handles = get_handle(\n\u001b[0m\u001b[1;32m 1662\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1663\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pandas/io/common.py\u001b[0m in \u001b[0;36mget_handle\u001b[0;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[1;32m 857\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mioargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mencoding\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;34m\"b\"\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m 
\u001b[0mioargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmode\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 858\u001b[0m \u001b[0;31m# Encoding\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 859\u001b[0;31m handle = open(\n\u001b[0m\u001b[1;32m 860\u001b[0m \u001b[0mhandle\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 861\u001b[0m \u001b[0mioargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmode\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'countries.csv'" - ] - } - ], - "source": [ - "countries_df = pd.read_csv('countries.csv')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "7_kR3k1wNae_" - }, - "outputs": [], - "source": [ - "countries_df" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "KE8FlOjINafA" - }, - "source": [ - "**Q1: How many countries does the dataframe contain?**\n", - "\n", - "Hint: Use the `.shape` method." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ZD4-AM-CNafB" - }, - "outputs": [], - "source": [ - "num_countries = countries_df.shape[0]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Iy8SMfkwNafC" - }, - "outputs": [], - "source": [ - "print('There are {} countries in the dataset'.format(num_countries))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "m9eCXKMCNafE" - }, - "source": [ - "**Q2: Retrieve a list of continents from the dataframe?**\n", - "\n", - "*Hint: Use the `.unique` method of a series.*" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "xL8zFetJNafF" - }, - "outputs": [], - "source": [ - "continents = countries_df['continent'].unique()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "40j7HkWZNafG" - }, - "outputs": [], - "source": [ - "continents" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "yr9rdD5ZNafK" - }, - "source": [ - "**Q3: What is the total population of all the countries listed in this dataset?**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "IrGeG0vwNafL" - }, - "outputs": [], - "source": [ - "total_population = countries_df['population'].sum()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "FUJRPPMcNafM" - }, - "outputs": [], - "source": [ - "print('The total population is {}.'.format(int(total_population)))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "nvs9htXuNafO" - }, - "source": [ - "**Q: (Optional) What is the overall life expectancy across in the world?**\n", - "\n", - "*Hint: You'll need to take a weighted average of life expectancy using populations as weights.*" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "8TXTdZ0ENafP" - }, - "outputs": [], - "source": [] - }, - { - "cell_type": 
"code", - "execution_count": null, - "metadata": { - "id": "97C4RlcPNafP" - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "H1H_0zo8NafR" - }, - "source": [ - "**Q4: Create a dataframe containing 10 countries with the highest population.**\n", - "\n", - "*Hint: Chain the `sort_values` and `head` methods.*" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "3BBebkbyNafR" - }, - "outputs": [], - "source": [ - "most_populous_df = countries_df.sort_values('population', ascending=False).head(10)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "L83PYtCaNafS" - }, - "outputs": [], - "source": [ - "most_populous_df" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "RldAR4WJNafT" - }, - "source": [ - "**Q5: Add a new column in `countries_df` to record the overall GDP per country (product of population & per capita GDP).**\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "R_mdT6GMNafU" - }, - "outputs": [], - "source": [ - "countries_df['gdp'] = countries_df['population'] * countries_df['gdp_per_capita']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "FLydP8pHNafV" - }, - "outputs": [], - "source": [ - "countries_df" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "RXtXfNfBNafX" - }, - "source": [ - "**Q: (Optional) Create a dataframe containing 10 countries with the lowest GDP per capita, among the counties with population greater than 100 million.**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "EkCt9Wl7NafX" - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "nM5flE-QNafY" - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "_NA6oQnVNafZ" - }, - "source": [ - "**Q6: Create a 
data frame that counts the number countries in each continent?**\n", - "\n", - "*Hint: Use `groupby`, select the `location` column and aggregate using `count`.*" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Co-tfE7BNafv" - }, - "outputs": [], - "source": [ - "country_counts_df = countries_df.groupby('continent')['location'].count()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "6fVwjKFFNafw" - }, - "outputs": [], - "source": [ - "country_counts_df" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Ut6cNp0yNafy" - }, - "source": [ - "**Q7: Create a data frame showing the total population of each continent.**\n", - "\n", - "*Hint: Use `groupby`, select the population column and aggregate using `sum`.*" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Hqnwb5V0Nafy" - }, - "outputs": [], - "source": [ - "continent_populations_df = countries_df.groupby('continent')['population'].sum()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "D5gAu5IANafz" - }, - "outputs": [], - "source": [ - "continent_populations_df" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Am9h8-R4Naf1" - }, - "source": [ - "Let's download another CSV file containing overall Covid-19 stats for various countires, and read the data into another Pandas data frame." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "sOybiAeVNaf6" - }, - "outputs": [], - "source": [ - "covid_data_df = pd.read_csv('covid-countries-data.csv')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true, - "id": "k9PuPoYUNaf7" - }, - "outputs": [], - "source": [ - "covid_data_df" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "weKZowz0Naf8" - }, - "source": [ - "**Q8: Count the number of countries for which the `total_tests` data is missing.**\n", - "\n", - "*Hint: Use the `.isna` method.*" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "UybZmvdFNaf9" - }, - "outputs": [], - "source": [ - "total_tests_missing = covid_data_df['total_tests'].isna().sum()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "0_jz32NxNaf-" - }, - "outputs": [], - "source": [ - "print(\"The data for total tests is missing for {} countries.\".format(int(total_tests_missing)))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "bK9yyH3GNagA" - }, - "source": [ - "Let's merge the two data frames, and compute some more metrics.\n", - "\n", - "**Q9: Merge `countries_df` with `covid_data_df` on the `location` column.**\n", - "\n", - "*Hint: Use the `.merge` method on `countries_df`." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "6qu0tGtJNagB" - }, - "outputs": [], - "source": [ - "combined_df = countries_df.merge(covid_data_df, on='location')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "yNR9WKaENagC" - }, - "outputs": [], - "source": [ - "combined_df" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "DMFIKAXvNagG" - }, - "source": [ - "**Q10: Add columns `tests_per_million`, `cases_per_million` and `deaths_per_million` into `combined_df`.**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_KdWa_juNagH" - }, - "outputs": [], - "source": [ - "combined_df['tests_per_million'] = combined_df['total_tests'] * 1e6 / combined_df['population']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "IDdn-5CONagI" - }, - "outputs": [], - "source": [ - "combined_df['cases_per_million'] = combined_df['total_cases'] * 1e6 / combined_df['population']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "93n1py5BNagK" - }, - "outputs": [], - "source": [ - "combined_df['deaths_per_million'] = combined_df['total_deaths'] * 1e6 / combined_df['population']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "IRMupBBUNagL" - }, - "outputs": [], - "source": [ - "combined_df" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "VNjlGq-uNagO" - }, - "source": [ - "**Q11: Create a dataframe with 10 countires that have highest number of tests per million people.**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "HBcHEMJZNagO" - }, - "outputs": [], - "source": [ - "highest_tests_df = combined_df.sort_values('tests_per_million', ascending=False).head(10)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "R57UrLhxNagR" - }, - "outputs": [], - 
"source": [ - "highest_tests_df" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ug-VTfalNagT" - }, - "source": [ - "**Q12: Create a dataframe with 10 countires that have highest number of positive cases per million people.**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "HxjUJq8oNagU" - }, - "outputs": [], - "source": [ - "highest_cases_df = ???" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "7EuO7nPENagY" - }, - "outputs": [], - "source": [ - "highest_cases_df" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "GX-WdUFHNagb" - }, - "source": [ - "**Q13: Create a dataframe with 10 countires that have highest number of deaths cases per million people?**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "hH19qct0Nagc" - }, - "outputs": [], - "source": [ - "highest_deaths_df = ???" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "lWhydo7ENagd" - }, - "outputs": [], - "source": [ - "highest_deaths_df" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "nUTDc8q4Nage" - }, - "source": [ - "**(Optional) Q: Count number of countries that feature in both the lists of \"highest number of tests per million\" and \"highest number of cases per million\".**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "nXat9wCrNagf" - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Z7y8D9OtNagg" - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "aJXjCbJnNagg" - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "uHWghQHUNagh" - }, - "source": [ - "**(Optional) Q: Count number of countries that feature in both the lists \"20 countries with lowest GDP per capita\" and \"20 
countries with the lowest number of hospital beds per thousand population\". Only consider countries with a population higher than 10 million while creating the list.**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "5C7khtW3Nagi" - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "RKk0r3umNagj" - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "XH7POtliNagj" - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "wvjafOzENagk" - }, - "source": [ - "## Submission\n", - "\n", - "Congratulations on making it this far! You've reached the end of this assignment, and you just completed your first real-world data analysis problem.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "LBOb5DhHNagl" - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.6" - }, - "colab": { - "provenance": [], - "include_colab_link": true - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file +