{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#import numpy as np\n",
    "from scipy.special import xlogy\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "from numpy import log2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Zero-safe p*log2(p): xlogy(0, 0) evaluates to 0 instead of NaN.\n",
    "log2_coeff = 1/xlogy(1,2)  # 1/ln(2), rescales natural-log values to base 2\n",
    "def plog2p(p):\n",
    "    \"\"\"Return p*log2(p) elementwise, taking the value at p == 0 to be 0.\"\"\"\n",
    "    natural_log_term = xlogy(p, p)  # p*ln(p)\n",
    "    return natural_log_term*log2_coeff"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load single_counts.csv (first column as the index) and draw its columns as a bar chart.\n",
    "data = pd.read_csv('single_counts.csv', index_col=0)\n",
    "data.plot(kind='bar')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Entropy in bits of a uniform distribution over 27 symbols, i.e. -log2(1/27).\n",
    "# NOTE(review): 27 is presumably 26 letters plus space - confirm against the data files.\n",
    "print(\"Equal-distribution entropy: \", -log2(1/27), sep=\"\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Empirical single-character distribution and its entropy in bits.\n",
    "single_data = pd.read_csv('single_counts.csv', index_col=0)\n",
    "single_count_data = single_data['Count']\n",
    "# Series.sum is vectorised, whereas the builtin sum() walks the Series one element at a\n",
    "# time in Python. skipna=False keeps the builtin's behaviour of propagating NaN.\n",
    "single_prob = single_count_data/single_count_data.sum(skipna=False)\n",
    "single_entropy = -plog2p(single_prob).sum(skipna=False)\n",
    "print(\"Single-character entropy: \" + str(single_entropy))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Empirical distribution from double_counts.csv and its entropy in bits.\n",
    "double_data = pd.read_csv('double_counts.csv', index_col=0)\n",
    "double_count_data = double_data['Count']\n",
    "# Series.sum is vectorised, whereas the builtin sum() walks the Series one element at a\n",
    "# time in Python. skipna=False keeps the builtin's behaviour of propagating NaN.\n",
    "double_prob = double_count_data/double_count_data.sum(skipna=False)\n",
    "double_entropy = -plog2p(double_prob).sum(skipna=False)\n",
    "print(\"Double-character entropy: \" + str(double_entropy))\n",
    "# 2*single_entropy - double_entropy: single-character entropy counted once per position,\n",
    "# minus the entropy of the pair.\n",
    "print(\"Second-character information: \"+ str(2*single_entropy - double_entropy))\n",
    "# Entropy added by the second character once the first is known.\n",
    "print(\"Second-character conditional entropy: \"+ str(double_entropy-single_entropy))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Empirical distribution from triple_counts.csv and its entropy in bits.\n",
    "triple_data = pd.read_csv('triple_counts.csv', index_col=0)\n",
    "triple_count_data = triple_data['Count']\n",
    "# Series.sum is vectorised, whereas the builtin sum() walks the Series one element at a\n",
    "# time in Python. skipna=False keeps the builtin's behaviour of propagating NaN.\n",
    "triple_prob = triple_count_data/triple_count_data.sum(skipna=False)\n",
    "triple_entropy = -plog2p(triple_prob).sum(skipna=False)\n",
    "print(\"Triple-character entropy: \" + str(triple_entropy))\n",
    "# Entropy added by the third character once the first two are known.\n",
    "print(\"Third-character conditional entropy: \"+ str(triple_entropy-double_entropy))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Empirical distribution from quadruple_counts.csv and its entropy in bits.\n",
    "quadruple_data = pd.read_csv('quadruple_counts.csv', index_col=0)\n",
    "quadruple_count_data = quadruple_data['Count']\n",
    "# Series.sum is vectorised, whereas the builtin sum() walks the Series one element at a\n",
    "# time in Python. skipna=False keeps the builtin's behaviour of propagating NaN.\n",
    "quadruple_prob = quadruple_count_data/quadruple_count_data.sum(skipna=False)\n",
    "quadruple_entropy = -plog2p(quadruple_prob).sum(skipna=False)\n",
    "print(\"Four-character entropy: \" + str(quadruple_entropy))\n",
    "# Entropy added by the fourth character once the first three are known.\n",
    "print(\"Fourth-character conditional entropy: \"+ str(quadruple_entropy - triple_entropy))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.0"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
