#!/usr/bin/env python
# coding: utf-8
# In[18]:
import pandas as pd
import numpy as np
from IPython.core.interactiveshell import InteractiveShell
# IPython display setting: show the value of every top-level expression in a
# cell rather than only the last one (the bare `df_xlsx` lines below rely on
# this to render the tables before they are exported).
InteractiveShell.ast_node_interactivity = "all"
# In[19]:
def index_prev_next(ix):
    """Return the labels in *ix* together with their immediate neighbours.

    For every integer label ``i`` in *ix* the result contains ``i - 1``,
    ``i`` and ``i + 1``.  Each label appears only once, even when hits are
    adjacent or repeated, so using the result as a row selector does not
    duplicate rows.

    Args:
        ix: Iterable of integer labels (a list or a pandas Index both work).

    Returns:
        A sorted list of unique labels.  Neighbour labels are computed
        arithmetically and are not checked against any frame, so they may
        fall outside the caller's index; the caller must select rows in a
        way that tolerates missing labels.
    """
    neighbours = set()
    for label in ix:
        neighbours.update((label - 1, label, label + 1))
    return sorted(neighbours)
# ## Data Prep
# In[20]:
# Location of the input CSV (relative path, resolved against the working directory).
path = "../data_files/Election/"
csv_file = "nyt_ts.csv"
csv_path = "".join((path, csv_file))
df_nyt_ts = pd.read_csv(csv_path)
# In[21]:
# Convert the timestamp column to datetime values so it sorts chronologically.
df_nyt_ts["timestamp"] = pd.to_datetime(df_nyt_ts["timestamp"])
# In[22]:
# Keep only rows that report a positive vote total.  The explicit copy makes
# df_nyt_ts an independent frame, so the column assignments below write to it
# directly instead of triggering pandas' SettingWithCopyWarning on a filtered
# result.
df_nyt_ts = df_nyt_ts.query("votes > 0").copy()
# In[23]:
# Per-party vote counts: total votes scaled by each vote share
# (presumably the shares are fractions in [0, 1] -- verify against the CSV).
df_nyt_ts['votes_rep'] = df_nyt_ts.votes * df_nyt_ts.vote_share_rep
df_nyt_ts['votes_dem'] = df_nyt_ts.votes * df_nyt_ts.vote_share_dem
# In[24]:
# Order rows by state, then by time, and renumber them 0..n-1 so that
# consecutive integer labels correspond to consecutive rows.
df_nyt_ts = df_nyt_ts.sort_values(by=['state', 'timestamp']).reset_index(drop=True)
# In[25]:
vote_cols = ['votes', 'votes_rep', 'votes_dem']
vote_diff_cols = ["{}_diff".format(col) for col in vote_cols]
# Row-to-row change of each count within a state; the first row of every
# state has no predecessor and therefore gets NaN.
df_nyt_ts[vote_diff_cols] = df_nyt_ts.groupby('state')[vote_cols].diff()
# In[26]:
# Fraction of each update's vote change attributed to rep / dem.
df_nyt_ts['compare_rep'] = df_nyt_ts['votes_rep_diff'] / df_nyt_ts['votes_diff']
df_nyt_ts['compare_dem'] = df_nyt_ts['votes_dem_diff'] / df_nyt_ts['votes_diff']
# ## Biased Vote Dump
# In[27]:
# Columns written to every exported table.
cols = [
    'state', 'timestamp',
    'vote_share_rep', 'vote_share_dem',
    'votes', 'votes_rep', 'votes_dem',
    'votes_diff', 'votes_rep_diff', 'votes_dem_diff',
]
# In[28]:
# An update counts as a "dump" when it adds more than this many votes ...
threshold_dump = 100000
# ... and as biased when one party's fraction of those votes is below this.
threshold_compare = 0.1
# ### Unfavorable for Trump
# In[30]:
# Updates that add more than threshold_dump votes while the rep fraction of
# the added votes is below threshold_compare.
df = df_nyt_ts.query("(votes_diff > @threshold_dump) & (compare_rep < @threshold_compare)")
# Include the row before and after each hit for context.
ix_present = index_prev_next(df.index)
# reindex() rather than .loc[]: a neighbour label can fall outside the frame
# (e.g. when the hit is the very last row).  .loc raises KeyError on missing
# labels in current pandas, whereas reindex() returns an all-NaN row for them.
df_xlsx = df_nyt_ts.reindex(ix_present)[cols + ['compare_rep']]
df_xlsx
df_xlsx.to_excel("vote_dump_against_trump.xlsx")
# ### Unfavorable for Biden
# In[31]:
# Updates that add more than threshold_dump votes while the dem fraction of
# the added votes is below threshold_compare.
df = df_nyt_ts.query("(votes_diff > @threshold_dump) & (compare_dem < @threshold_compare)")
# Include the row before and after each hit for context.
ix_present = index_prev_next(df.index)
# Export compare_dem -- the ratio this section actually filters on (the
# original exported compare_rep here).  reindex() rather than .loc[] so a
# neighbour label outside the frame yields an all-NaN row instead of a KeyError.
df_xlsx = df_nyt_ts.reindex(ix_present)[cols + ['compare_dem']]
df_xlsx
df_xlsx.to_excel("vote_dump_against_biden.xlsx")
# ## Vote Switch
# In[32]:
# Rows where the rep change is exactly the negative of the dem change ...
flt = df_nyt_ts['votes_rep_diff'].eq(-df_nyt_ts['votes_dem_diff'])
# ... and the change itself is non-zero.
flt2 = df_nyt_ts['votes_rep_diff'].ne(0)
df = df_nyt_ts.loc[flt & flt2].copy()
# Include the row before and after each hit; labels missing from the frame
# come back from reindex() as all-NaN rows.
ix_present = index_prev_next(df.index)
df_xlsx = df_nyt_ts[cols].reindex(ix_present)
df_xlsx
df_xlsx.to_excel("vote_switch.xlsx")
# In[33]:
# Tolerance (in votes) for treating a rep change and a dem change as offsetting.
threshold_switch = 20
# |rep change + dem change| below the tolerance, i.e. the two nearly cancel.
flt = (df_nyt_ts['votes_rep_diff'] + df_nyt_ts['votes_dem_diff']).abs() < threshold_switch
flt2 = df_nyt_ts['votes_rep_diff'].ne(0)
# The two changes have opposite signs (their product is negative).
flt3 = (df_nyt_ts['votes_rep_diff'] * df_nyt_ts['votes_dem_diff']).lt(0)
df = df_nyt_ts.loc[flt & flt2 & flt3].copy()
# Include the row before and after each hit; labels missing from the frame
# come back from reindex() as all-NaN rows.
ix_present = index_prev_next(df.index)
df_xlsx = df_nyt_ts[cols].reindex(ix_present)
df_xlsx
df_xlsx.to_excel("vote_switch_within_20.xlsx")
# ## Votes Decrease
# In[36]:
# An update counts as a large decrease when the total drops by more than
# 100,000 votes (votes_diff below this negative threshold).
thresh_vote_dec = -10**5
ix_diff_neg = df_nyt_ts.query("(votes_diff < @thresh_vote_dec)").index
# Include the row before and after each hit for context.
ix_present = index_prev_next(ix_diff_neg)
# reindex() rather than .loc[]: a neighbour label can fall outside the frame
# (e.g. when the hit is the very last row).  .loc raises KeyError on missing
# labels in current pandas, whereas reindex() returns an all-NaN row for them,
# matching how the vote-switch tables are built.
df_xlsx = df_nyt_ts.reindex(ix_present)[cols]
df_xlsx
df_xlsx.to_excel("vote_decrease.xlsx")
