#!/usr/bin/env python
# coding: utf-8

# In[18]:


import pandas as pd
import numpy as np
from IPython.core.interactiveshell import InteractiveShell
# Notebook display setting: show the value of every top-level expression in a
# cell (e.g. the bare ``df_xlsx`` lines below), not only the last one.
InteractiveShell.ast_node_interactivity = "all"


# In[19]:


def index_prev_next(ix):
    """Return the labels in ``ix`` together with their immediate neighbours.

    For every integer label ``i`` in ``ix`` the result contains ``i - 1``,
    ``i`` and ``i + 1``.  Each label appears only once, even when the
    neighbourhoods of several input labels overlap (e.g. ``5`` and ``6``
    share ``5`` and ``6``), so selecting rows with the result never repeats
    a row.

    Args:
        ix: Iterable of integer labels, e.g. a ``pandas.Index`` or a list.

    Returns:
        list: The expanded labels in ascending order, without repeats.
        Values are not range-checked: the result can contain labels such
        as ``-1`` or one past the last row, which the caller must handle.
    """
    ix_present = set()
    for i in ix:
        ix_present.update((i - 1, i, i + 1))
    return sorted(ix_present)


# ## Data Prep

# In[20]:


# Input CSV location; the relative path is resolved against the current
# working directory.
path = "../data_files/Election/"
csv_file = "nyt_ts.csv"
csv_path = "{}{}".format(path, csv_file)

# Read the whole file into the frame every later cell works on.
df_nyt_ts = pd.read_csv(csv_path)


# In[21]:


# Parse the timestamp column into datetimes so rows can be ordered in time.
df_nyt_ts['timestamp'] = pd.to_datetime(df_nyt_ts['timestamp'])


# In[22]:


# Keep only rows whose ``votes`` value is positive.  ``.copy()`` makes the
# filtered result an independent frame, so the column assignments below write
# to it directly instead of triggering pandas' SettingWithCopyWarning.
df_nyt_ts = df_nyt_ts.query("votes > 0").copy()


# In[23]:


# Per-party vote counts: total votes multiplied by each party's share column.
df_nyt_ts['votes_rep'] = df_nyt_ts['votes'] * df_nyt_ts['vote_share_rep']
df_nyt_ts['votes_dem'] = df_nyt_ts['votes'] * df_nyt_ts['vote_share_dem']


# In[24]:


# Order rows by state, then by time, and renumber them 0..n-1 so that
# consecutive index labels are consecutive rows (later cells rely on
# ``label - 1`` / ``label + 1`` being the neighbouring rows).
df_nyt_ts = df_nyt_ts.sort_values(['state', 'timestamp']).reset_index(drop=True)


# In[25]:


# Vote-count columns and the names of the columns holding their row-to-row
# change.
vote_cols = ['votes', 'votes_rep', 'votes_dem']
vote_diff_cols = ["{}_diff".format(name) for name in vote_cols]

# Difference to the previous row within the same state; the first row of each
# state has no predecessor and therefore gets NaN.
per_state_counts = df_nyt_ts.groupby(['state'])[vote_cols]
df_nyt_ts[vote_diff_cols] = per_state_counts.diff()


# In[26]:


# Fraction of each row's change in total votes that went to each party.
df_nyt_ts['compare_rep'] = df_nyt_ts['votes_rep_diff'] / df_nyt_ts['votes_diff']
df_nyt_ts['compare_dem'] = df_nyt_ts['votes_dem_diff'] / df_nyt_ts['votes_diff']


# ## Biased Vote Dump

# In[27]:


# Base columns included in every exported sheet.
cols = [
    'state',
    'timestamp',
    'vote_share_rep',
    'vote_share_dem',
    'votes',
    'votes_rep',
    'votes_dem',
    'votes_diff',
    'votes_rep_diff',
    'votes_dem_diff',
]


# In[28]:


# A row counts as a "dump" when its ``votes_diff`` exceeds ``threshold_dump``;
# it counts as biased when a party's ``compare_*`` ratio is below
# ``threshold_compare``.
threshold_dump = 100000
threshold_compare = 0.1


# ### Unfavorable for Trump

# In[30]:


# Flag rows whose total vote count jumped by more than ``threshold_dump``
# while ``compare_rep`` (the rep fraction of that jump) is below
# ``threshold_compare``.
df = df_nyt_ts.query("(votes_diff > @threshold_dump) & (compare_rep < @threshold_compare)")
# Flagged rows plus the row directly before and after each one, for context.
# The frame is ordered by state, so at a state boundary a neighbouring row
# may belong to a different state.
ix_present = index_prev_next(df.index)
# Select through a membership mask rather than ``.loc[list]``: a neighbour
# label that falls outside the frame is skipped instead of raising KeyError,
# and a row shared by two adjacent flagged rows is exported only once.
df_xlsx = df_nyt_ts.loc[df_nyt_ts.index.isin(ix_present), cols + ['compare_rep']]
df_xlsx
df_xlsx.to_excel("vote_dump_against_trump.xlsx")


# ### Unfavorable for Biden

# In[31]:


# Flag rows whose total vote count jumped by more than ``threshold_dump``
# while ``compare_dem`` (the dem fraction of that jump) is below
# ``threshold_compare``.
df = df_nyt_ts.query("(votes_diff > @threshold_dump) & (compare_dem < @threshold_compare)")
# Flagged rows plus the row directly before and after each one, for context.
# The frame is ordered by state, so at a state boundary a neighbouring row
# may belong to a different state.
ix_present = index_prev_next(df.index)
# Export ``compare_dem`` -- the ratio this section actually filters on.
# The membership mask (instead of ``.loc[list]``) skips neighbour labels that
# fall outside the frame rather than raising KeyError, and exports a row
# shared by two adjacent flagged rows only once.
df_xlsx = df_nyt_ts.loc[df_nyt_ts.index.isin(ix_present), cols + ['compare_dem']]
df_xlsx
df_xlsx.to_excel("vote_dump_against_biden.xlsx")


# ## Vote Switch

# In[32]:


# Rows where the rep change is exactly the negative of the dem change ...
flt = (df_nyt_ts.votes_rep_diff == df_nyt_ts.votes_dem_diff*-1)
# ... and something actually changed (excludes the trivial 0 == -0 case).
flt2 = (df_nyt_ts.votes_rep_diff != 0)
df = df_nyt_ts[flt & flt2].copy()
# Flagged rows plus the row directly before and after each one, for context.
ix_present = index_prev_next(df.index)
# Select through a membership mask rather than ``reindex``: a neighbour label
# outside the frame no longer produces an all-NaN row, and a row shared by two
# adjacent flagged rows is exported only once.
df_xlsx = df_nyt_ts.loc[df_nyt_ts.index.isin(ix_present), cols]
df_xlsx
df_xlsx.to_excel("vote_switch.xlsx")


# In[33]:


# Tolerant variant of the exact vote-switch check: the two changes must cancel
# out to within ``threshold_switch`` votes.
threshold_switch = 20
flt = np.abs(df_nyt_ts.votes_rep_diff - df_nyt_ts.votes_dem_diff*-1) < threshold_switch
# Non-zero rep change (already implied by ``flt3``; kept as an explicit mask).
flt2 = (df_nyt_ts.votes_rep_diff != 0)
# The two changes must have opposite signs, i.e. one side gained while the
# other lost.
flt3 = np.sign(df_nyt_ts.votes_rep_diff * df_nyt_ts.votes_dem_diff) < 0
df = df_nyt_ts[flt & flt2 & flt3].copy()
# Flagged rows plus the row directly before and after each one, for context.
ix_present = index_prev_next(df.index)
# Select through a membership mask rather than ``reindex``: a neighbour label
# outside the frame no longer produces an all-NaN row, and a row shared by two
# adjacent flagged rows is exported only once.
df_xlsx = df_nyt_ts.loc[df_nyt_ts.index.isin(ix_present), cols]
df_xlsx
df_xlsx.to_excel("vote_switch_within_20.xlsx")


# ## Votes Decrease

# In[36]:


# Flag rows where the total vote count dropped by more than 100,000 compared
# with the previous row of the same state.
thresh_vote_dec = -10**5
ix_diff_neg = df_nyt_ts.query("(votes_diff < @thresh_vote_dec)").index
# Flagged rows plus the row directly before and after each one, for context.
ix_present = index_prev_next(ix_diff_neg)
# Select through a membership mask rather than ``.loc[list, cols]``: a
# neighbour label that falls outside the frame is skipped instead of raising
# KeyError, and a row shared by two adjacent flagged rows is exported once.
df_xlsx = df_nyt_ts.loc[df_nyt_ts.index.isin(ix_present), cols]
df_xlsx
df_xlsx.to_excel("vote_decrease.xlsx")