Tracking German election polling surveys
Polling surveys of German parties with data aggregated daily from different opinion research centers
The data for this script is pulled from an external website (the link is missing in this copy). Below the code, you can find the output figures.
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import matplotlib.dates as mdates
from scipy.ndimage import gaussian_filter1d
# One poll-table URL per institute; get_party_results labels entry i with names[i].
# NOTE(review): the URL strings are empty in this copy of the file -- restore the
# real addresses before running.
table_urls = [
    "",
    # "",  # broken data
]
# Institute labels for the log output and the plot legend; entry i labels table_urls[i].
# A missing comma used to merge "Forsa" and "Forsch’gr. Wahlen" into one label,
# shifting every later label by one position.
# NOTE(review): "Infratest" and "dimap" are listed as two entries -- confirm they are
# not meant to be a single institute, and that the order matches table_urls.
names = [
    "Allensbach",
    "Kantar",
    "Forsa",
    "Forsch’gr. Wahlen",
    "GMS",
    "Infratest",
    "dimap",
    "Yougov",
    "INSA",
]
# Load the election results; the second table found on the page is the one used.
election_results = pd.read_html("")[1]
# Build a numeric "percent" column from the text column (decimal comma -> decimal
# point, trailing percent sign removed). The first row is skipped by the slice and
# therefore ends up as NaN. Afterwards the rows are keyed by the "Institut" column
# so a party's result can be looked up by name.
election_results = election_results.assign(
    percent=(
        election_results["Bundes-tagswahl"]
        .iloc[1:]
        .str.replace(',', '.')
        .str.rstrip('%')
        .astype('float')
    )
).set_index("Institut")
# get polling results
party_to_watch = "AfD"
def _read_poll_table(table_url, party_name):
    """Download one institute's poll table and return it with parsed columns.

    Args:
        table_url: Address of the page holding the poll table.
        party_name: Column name of the party whose values are converted to float.

    Returns:
        The table as a DataFrame with an added "Datetime" column and with the
        ``party_name`` column converted from percentage strings to floats.
    """
    # read_html returns every table on the page; the second one is used.
    df = pd.read_html(table_url)[1]
    # The last four rows are not poll rows and are cut off.
    df = df.iloc[:-4]
    # "–" marks a cell without a value. The original code called
    # df.replace("–") with no replacement value, which relied on pandas'
    # implicit pad fill; newer pandas deprecates/rejects that call, so the
    # forward fill (take the value from the row above) is spelled out here.
    df = df.mask(df == "–").ffill()
    # The date column is either labelled "Datum" or left unnamed.
    date_column = "Datum" if "Datum" in df.columns else "Unnamed: 0"
    df["Datetime"] = pd.to_datetime(df[date_column], format="%d.%m.%Y")
    # Decimal comma -> decimal point, strip the percent sign, parse as float.
    # Only the watched column is converted; the other columns are never plotted.
    df[party_name] = (
        df[party_name]
        .str.replace(',', '.', regex=False)
        .str.rstrip('%')
        .astype('float')
    )
    return df


def get_party_results(party_to_watch = "AfD"):
    """Plot the polling history of one party across all institutes and save it.

    For every URL in the module-level ``table_urls`` the poll table is
    downloaded, the party's values are drawn both raw (dotted) and Gaussian
    smoothed (solid, labelled with the matching entry of ``names``), and the
    party's result from the module-level ``election_results`` table is added
    as a dashed horizontal line. The figure is written to ``../images/``.

    Args:
        party_to_watch: Party name as used in the index of ``election_results``.

    Raises:
        KeyError: If the party is missing from a poll table or from
            ``election_results``.
    """
    print(f"---------- {party_to_watch} ----------")
    plt.figure(figsize=(5, 3), dpi=300)

    # hotfix for linke table entry: the poll tables use a shorter column name.
    party_name = "LINKE" if party_to_watch == "DIE LINKE" else party_to_watch

    for i, table_url in enumerate(table_urls):
        print(f"Getting data from {names[i]} ...")
        df = _read_poll_table(table_url, party_name)

        smoothed = gaussian_filter1d(df[party_name], 1)
        # Row 0 is reported as the latest datapoint.
        print(f"\tLast value: {df[party_name].iloc[0]}%")
        print(f"\tLast datapoint: {df['Datetime'].iloc[0].date()}")

        # Same colour for the smoothed and the raw curve of one institute.
        color = "C" + str(i)
        plt.plot(df["Datetime"], smoothed, label=names[i], c=color)
        plt.plot(df["Datetime"], df[party_name], ls=':', lw=1, c=color)

    # Visible time window. A bare timedelta is not a position on a date axis,
    # so both ends are anchored to the current time.
    from_weeks = 24 * 4
    to_weeks = 4
    now = datetime.datetime.now()
    window_start = now - datetime.timedelta(weeks=from_weeks)
    window_end = now + datetime.timedelta(weeks=to_weeks)

    # add line that marks results from last elections
    plt.hlines(
        election_results.loc[party_to_watch, "percent"],
        window_start,
        window_end,
        color='k',
        ls='--',
        label=f'{party_to_watch} Bundestagswahl 2017',
    )

    # adjust plot settings
    plt.title(f"Watch the {party_to_watch} go pew")
    plt.xlim([window_start, window_end])
    plt.legend(fontsize=8, bbox_to_anchor=(1.0, 1.0))
    plt.ylabel("Polling result [%]")
    # Hide every second tick label to keep the date axis readable.
    plt.setp(plt.gca().get_xticklabels()[::2], visible=False)
    plt.savefig(f"../images/icon_{party_to_watch.replace('/', '_').replace(' ' , '_').lower()}.png")