File:NZ opinion polls 2017-2020-minorparties.png

NZ_opinion_polls_2017-2020-minorparties.png(778 × 487 pixels, file size: 9 KB, MIME type: image/png)

Captions

Captions

Add a one-line explanation of what this file represents

Summary edit

Description
English: minor parties
Date
Source Own work
Author Limegreen

Licensing edit

I, the copyright holder of this work, hereby publish it under the following license:
w:en:Creative Commons
attribution share alike
This file is licensed under the Creative Commons Attribution-Share Alike 4.0 International license.
You are free:
  • to share – to copy, distribute and transmit the work
  • to remix – to adapt the work
Under the following conditions:
  • attribution – You must give appropriate credit, provide a link to the license, and indicate if changes were made. You may do so in any reasonable manner, but not in any way that suggests the licensor endorses you or your use.
  • share alike – If you remix, transform, or build upon the material, you must distribute your contributions under the same or compatible license as the original.
 
This chart was created with R.

Figure is produced using the R statistical package, using the following code. It first reads the HTML directly from the website, then parses the data and saves the graph into your working directory. It should be able to be run directly by anyone with R.

rm(list=ls())
#require(mgcv)
library(tidyverse)

#==========================================
#Parameters - specified as a list
opts <- list()
opts$major <- list(parties= c("GRN", "LAB", "NAT", "NZF"),   #use precise names from Table headers
                   ylims = c(0,65),   #Vertical range
                   fname= "NZ_opinion_polls_2017-2020-majorparties.png",
                   dp=0)  #Number of decimal places to round estimates to
opts$minor <- list(parties=c("ACT","TOP", "MRI","NCP"   #please use "Maori" for the Maori party
                   ),
                   ylims = c(0,6),   #Vertical range
                   fname = "NZ_opinion_polls_2017-2020-minorparties.png",
                   dp=1) #Number of decimal places to round estimates to

#==========================================
#Shouldn't need to edit anything below here
#==========================================

#Load the complete HTML file into memory

html <- readLines("http://en.wikipedia.org/wiki/Opinion_polling_for_the_next_New_Zealand_general_election",encoding="UTF-8")
closeAllConnections()

#Extract the opinion poll data table
tbl.no <- 1
tbl <- html[(grep("<table.*",html)[tbl.no]):(grep("</table.*",html)[tbl.no])]

#Now split it into the rows, based on the <tr> tag
tbl.rows <- list()
open.tr <- grep("<tr",tbl)
close.tr <- grep("</tr",tbl)
for(i in 1:length(open.tr)) tbl.rows[[i]] <- tbl[open.tr[i]:close.tr[i]]

#Extract table headers
hdrs <- grep("<th",tbl,value=TRUE)
party.names <- gsub("<.*?>","",hdrs)[-c(1:3, 12)] %>% #nasty hack
  gsub(" ","_",.) %>% #Replace space with a _ 
  gsub("M.{1}ori","Maori",.) #Apologies, but the hard "a" is too hard to handle otherwise
  
#extract party colours
# party.cols <- str_extract(hdrs, "(?<=color:).{7}") %>%
#   na.omit()

party.cols <- c("#00529F", "#D82A20", "#000000", "#098137", 
                "#FDE401", "#800080", "#800000",   "#6698FF")
names(party.cols) <- party.names

names(party.cols) <- party.names

#Extract data rows
tbl.rows <- tbl.rows[sapply(tbl.rows,function(x) length(grep("<td",x)))>1]

#Now extract the data
dat <- tbl.rows
dat <- lapply(dat, function(x) x[c(7:14)])
dat <- unlist(dat)
dat <- gsub("<.{1,3}>", "", dat)
dat <- gsub("<.*>", "", dat)
dat <- as.numeric(dat)
survey.dat <- matrix(dat, nrow = length(tbl.rows), byrow = TRUE) %>%
  as.data.frame()
names(survey.dat) = party.names

#get survey dates
date.str <- lapply(tbl.rows, function(x) x[2])
date.str <- str_extract(date.str, '".*"')
date.str <- gsub('\\"', '', date.str)
date.str <- strtrim(date.str, 10)

survey.date <- strptime(date.str, format = "%Y-%m-%d")

#get survey company
company <- lapply(tbl.rows, function(x) x[4]) %>%
  unlist()
company <- str_extract(company, '(?=">).*')
company <- gsub('\\">', '', company)
company <- strsplit(company, '<') %>%
  lapply(function(x) x[1]) %>%
  unlist()

#Combine results
surveys <- cbind(survey.date, company, survey.dat) %>%
  rename(Date = survey.date, Company = company) %>%
  filter(NAT > 0)



#Ugly fix to remove Opportunities party while not enough data
# surveys <- select(surveys, -TOP)


#==========================================
#Now generate each plot
#==========================================


smoothers  <- list()
for(opt in opts) {
  
  #Restrict data to selected parties
  selected.parties <- gsub(" ","_",sort(opt$parties))
  selected.cols <- party.cols[selected.parties]
  plt.dat   <- surveys[,c("Company","Date",selected.parties)]
  plt.dat <- subset(plt.dat,!is.na(surveys$Date))
  plt.dat <- plt.dat[order(plt.dat$Date),]
  plt.dat$date.num  <- as.double(plt.dat$Date)
  plt.dat <- subset(plt.dat,Company!="2017 election result")
  plt.dat$Company <- factor(plt.dat$Company)
  
  #Setup plot
  ticks <- ISOdate(c(rep(2017,1),rep(2018,2),rep(2019,2),rep(2020,2),2021),c(rep(c(7,1),4)),1)
  xlims <- range(c(ISOdate(2017,11,1),ticks))
  png(opt$fname,width=778,height=487,pointsize=16)
  par(mar=c(5.5,4,1,1))
  matplot(plt.dat$date.num,plt.dat[,selected.parties],pch=NA,xlim=xlims,ylab="Party support (%)",
          xlab="",col=selected.cols,xaxt="n",ylim=opt$ylims,yaxs="i")
  abline(h=seq(0,95,by=5),col="lightgrey",lty=3)
  abline(v=as.double(ticks),col="lightgrey",lty=3)
  abline(v=1506121200, col = "black", lty = 1)
  box()
  axis(1,at=as.double(ticks),labels=format(ticks,format="1 %b\n%Y"),cex.axis=0.8)
  axis(4,at=axTicks(4),labels=rep("",length(axTicks(4))))
  
  smoothed <- list()
  predict.x <- seq(min(surveys$Date),max(surveys$Date),length.out=100)
  for(i in 1:length(selected.parties)) {
    smoother <- loess(surveys[,selected.parties[i]] ~ as.numeric(surveys[,"Date"]),span=0.5)
    smoothed[[i]] <- predict(smoother,newdata=predict.x,se=TRUE)
    poly.data <- data.frame(c(predict.x, rev(predict.x)), 
                            c(smoothed[[i]]$fit+smoothed[[i]]$se.fit*1.96,rev(smoothed[[i]]$fit-smoothed[[i]]$se.fit*1.96))) %>%
      na.omit() %>%
      polygon(col=rgb(0.5,0.5,0.5,0.5),border=NA, fillOddEven = TRUE)
  }
  names(smoothed) <- selected.parties
  #Then add the data points
  matpoints(surveys$Date, surveys[,selected.parties],pch=20,col=selected.cols)
  #And finally the smoothers themselves
  for(i in 1:length(selected.parties)) {
    lines(predict.x,smoothed[[i]]$fit,col=selected.cols[i],lwd=2)
  }
  
  # #Then add the data points
  # matpoints(plt.dat$date.num,plt.dat[,selected.parties],pch=20,col=selected.cols)
  # #And finally the smoothers themselves
  # for(n in selected.parties) {
  #   lines(smoothed.l[[n]]$date,smoothed.l[[n]]$fit,col=selected.cols[n],lwd=2)
  # }
  
  n.parties <- length(selected.parties)
  legend(grconvertX(0.5,"npc"),grconvertY(0.0,"ndc"),xjust=0.5,yjust=0,
         legend=gsub("_"," ",selected.parties), col=selected.cols,
         pch=20,bg="white",lwd=2,
         ncol=ifelse(n.parties>4,ceiling(n.parties/2),n.parties),xpd=NA)
  #Add best estimates
  # fmt.str <- sprintf("%%2.%if\261%%1.%if %%%%",opt$dp,opt$dp)
  # for(n in names(smoothed)) {
  #   lbl <- sprintf(fmt.str,
  #                  round(rev(smoothed[[n]]$fit)[1],opt$dp),
  #                  round(1.96*rev(smoothed[[n]]$se.fit)[1],opt$dp)
  #                  )
  #   text(rev(plt.dat$date.num)[1],rev(smoothed[[n]]$fit)[1],
  #        labels=lbl,pos=4,col=selected.cols[n],xpd=NA)
  # }
  dev.off()
}

#==========================================
#Finished!
#==========================================

cat("Complete.\n")

File history

Click on a date/time to view the file as it appeared at that time.

(newest | oldest) View (newer 10 | ) (10 | 20 | 50 | 100 | 250 | 500)
Date/TimeThumbnailDimensionsUserComment
current09:49, 21 May 2020Thumbnail for version as of 09:49, 21 May 2020778 × 487 (9 KB)Limegreen (talk | contribs)add new poll
09:18, 18 May 2020Thumbnail for version as of 09:18, 18 May 2020778 × 487 (10 KB)Limegreen (talk | contribs)add new poll
16:09, 13 February 2020Thumbnail for version as of 16:09, 13 February 2020778 × 487 (10 KB)Limegreen (talk | contribs)next new poll
22:54, 12 February 2020Thumbnail for version as of 22:54, 12 February 2020778 × 487 (9 KB)Limegreen (talk | contribs)add first poll of 2020
22:39, 2 December 2019Thumbnail for version as of 22:39, 2 December 2019778 × 487 (9 KB)Limegreen (talk | contribs)add 2 new polls
09:48, 15 October 2019Thumbnail for version as of 09:48, 15 October 2019778 × 487 (9 KB)Limegreen (talk | contribs)another new poll
20:33, 13 October 2019Thumbnail for version as of 20:33, 13 October 2019778 × 487 (9 KB)Limegreen (talk | contribs)add latest reid poll
11:10, 2 August 2019Thumbnail for version as of 11:10, 2 August 2019778 × 487 (9 KB)Limegreen (talk | contribs)add new poll
09:49, 9 June 2019Thumbnail for version as of 09:49, 9 June 2019778 × 487 (9 KB)Limegreen (talk | contribs)add two new polls
11:19, 15 April 2019Thumbnail for version as of 11:19, 15 April 2019778 × 487 (9 KB)Limegreen (talk | contribs)add new polls. fix shading
(newest | oldest) View (newer 10 | ) (10 | 20 | 50 | 100 | 250 | 500)

There are no pages that use this file.

File usage on other wikis

The following other wikis use this file: