Commit c2b4944c authored by Cecylia Bocovich
Browse files

Cleaned up Rscripts to plot informative graphs

parent 665da5e3
......@@ -9,75 +9,17 @@ for (filename in args) {
x <- rbind(x, fread(filename))
}
x$timestamp <- as.POSIXct(x$timestamp, tz="UTC")
# Filter out the times the cn VPN wasn't working
# (otherwise it looks like timeouts; i.e. blocking)
x.max <- x[ , .SD[which.max(percent)], by=.(site, runid, ip)]
setkey(x.max, site, runid, ip)
cat("
{{{#!html
<table class=\"wiki\">
<tr><th>bridge</th><th>CA average bootstrap %</th><th>CN average bootstrap %</th></tr>
")
ramp <- colorRamp(c("#d6756b", "#f7fbff"))
summ <- x.max[, .(.N, avg.percent=mean(percent)), by=.(site, ip)]
for (nick in unique(x$ip)) {
na <- summ[site=="na" & ip==nick]
cn <- summ[site=="cn" & ip==nick]
cat(sprintf("<tr><td>%s</td><td align=right style=\"background: %s\">%.2f%%</td><td align=right style=\"background: %s\">%.2f%%</td></tr>\n",
nick, rgb(ramp(na$avg.percent/100)/255), na$avg.percent, rgb(ramp(cn$avg.percent/100)/255), cn$avg.percent))
}
cat("</table>
}}}
")
pdf(width=8.5, height=14)
# runids <- unique(x$runid)
# runids <- runids[order(runids)]
# p <- ggplot(x[x$runid %in% runids[(length(runids)-2):(length(runids)-1)], ])
# p <- p + geom_step(aes(timestamp, percent, group=sprintf("%s-%s", runid, ip), color=ip))
# p <- p + scale_y_continuous(limits=c(0, 100), breaks=seq(0, 100, 10))
# p <- p + theme_bw()
# p
# p <- ggplot(x.max)
# p <- p + geom_point(aes(ip, percent, color=site), alpha=0.4, size=0.7, position=position_jitter(width=0.3, height=0))
# p <- p + scale_y_continuous(limits=c(0, 100))
# p <- p + coord_flip()
# p <- p + theme_bw()
# p <- p + guides(color=guide_legend(override.aes=list(alpha=1, size=2)))
# p
tmp <- x.max
tmp$site <- factor(tmp$site, levels=c("na", "cn"), labels=c("CA", "CN"))
p <- ggplot(tmp)
p <- p + geom_point(aes(timestamp, percent, color=site, shape=site, size=site), alpha=0.4)
p <- p + facet_grid(ip ~ .)
p <- p + scale_y_continuous(limits = c(0,105), breaks=c(20,40,60,80,100), labels=c("Gathering", "Signaling","Connecting","Data", "Done"))
p <- p + scale_color_brewer(palette="Set1")
p <- p + scale_shape_manual(values=c(CA=4, CN=16))
p <- p + scale_size_manual(values=c(CA=1.0, CN=1.0))
p <- p + theme_bw()
p <- p + theme(strip.text.y=element_text(angle=0))
p <- p + theme(legend.position="top")
p <- p + guides(color=guide_legend(override.aes=list(alpha=1, size=2.5)))
p
dev.off()
setDT(x)
x.max <- x[ , .SD[which.max(percent)], by=.(site, runid, ip)]
x.max <- x[ , .SD[which.max(percent)], by=.(site, runid, probeid)]
setkey(x.max, site, runid)
ggdata = data.frame(x = x.max$percent)
ggplot(ggdata, aes(x=x)) +
stat_ecdf(show.legend=FALSE) + labs(x='Snowflake stage (%)', y='CDF') +
geom_bar(stat="count", width= 10) + labs(x='Snowflake stage', y='Count') +
theme(text = element_text(size=12,family="Times")) + theme_bw() + theme(text = element_text(size=12,family="Times")) +
scale_x_continuous(limits = c(0,105), breaks=c(20,40,60,80,100), labels=c("Gathering", "Signaling","Connecting","Data", "Done"))
scale_x_continuous(limits=c(10,110), breaks=c(20,40,60,80,100), labels=c("Gathering", "Signaling","Connecting","Data", "Done"))
ggsave("stage.pdf",
width = 7,
......
......@@ -15,14 +15,11 @@ stage_re = re.compile(r'^stage:(.*)')
date_re = re.compile(r'^(\w+ \d+ \d\d:\d\d:\d\d\.\d\d\d)')
ip_re = re.compile(r'^Successfully connected to snowflake (.*)')
csvW = csv.DictWriter(sys.stdout, fieldnames=("timestamp", "site", "runid", "ip", "percent"))
csvW = csv.DictWriter(sys.stdout, fieldnames=("timestamp", "site", "runid", "probeid", "percent"))
csvW.writeheader()
stages = {'Gathering': 20, 'Signaling': 40, 'Connecting': 60, 'Data': 80, 'Done':100}
rows = []
def process_log(f, site, runid, nickname):
timestamp = datetime.datetime.strptime(runid, "%Y%m%d-%H%M")
ip = None
......@@ -42,14 +39,10 @@ def process_log(f, site, runid, nickname):
"timestamp": timestamp.strftime("%Y-%m-%d %H:%M:%S.%f"),
"site": site,
"runid": runid,
"ip": "",
"probeid": nickname,
"percent": percent,
}
rows.append(row)
for row in rows:
row['ip'] = ip
csvW.writerow(row)
csvW.writerow(row)
for filename in sys.stdin:
filename = filename.strip()
......
......@@ -27,6 +27,7 @@ ggsave("throughput.pdf",
width = 5,
height = 5)
dev.off()
setDT(bootstrap)
x.max <- bootstrap[ , .SD[which.max(percent)], by=.(site, runid, nickname)]
......@@ -35,13 +36,14 @@ setkey(x.max, site, runid)
ggdata = data.frame(x = x.max$percent)
ggplot(ggdata, aes(x=x)) +
stat_ecdf(show.legend=FALSE) + labs(x='Bootstrap progress (%)', y='CDF') +
theme(text = element_text(size=20,family="Times")) + theme_bw() + theme(text = element_text(size=20,family="Times"))
geom_bar(stat="count", width=5) + labs(x='Bootstrap progress (%)', y='Count') +
theme(text = element_text(size=20,family="Times")) + theme_bw() + theme(text = element_text(size=20,family="Times")) + scale_x_continuous(breaks=c(0,20,40,60,80,100))
ggsave("bootstrap.pdf",
width = 5,
height = 5)
dev.off()
print(paste("Number of failed snowflakes: ", length(x.max$percent[x.max$percent <= 10]), sep=""))
print(paste("Number of full bootstraps: ", length(x.max$percent[x.max$percent == 100]), sep=""))
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment