library(dplyr)
library(ggplot2);
library(RMySQL)
db <- dbConnect(MySQL(), host='', user='', password='', dbname='')

Custom Theme (based on https://rpubs.com/Koundy/71792)

theme_Publication <- function(base_size=10, base_family="Helvetica") {
      library(grid)
      library(ggthemes)
      (theme_foundation(base_size=base_size, base_family=base_family)
       + theme(plot.title = element_text(face = "bold",
                                         size = rel(1), hjust = 0.5),
               plot.subtitle = element_text(hjust = 0.5),
               text = element_text(),
               panel.background = element_rect(colour = NA),
               plot.background = element_rect(colour = NA),
               panel.border = element_rect(colour = NA),
               axis.title = element_text(face = "bold",size = rel(1)),
               axis.title.y = element_text(angle=90,vjust =2),
               axis.title.x = element_text(vjust = -0.2),
               axis.text = element_text(), 
               axis.line = element_line(colour="black"),
               axis.ticks = element_line(),
               panel.grid.major = element_line(colour="#f0f0f0"),
               panel.grid.minor = element_blank(),
               legend.key = element_rect(colour = NA),
               legend.position = "bottom",
               legend.direction = "horizontal",
               legend.key.size= unit(0.2, "cm"),
               legend.spacing = unit(0, "cm"),
               legend.title = element_text(face = "italic", size = rel(1)),
               plot.margin=unit(c(1,2,1,2),"mm"),
               strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
               strip.text = element_text(face="bold")
          ))
      
}
theme_set(theme_Publication())
scale_colour_discrete <- function(...) scale_color_brewer(..., palette="Set2")
legendInBox <- function(pos = c(0,0)) theme(legend.position = pos, legend.direction = "vertical", legend.box.background = element_rect(colour = "#000000"), legend.key.size = unit(0.9, "lines"))
update_geom_defaults("point", list(size = 0.7))

CA:VSIDS vs CA:RND

df <- dbGetQuery(db, "
  SELECT * FROM Experiment as main
  inner join Instance
    on Instance.`ID-Instance` = main.`ID-Instance`
  inner join Configuration
    on Configuration.`ID-configuration` = main.`ID-configuration`
where
 `RE-value` in ('lbd', 'luE3')
 and `VD-value` in ('df95', 'df80')
 and `PH-value` in ('std')
 and `CE-value` in ('lin', 'glu', 'min')
 and `CA-value` in ('vsids', 'lbd', 'rnd')
 and `CL-value` in ('1uip')
 and `PR-value` in ('on', 'off')
 and `SH-value` in ('off');")
df$`RE-value` <- as.factor(df$`RE-value`)
df$`VD-value` <- as.factor(df$`VD-value`)
df$`PH-value` <- as.factor(df$`PH-value`)
df$`CE-value` <- as.factor(df$`CE-value`)
df$`CA-value` <- as.factor(df$`CA-value`)
df$`CL-value` <- as.factor(df$`CL-value`)
df$`PR-value` <- as.factor(df$`PR-value`)
df$`SH-value` <- as.factor(df$`SH-value`)
df <- df[, !duplicated(colnames(df))]
library(ggplot2)
data = df %>% filter(Subfamily == "domset_4", `RE-value` == "lbd", `VD-value` == "df95", `PH-value` == "std", `CE-value` == "glu", `PR-value` == "on")
plot <- ggplot(data, aes(
  x = `Variables`,
  y = `CPU-time`,
  color = `CA-value`)) + geom_point() + geom_line() +
  labs(x = "Number of variables", y = "CPU time", color = "Clause assessment", title = "Dominating set formulas (k=4)") +
  legendInBox(c(0.25,0.6))
ggsave(filename = "plots/ca_rnd_vs_vsidis_single.pdf", units = "cm", device = "pdf", width = 10, height = 5)
plot

data = df %>% filter()
timeouts = data %>% 
  group_by(Subfamily, `RE-value`, `VD-value`, `PH-value`, `CE-value`, `CA-value`, `CL-value`, `PR-value`, `SH-value`) %>% 
  summarize(
    timeout = sum(`Solver-answer` == "INDETERMINATE"),
    outofmem = sum(`Solver-answer` == "OUTOFMEMORY" ),
    unsolved = sum(`Solver-answer` %in% c("OUTOFMEMORY", "INDETERMINATE") ),
    n = n())
rndVsLbd = inner_join(
  timeouts %>% filter(`CA-value` == "rnd"),
  timeouts %>% filter(`CA-value` == "lbd"),
  by = c("Subfamily", "RE-value", "VD-value", "PH-value", "CE-value", "CL-value", "PR-value", "SH-value"),
  suffix = c(".rnd", ".other"))
rndVsLbd = rndVsLbd %>% mutate(diff = unsolved.rnd - unsolved.other, other = factor("lbd", c("lbd", "vsids")))
rndVsVsids = inner_join(
  timeouts %>% filter(`CA-value` == "rnd"),
  timeouts %>% filter(`CA-value` == "vsids"),
  by = c("Subfamily", "RE-value", "VD-value", "PH-value", "CE-value", "CL-value", "PR-value", "SH-value"),
  suffix = c(".rnd", ".other"))
rndVsVsids = rndVsVsids %>% mutate(diff = unsolved.rnd - unsolved.other, other = factor("vsids", c("lbd", "vsids")))
data = union(rndVsLbd, rndVsVsids)
           
print("lbd - rnd")
[1] "lbd - rnd"
summary(data %>% filter(other == "lbd") %>% pull(diff))
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 -38.00    0.00    1.00    3.12    6.00   33.00 
print("vsids - rnd")
[1] "vsids - rnd"
summary(data %>% filter(other == "vsids") %>% pull(diff))
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-14.0000   0.0000   0.0000   0.8029   1.0000  33.0000 
plot <- ggplot(data, aes(x = `other`, y = `diff`)) +
  geom_boxplot(outlier.shape = 4) +
  stat_boxplot(geom ='errorbar', width = 0.2) +
  labs(x = "", y = "Difference in number of timeouts", title = "All formula families") +
  scale_x_discrete(labels = c('random - LBD','random - activity-based')) + 
  theme(plot.margin=unit(c(2,2,-3,2),"mm")) +
ggsave(filename = "plots/ca_rnd_boxplot.pdf", units = "cm", device = "pdf", width = 10, height = 6.5)
plot

Each data point is a configuration for a subfamily, the difference is optained by only varying the clause assesment strategie.

The plot shows that there is a difference between vsids and rnd, especially the lower quartiel on the difference is zero, i.e. in less then 25% of the cases rnd is better (for no differnece one would expect more). Especially, when comparing this difference to lbd, it shows that it is not very relevant.

Decay Factor

Load Variable Decay Batch

df <- dbGetQuery(db, "
  SELECT * FROM Experiment as main
  inner join Instance
    on Instance.`ID-Instance` = main.`ID-Instance`
  inner join Configuration
    on Configuration.`ID-configuration` = main.`ID-configuration`
where
     `RE-value` in ('lbd', 'luE3')
 and `VD-value` in ('df99', 'df95', 'df80', 'df65', 'rnd', 'lrb')
 and `PH-value` in ('fix0', 'std')
 and `CE-value` in ('glu', 'min')
 and `CA-value` in ('vsids', 'lbd')
 and `CL-value` in ('1uip')
 and `PR-value` in ('on', 'off')
 and `SH-value` in ('off');")
df$`RE-value` <- as.factor(df$`RE-value`)
df$`VD-value` <- as.factor(df$`VD-value`)
df$`PH-value` <- as.factor(df$`PH-value`)
df$`CE-value` <- as.factor(df$`CE-value`)
df$`CA-value` <- as.factor(df$`CA-value`)
df$`CL-value` <- as.factor(df$`CL-value`)
df$`PR-value` <- as.factor(df$`PR-value`)
df$`SH-value` <- as.factor(df$`SH-value`)
df <- df[, !duplicated(colnames(df))]
data = df %>% filter(Subfamily == "op_partial", `CE-value` == "glu", `RE-value` == "lbd", `PH-value` == "std", `PR-value` == "off", `CA-value` == "lbd", !`VD-value` %in% c("lrb", "rnd"))
plot <- ggplot(data, 
       aes(x = `Variables`,
           y = `CPU-time`, color = `VD-value`)
       ) + 
  geom_point() + geom_line() + 
  legendInBox(c(0.75,0.53)) +
  labs(x = "Number of variables", y = "CPU time", color = "VSIDS decay factor", title = "Partial ordering principle formulas") +
  scale_colour_discrete(labels = c("0.65", "0.80", "0.95", "0.99"))
ggsave(filename = "plots/vd_op_partial.pdf", units = "cm", device = "pdf", width = 10, height = 5.2)
plot

LRB

data = df %>% filter()
timeouts = data %>% 
  group_by(Subfamily, `RE-value`, `VD-value`, `PH-value`, `CE-value`, `CA-value`, `CL-value`, `PR-value`, `SH-value`) %>% 
  summarize(
    timeout = sum(`Solver-answer` == "INDETERMINATE"),
    outofmem = sum(`Solver-answer` == "OUTOFMEMORY" ),
    unsolved = sum(`Solver-answer` %in% c("OUTOFMEMORY", "INDETERMINATE") ),
    n = n())
lrbVs65 = inner_join(
  timeouts %>% filter(`VD-value` == "lrb"),
  timeouts %>% filter(`VD-value` == "df65"),
  by = c("Subfamily", "RE-value", "PH-value", "CE-value", "CL-value", "PR-value", "SH-value", "CA-value"))
lrbVs65 = lrbVs65 %>% mutate(diff = unsolved.x - unsolved.y, other = "df65")
lrbVs99 = inner_join(
  timeouts %>% filter(`VD-value` == "lrb"),
  timeouts %>% filter(`VD-value` == "df99"),
  by = c("Subfamily", "RE-value", "PH-value", "CE-value", "CL-value", "PR-value", "SH-value", "CA-value"))
lrbVs99 = lrbVs99 %>% mutate(diff = unsolved.x - unsolved.y, other = "df99")
df99Vsdf65 = inner_join(
  timeouts %>% filter(`VD-value` == "df99"),
  timeouts %>% filter(`VD-value` == "df65"),
  by = c("Subfamily", "RE-value", "PH-value", "CE-value", "CL-value", "PR-value", "SH-value", "CA-value"))
df99Vsdf65 = df99Vsdf65 %>% mutate(diff = unsolved.x - unsolved.y, other = "df")
data <- union(lrbVs65, lrbVs99)
data <- union(data, df99Vsdf65)
           
print("df65 - lrb")
[1] "df65 - lrb"
summary(data %>% filter(other == "df65") %>% pull(diff))
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-25.0000  -2.0000   0.0000  -0.6743   0.0000  26.0000 
print("df99 - lrb")
[1] "df99 - lrb"
summary(data %>% filter(other == "df99") %>% pull(diff))
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-27.0000  -1.0000   0.0000  -0.8606   1.0000  11.0000 
print("df99 - df65")
[1] "df99 - df65"
summary(data %>% filter(other == "df") %>% pull(diff))
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-27.0000  -3.0000   0.0000   0.1863   1.0000  31.0000 
plot <- ggplot(data, aes(x = `other`, y = `diff`)) +
  geom_boxplot(outlier.shape = 4) +
  stat_boxplot(geom ='errorbar', width = 0.2) +
  labs(x = "", y = "Difference in number of timeouts", title = "All formula families") +
  theme(plot.margin=unit(c(2,2,-3,2),"mm")) +
  scale_x_discrete(labels = c('VSIDS .99 - .65', 'LRB - VSIDS .65','LRB - VSIDS .99'))
ggsave(filename = "plots/vd_lrb_vs_vsids.pdf", units = "cm", device = "pdf", width = 10, height = 6.5)
plot

Restarts

Load main Batch

library(RMySQL)
df <- dbGetQuery(db, "
  SELECT * FROM Experiment as main
  inner join Instance
    on Instance.`ID-Instance` = main.`ID-Instance`
  inner join Configuration
    on Configuration.`ID-configuration` = main.`ID-configuration`
where
 `RE-value` in ('no', 'lbd', 'luE2', 'luE3')
 and `VD-value` in ('df95', 'df80', 'lrb')
 and `PH-value` in ('fix0', 'std')
 and `CE-value` in ('no', 'lin', 'glu', 'min')
 and `CA-value` in ('vsids', 'lbd', 'none')
 and `CL-value` in ('1uip')
 and `PR-value` in ('on', 'off')
 and `SH-value` in ('off');")
df$`RE-value` <- as.factor(df$`RE-value`)
df$`VD-value` <- as.factor(df$`VD-value`)
df$`PH-value` <- as.factor(df$`PH-value`)
df$`CE-value` <- as.factor(df$`CE-value`)
df$`CA-value` <- as.factor(df$`CA-value`)
df$`CL-value` <- as.factor(df$`CL-value`)
df$`PR-value` <- as.factor(df$`PR-value`)
df$`SH-value` <- as.factor(df$`SH-value`)
df <- df[, !duplicated(colnames(df))]
data = df %>% filter(Subfamily == "stone_width3chain_nhalfmarkers", `CE-value` == "glu", `PH-value` == "std", `PR-value` == "off", `CA-value` == "lbd", `VD-value` == "df95")
plot <- ggplot(data, 
       aes(x = `Variables`,
           y = `CPU-time`, color = `RE-value`)
       ) + 
  geom_point() + geom_line() + 
  theme(legend.position = c(1.2,0.5), legend.direction = "vertical", legend.key.size = unit(1, "lines"), plot.margin=unit(c(1,25,1,2),"mm")) +
  scale_colour_discrete(labels = c("LBD", "Luby 100", "Luby 1000", "no restarts")) +
  labs(x = "Number of variables", y = "CPU time", color = "Restarts", title = "Stone formulas", subtitle = "on width 3 chain, #stones = #nodes / 2")
ggsave(filename = "plots/restarts.pdf", units = "cm", device = "pdf", width = 10, height = 5.2)
plot + theme(legend.position = "bottom", legend.direction = "horizontal")

data = df %>% filter(Subfamily == "peb_pyrofpyr_neq3", `CE-value` == "glu", `PH-value` == "std", `PR-value` == "off", `CA-value` == "lbd", `VD-value` == "df80")
plot <- ggplot(data, 
       aes(x = `Variables`,
           y = `CPU-time`, color = `RE-value`)
       ) + 
  geom_point() + geom_line() + 
  scale_colour_discrete(labels = c("LBD", "Luby 100", "Luby 1000", "no restarts")) +
  theme(legend.position = c(1.2,0.5), legend.direction = "vertical", legend.key.size = unit(1, "lines"), plot.margin=unit(c(1,25,1,2),"mm")) +
  labs(x = "Number of variables", y = "CPU time", color = "Restarts", title = "Pebbling on pyramid-of-pyramids graphs")
ggsave(filename = "plots/restarts2.pdf", units = "cm", device = "pdf", width = 10, height = 5)
plot + theme(legend.position = "bottom", legend.direction = "horizontal")

Database Size

data = df %>% filter(Subfamily == "tseitin_reggrid_5", `CA-value` == "lbd", `RE-value` == "lbd", `VD-value` == "df80", `PH-value` == "std", `PR-value` == "off")
ggplot(data, 
       aes(x = `Variables`,
           y = `CPU-time`, color = `CE-value`, group = `ID-configuration`)
       ) + 
  geom_point() + geom_line() +
  labs(x = "Number of variables", y = "CPU time", color = "Clause erasure:")

ggplot(data %>% filter(`Solver-answer` == "UNSATISFIABLE"), 
       aes(x = `Variables`,
           y = `Conflicts` / 1000000, color = `CE-value`, group = `ID-configuration`)
       ) + 
  geom_point() + geom_line() +
  labs(x = "Number of variables", y = "Million conflicts")

Fancy Arrangement

library(gridExtra)
library(cowplot)
data = df %>% filter(Subfamily == "tseitin_reggrid_5", `CA-value` == "lbd", `RE-value` == "lbd", `VD-value` == "df80", `PH-value` == "std", `PR-value` == "off")
plot1 <- ggplot(data, 
       aes(x = `Variables`,
           y = `CPU-time`, color = `CE-value`, group = `ID-configuration`)
       ) + 
  geom_point() + geom_line() +
  theme(plot.margin=unit(c(0,2,0,2),"mm"), legend.position = "bottom") +
  scale_colour_discrete(labels = c("glucose", "linear", "minisat")) +
  labs(x = "Number of variables", y = "CPU time", color = "Clause erasure:")
legend <- get_legend(plot1)
plot1 <- plot1 + theme(legend.position = "none")
plot2 <- ggplot(data %>% filter(`Solver-answer` == "UNSATISFIABLE"), 
       aes(x = `Variables`,
           y = `Conflicts` / 1000000, color = `CE-value`, group = `ID-configuration`)
       ) + 
  geom_point() + geom_line() +
  theme(plot.margin=unit(c(0,2,0,3),"mm"), legend.position = "none")+
  labs(x = "Number of variables", y = "Million conflicts")
titlePlot <- ggplot() + labs(title = "Tseitin formulas on grid graphs (5 rows)") +
  theme(plot.margin=unit(c(2,2,0,2),"mm"))
arrange <- arrangeGrob(plot1,plot2, ncol = 2)
arrange <- arrangeGrob(titlePlot, arrange, legend, heights = c(0.8,5,0.8) ,nrow = 3)
    
ggsave(filename = "plots/clauseErasure.pdf", arrange, units = "cm", device = "pdf", width = 10, height = 6)

Phase Saving

Load Phase Saving Batch

df <- dbGetQuery(db, "
  SELECT * FROM Experiment as main
  inner join Instance
    on Instance.`ID-Instance` = main.`ID-Instance`
  inner join Configuration
    on Configuration.`ID-configuration` = main.`ID-configuration`
where
    `Subfamily` = 'stone_width3chain_nhalfmarkers'
 and `RE-value` in ('lbd', 'luE3')
 and `VD-value` in ('df95', 'df80')
 and `PH-value` in ('fix0', 'std', 'dynrnd', 'fixrnd', 'ctr')
 and `CE-value` in ('glu', 'min')
 and `CA-value` in ('vsids', 'lbd')
 and `CL-value` in ('1uip')
 and `PR-value` in ('on', 'off')
 and `SH-value` in ('off');")
df$`RE-value` <- as.factor(df$`RE-value`)
df$`VD-value` <- as.factor(df$`VD-value`)
df$`PH-value` <- as.factor(df$`PH-value`)
df$`CE-value` <- as.factor(df$`CE-value`)
df$`CA-value` <- as.factor(df$`CA-value`)
df$`CL-value` <- as.factor(df$`CL-value`)
df$`PR-value` <- as.factor(df$`PR-value`)
df$`SH-value` <- as.factor(df$`SH-value`)
df <- df[, !duplicated(colnames(df))]
data = df %>% filter(Subfamily == "stone_width3chain_nhalfmarkers", `CE-value` == "glu", `PR-value` == "off", `CA-value` == "lbd", `VD-value` == "df95", `RE-value` == "lbd")
plot <- ggplot(data, 
       aes(x = `Variables`,
           y = `CPU-time`, color = `PH-value`)
       ) + 
  geom_point() + geom_line() + 
  theme(legend.position = c(1.2,0.5), legend.direction = "vertical", legend.key.size = unit(1.2, "lines"), plot.margin=unit(c(1,25,1,2),"mm")) +
  scale_colour_discrete(labels = c("counter", "dynamic\nrandom", "fix zero", "fix random", "standard")) +
  labs(x = "Number of variables", y = "CPU time", color = "Phase:", title = "Stone formulas", subtitle = "on width 3 chain, #stones = #nodes / 2")
ggsave(filename = "plots/phase.pdf", units = "cm", device = "pdf", width = 10, height = 5.2)
plot + theme(legend.position = "bottom", legend.direction = "horizontal")

---
title: "Plots for Seeking Practical CDCL Insights from Theoretical SAT Benchmarks"
output: html_notebook
---

```{r}
library(dplyr)
library(ggplot2);
library(RMySQL)
db <- dbConnect(MySQL(), host='msql01.csc.kth.se', user='giraldez_user', password='ukbm5FIHvNuk', dbname='giraldez')
```

Custom Theme (based on https://rpubs.com/Koundy/71792)
```{r}
theme_Publication <- function(base_size=10, base_family="Helvetica") {
      library(grid)
      library(ggthemes)
      (theme_foundation(base_size=base_size, base_family=base_family)
       + theme(plot.title = element_text(face = "bold",
                                         size = rel(1), hjust = 0.5),
               plot.subtitle = element_text(hjust = 0.5),
               text = element_text(),
               panel.background = element_rect(colour = NA),
               plot.background = element_rect(colour = NA),
               panel.border = element_rect(colour = NA),
               axis.title = element_text(face = "bold",size = rel(1)),
               axis.title.y = element_text(angle=90,vjust =2),
               axis.title.x = element_text(vjust = -0.2),
               axis.text = element_text(), 
               axis.line = element_line(colour="black"),
               axis.ticks = element_line(),
               panel.grid.major = element_line(colour="#f0f0f0"),
               panel.grid.minor = element_blank(),
               legend.key = element_rect(colour = NA),
               legend.position = "bottom",
               legend.direction = "horizontal",
               legend.key.size= unit(0.2, "cm"),
               legend.spacing = unit(0, "cm"),
               legend.title = element_text(face = "italic", size = rel(1)),
               plot.margin=unit(c(1,2,1,2),"mm"),
               strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
               strip.text = element_text(face="bold")
          ))
      
}

theme_set(theme_Publication())
scale_colour_discrete <- function(...) scale_color_brewer(..., palette="Set2")
legendInBox <- function(pos = c(0,0)) theme(legend.position = pos, legend.direction = "vertical", legend.box.background = element_rect(colour = "#000000"), legend.key.size = unit(0.9, "lines"))
update_geom_defaults("point", list(size = 0.7))
```


# CA:VSIDS vs CA:RND

```{r}
df <- dbGetQuery(db, "
  SELECT * FROM Experiment as main
  inner join Instance
  	on Instance.`ID-Instance` = main.`ID-Instance`
  inner join Configuration
  	on Configuration.`ID-configuration` = main.`ID-configuration`
where
 `RE-value` in ('lbd', 'luE3')
 and `VD-value` in ('df95', 'df80')
 and `PH-value` in ('std')
 and `CE-value` in ('lin', 'glu', 'min')
 and `CA-value` in ('vsids', 'lbd', 'rnd')
 and `CL-value` in ('1uip')
 and `PR-value` in ('on', 'off')
 and `SH-value` in ('off');")
df$`RE-value` <- as.factor(df$`RE-value`)
df$`VD-value` <- as.factor(df$`VD-value`)
df$`PH-value` <- as.factor(df$`PH-value`)
df$`CE-value` <- as.factor(df$`CE-value`)
df$`CA-value` <- as.factor(df$`CA-value`)
df$`CL-value` <- as.factor(df$`CL-value`)
df$`PR-value` <- as.factor(df$`PR-value`)
df$`SH-value` <- as.factor(df$`SH-value`)
df <- df[, !duplicated(colnames(df))]
```

```{r}
library(ggplot2)
data = df %>% filter(Subfamily == "domset_4", `RE-value` == "lbd", `VD-value` == "df95", `PH-value` == "std", `CE-value` == "glu", `PR-value` == "on")

plot <- ggplot(data, aes(
  x = `Variables`,
  y = `CPU-time`,
  color = `CA-value`)) + geom_point() + geom_line() +
  labs(x = "Number of variables", y = "CPU time", color = "Clause assessment", title = "Dominating set formulas (k=4)") +
  legendInBox(c(0.25,0.6))

ggsave(filename = "plots/ca_rnd_vs_vsidis_single.pdf", units = "cm", device = "pdf", width = 10, height = 5)
plot
```

```{r}
data = df %>% filter()
timeouts = data %>% 
  group_by(Subfamily, `RE-value`, `VD-value`, `PH-value`, `CE-value`, `CA-value`, `CL-value`, `PR-value`, `SH-value`) %>% 
  summarize(
    timeout = sum(`Solver-answer` == "INDETERMINATE"),
    outofmem = sum(`Solver-answer` == "OUTOFMEMORY" ),
    unsolved = sum(`Solver-answer` %in% c("OUTOFMEMORY", "INDETERMINATE") ),
    n = n())

rndVsLbd = inner_join(
  timeouts %>% filter(`CA-value` == "rnd"),
  timeouts %>% filter(`CA-value` == "lbd"),
  by = c("Subfamily", "RE-value", "VD-value", "PH-value", "CE-value", "CL-value", "PR-value", "SH-value"),
  suffix = c(".rnd", ".other"))
rndVsLbd = rndVsLbd %>% mutate(diff = unsolved.rnd - unsolved.other, other = factor("lbd", c("lbd", "vsids")))

rndVsVsids = inner_join(
  timeouts %>% filter(`CA-value` == "rnd"),
  timeouts %>% filter(`CA-value` == "vsids"),
  by = c("Subfamily", "RE-value", "VD-value", "PH-value", "CE-value", "CL-value", "PR-value", "SH-value"),
  suffix = c(".rnd", ".other"))
rndVsVsids = rndVsVsids %>% mutate(diff = unsolved.rnd - unsolved.other, other = factor("vsids", c("lbd", "vsids")))

data = union(rndVsLbd, rndVsVsids)
           
print("lbd - rnd")
summary(data %>% filter(other == "lbd") %>% pull(diff))
print("vsids - rnd")
summary(data %>% filter(other == "vsids") %>% pull(diff))
plot <- ggplot(data, aes(x = `other`, y = `diff`)) +
  geom_boxplot(outlier.shape = 4) +
  stat_boxplot(geom ='errorbar', width = 0.2) +
  labs(x = "", y = "Difference in number of timeouts", title = "All formula families") +
  scale_x_discrete(labels = c('random - LBD','random - activity-based')) + 
  theme(plot.margin=unit(c(2,2,-3,2),"mm")) +
ggsave(filename = "plots/ca_rnd_boxplot.pdf", units = "cm", device = "pdf", width = 10, height = 6.5)
plot
```

Each data point is a configuration for a subfamily, the difference is optained by only varying the clause assesment strategie.

The plot shows that there is a difference between vsids and rnd, especially the lower quartiel on the difference is zero,
i.e. in less then 25% of the cases rnd is better (for no differnece one would expect more). Especially, when comparing
this difference to lbd, it shows that it is not very relevant.


# Decay Factor

Load Variable Decay Batch

```{r}
df <- dbGetQuery(db, "
  SELECT * FROM Experiment as main
  inner join Instance
  	on Instance.`ID-Instance` = main.`ID-Instance`
  inner join Configuration
  	on Configuration.`ID-configuration` = main.`ID-configuration`
where
     `RE-value` in ('lbd', 'luE3')
 and `VD-value` in ('df99', 'df95', 'df80', 'df65', 'rnd', 'lrb')
 and `PH-value` in ('fix0', 'std')
 and `CE-value` in ('glu', 'min')
 and `CA-value` in ('vsids', 'lbd')
 and `CL-value` in ('1uip')
 and `PR-value` in ('on', 'off')
 and `SH-value` in ('off');")
df$`RE-value` <- as.factor(df$`RE-value`)
df$`VD-value` <- as.factor(df$`VD-value`)
df$`PH-value` <- as.factor(df$`PH-value`)
df$`CE-value` <- as.factor(df$`CE-value`)
df$`CA-value` <- as.factor(df$`CA-value`)
df$`CL-value` <- as.factor(df$`CL-value`)
df$`PR-value` <- as.factor(df$`PR-value`)
df$`SH-value` <- as.factor(df$`SH-value`)
df <- df[, !duplicated(colnames(df))]
```


```{r}
data = df %>% filter(Subfamily == "op_partial", `CE-value` == "glu", `RE-value` == "lbd", `PH-value` == "std", `PR-value` == "off", `CA-value` == "lbd", !`VD-value` %in% c("lrb", "rnd"))

plot <- ggplot(data, 
       aes(x = `Variables`,
           y = `CPU-time`, color = `VD-value`)
       ) + 
  geom_point() + geom_line() + 
  legendInBox(c(0.75,0.53)) +
  labs(x = "Number of variables", y = "CPU time", color = "VSIDS decay factor", title = "Partial ordering principle formulas") +
  scale_colour_discrete(labels = c("0.65", "0.80", "0.95", "0.99"))
ggsave(filename = "plots/vd_op_partial.pdf", units = "cm", device = "pdf", width = 10, height = 5.2)
plot
```
## LRB

```{r}
data = df %>% filter()
timeouts = data %>% 
  group_by(Subfamily, `RE-value`, `VD-value`, `PH-value`, `CE-value`, `CA-value`, `CL-value`, `PR-value`, `SH-value`) %>% 
  summarize(
    timeout = sum(`Solver-answer` == "INDETERMINATE"),
    outofmem = sum(`Solver-answer` == "OUTOFMEMORY" ),
    unsolved = sum(`Solver-answer` %in% c("OUTOFMEMORY", "INDETERMINATE") ),
    n = n())

lrbVs65 = inner_join(
  timeouts %>% filter(`VD-value` == "lrb"),
  timeouts %>% filter(`VD-value` == "df65"),
  by = c("Subfamily", "RE-value", "PH-value", "CE-value", "CL-value", "PR-value", "SH-value", "CA-value"))
lrbVs65 = lrbVs65 %>% mutate(diff = unsolved.x - unsolved.y, other = "df65")

lrbVs99 = inner_join(
  timeouts %>% filter(`VD-value` == "lrb"),
  timeouts %>% filter(`VD-value` == "df99"),
  by = c("Subfamily", "RE-value", "PH-value", "CE-value", "CL-value", "PR-value", "SH-value", "CA-value"))
lrbVs99 = lrbVs99 %>% mutate(diff = unsolved.x - unsolved.y, other = "df99")

df99Vsdf65 = inner_join(
  timeouts %>% filter(`VD-value` == "df99"),
  timeouts %>% filter(`VD-value` == "df65"),
  by = c("Subfamily", "RE-value", "PH-value", "CE-value", "CL-value", "PR-value", "SH-value", "CA-value"))
df99Vsdf65 = df99Vsdf65 %>% mutate(diff = unsolved.x - unsolved.y, other = "df")


data <- union(lrbVs65, lrbVs99)
data <- union(data, df99Vsdf65)
           
print("df65 - lrb")
summary(data %>% filter(other == "df65") %>% pull(diff))
print("df99 - lrb")
summary(data %>% filter(other == "df99") %>% pull(diff))
print("df99 - df65")
summary(data %>% filter(other == "df") %>% pull(diff))

plot <- ggplot(data, aes(x = `other`, y = `diff`)) +
  geom_boxplot(outlier.shape = 4) +
  stat_boxplot(geom ='errorbar', width = 0.2) +
  labs(x = "", y = "Difference in number of timeouts", title = "All formula families") +
  theme(plot.margin=unit(c(2,2,-3,2),"mm")) +
  scale_x_discrete(labels = c('VSIDS .99 - .65', 'LRB - VSIDS .65','LRB - VSIDS .99'))
ggsave(filename = "plots/vd_lrb_vs_vsids.pdf", units = "cm", device = "pdf", width = 10, height = 6.5)
plot
```


# Restarts

Load main Batch

```{r}
library(RMySQL)
df <- dbGetQuery(db, "
  SELECT * FROM Experiment as main
  inner join Instance
  	on Instance.`ID-Instance` = main.`ID-Instance`
  inner join Configuration
  	on Configuration.`ID-configuration` = main.`ID-configuration`
where
 `RE-value` in ('no', 'lbd', 'luE2', 'luE3')
 and `VD-value` in ('df95', 'df80', 'lrb')
 and `PH-value` in ('fix0', 'std')
 and `CE-value` in ('no', 'lin', 'glu', 'min')
 and `CA-value` in ('vsids', 'lbd', 'none')
 and `CL-value` in ('1uip')
 and `PR-value` in ('on', 'off')
 and `SH-value` in ('off');")
df$`RE-value` <- as.factor(df$`RE-value`)
df$`VD-value` <- as.factor(df$`VD-value`)
df$`PH-value` <- as.factor(df$`PH-value`)
df$`CE-value` <- as.factor(df$`CE-value`)
df$`CA-value` <- as.factor(df$`CA-value`)
df$`CL-value` <- as.factor(df$`CL-value`)
df$`PR-value` <- as.factor(df$`PR-value`)
df$`SH-value` <- as.factor(df$`SH-value`)
df <- df[, !duplicated(colnames(df))]
```

```{r}
data = df %>% filter(Subfamily == "stone_width3chain_nhalfmarkers", `CE-value` == "glu", `PH-value` == "std", `PR-value` == "off", `CA-value` == "lbd", `VD-value` == "df95")

plot <- ggplot(data, 
       aes(x = `Variables`,
           y = `CPU-time`, color = `RE-value`)
       ) + 
  geom_point() + geom_line() + 
  theme(legend.position = c(1.2,0.5), legend.direction = "vertical", legend.key.size = unit(1, "lines"), plot.margin=unit(c(1,25,1,2),"mm")) +
  scale_colour_discrete(labels = c("LBD", "Luby 100", "Luby 1000", "no restarts")) +
  labs(x = "Number of variables", y = "CPU time", color = "Restarts", title = "Stone formulas", subtitle = "on width 3 chain, #stones = #nodes / 2")
ggsave(filename = "plots/restarts.pdf", units = "cm", device = "pdf", width = 10, height = 5.2)
plot + theme(legend.position = "bottom", legend.direction = "horizontal")
```

```{r}
data = df %>% filter(Subfamily == "peb_pyrofpyr_neq3", `CE-value` == "glu", `PH-value` == "std", `PR-value` == "off", `CA-value` == "lbd", `VD-value` == "df80")

plot <- ggplot(data, 
       aes(x = `Variables`,
           y = `CPU-time`, color = `RE-value`)
       ) + 
  geom_point() + geom_line() + 
  scale_colour_discrete(labels = c("LBD", "Luby 100", "Luby 1000", "no restarts")) +
  theme(legend.position = c(1.2,0.5), legend.direction = "vertical", legend.key.size = unit(1, "lines"), plot.margin=unit(c(1,25,1,2),"mm")) +
  labs(x = "Number of variables", y = "CPU time", color = "Restarts", title = "Pebbling on pyramid-of-pyramids graphs")
ggsave(filename = "plots/restarts2.pdf", units = "cm", device = "pdf", width = 10, height = 5)
plot + theme(legend.position = "bottom", legend.direction = "horizontal")
```


# Database Size
```{r}
data = df %>% filter(Subfamily == "tseitin_reggrid_5", `CA-value` == "lbd", `RE-value` == "lbd", `VD-value` == "df80", `PH-value` == "std", `PR-value` == "off")

ggplot(data, 
       aes(x = `Variables`,
           y = `CPU-time`, color = `CE-value`, group = `ID-configuration`)
       ) + 
  geom_point() + geom_line() +
  labs(x = "Number of variables", y = "CPU time", color = "Clause erasure:")

ggplot(data %>% filter(`Solver-answer` == "UNSATISFIABLE"), 
       aes(x = `Variables`,
           y = `Conflicts` / 1000000, color = `CE-value`, group = `ID-configuration`)
       ) + 
  geom_point() + geom_line() +
  labs(x = "Number of variables", y = "Million conflicts")
```


## Fancy Arrangement
```{r}
library(gridExtra)
library(cowplot)
data = df %>% filter(Subfamily == "tseitin_reggrid_5", `CA-value` == "lbd", `RE-value` == "lbd", `VD-value` == "df80", `PH-value` == "std", `PR-value` == "off")

plot1 <- ggplot(data, 
       aes(x = `Variables`,
           y = `CPU-time`, color = `CE-value`, group = `ID-configuration`)
       ) + 
  geom_point() + geom_line() +
  theme(plot.margin=unit(c(0,2,0,2),"mm"), legend.position = "bottom") +
  scale_colour_discrete(labels = c("glucose", "linear", "minisat")) +
  labs(x = "Number of variables", y = "CPU time", color = "Clause erasure:")

legend <- get_legend(plot1)
plot1 <- plot1 + theme(legend.position = "none")

plot2 <- ggplot(data %>% filter(`Solver-answer` == "UNSATISFIABLE"), 
       aes(x = `Variables`,
           y = `Conflicts` / 1000000, color = `CE-value`, group = `ID-configuration`)
       ) + 
  geom_point() + geom_line() +
  theme(plot.margin=unit(c(0,2,0,3),"mm"), legend.position = "none")+
  labs(x = "Number of variables", y = "Million conflicts")

titlePlot <- ggplot() + labs(title = "Tseitin formulas on grid graphs (5 rows)") +
  theme(plot.margin=unit(c(2,2,0,2),"mm"))

arrange <- arrangeGrob(plot1,plot2, ncol = 2)
arrange <- arrangeGrob(titlePlot, arrange, legend, heights = c(0.8,5,0.8) ,nrow = 3)
    
ggsave(filename = "plots/clauseErasure.pdf", arrange, units = "cm", device = "pdf", width = 10, height = 6)
```

# Phase Saving

Load Phase Saving Batch

```{r}
df <- dbGetQuery(db, "
  SELECT * FROM Experiment as main
  inner join Instance
  	on Instance.`ID-Instance` = main.`ID-Instance`
  inner join Configuration
  	on Configuration.`ID-configuration` = main.`ID-configuration`
where
    `Subfamily` = 'stone_width3chain_nhalfmarkers'
 and `RE-value` in ('lbd', 'luE3')
 and `VD-value` in ('df95', 'df80')
 and `PH-value` in ('fix0', 'std', 'dynrnd', 'fixrnd', 'ctr')
 and `CE-value` in ('glu', 'min')
 and `CA-value` in ('vsids', 'lbd')
 and `CL-value` in ('1uip')
 and `PR-value` in ('on', 'off')
 and `SH-value` in ('off');")
df$`RE-value` <- as.factor(df$`RE-value`)
df$`VD-value` <- as.factor(df$`VD-value`)
df$`PH-value` <- as.factor(df$`PH-value`)
df$`CE-value` <- as.factor(df$`CE-value`)
df$`CA-value` <- as.factor(df$`CA-value`)
df$`CL-value` <- as.factor(df$`CL-value`)
df$`PR-value` <- as.factor(df$`PR-value`)
df$`SH-value` <- as.factor(df$`SH-value`)
df <- df[, !duplicated(colnames(df))]
```

```{r}
data = df %>% filter(Subfamily == "stone_width3chain_nhalfmarkers", `CE-value` == "glu", `PR-value` == "off", `CA-value` == "lbd", `VD-value` == "df95", `RE-value` == "lbd")

plot <- ggplot(data, 
       aes(x = `Variables`,
           y = `CPU-time`, color = `PH-value`)
       ) + 
  geom_point() + geom_line() + 
  theme(legend.position = c(1.2,0.5), legend.direction = "vertical", legend.key.size = unit(1.2, "lines"), plot.margin=unit(c(1,25,1,2),"mm")) +
  scale_colour_discrete(labels = c("counter", "dynamic\nrandom", "fix zero", "fix random", "standard")) +
  labs(x = "Number of variables", y = "CPU time", color = "Phase:", title = "Stone formulas", subtitle = "on width 3 chain, #stones = #nodes / 2")
ggsave(filename = "plots/phase.pdf", units = "cm", device = "pdf", width = 10, height = 5.2)
plot + theme(legend.position = "bottom", legend.direction = "horizontal")
```
