Participant Demographics

Model Characteristics

Model attributes
Number rankings versus theories

Analysis

Do participants chose randomly?

Total Deviations from Uniformity by Style and Size.
Deviations from Uniformity by Theory and Style and Size.

Are participants consistent?

Higher than random concentrations
Lower than random concentrations
No concentrations

Stated Method and Aggrement

Mosaics

Effect of Number Style
Effect of Model Depth

Free Style Weighting
Restricted Weighting

Effect of Sex
Effect of Background

Effect of Background – Restricted
Effect of Background – Free Sytle

Effect of Age

User Perception of Numeric Contribution Semantics for Goal Models: an Exploratory Experiment (ER’2017)

Norah Alothman, Mehrnaz Zhian and Sotirios Liaskos

15 July, 2017

Abstract

This page accompanies our paper submission to the International Conference on Conceptual Modeling (ER 2017). The data, redacted to include information only relevant to this study and remove personally identifying information can be found here and here, availed here in compliance to clause “Anonymized data will be included as an appendix in the PIs major research report and potential publications of this research.” of informed consent. The models used can be found here.

# Read the data

# First data set 
df1 <- read.csv("http://www.yorku.ca/liaskos/Papers/ER2017/Data/DataSet1.csv",header = TRUE,stringsAsFactors=FALSE)
# Second data set with revised second instrument. 
df2 <- read.csv("http://www.yorku.ca/liaskos/Papers/ER2017/Data/DataSet2.csv",header = TRUE,stringsAsFactors=FALSE)
dfModels <- read.csv("Models.csv",header = TRUE,stringsAsFactors=FALSE)

#
# Deal with consistency (available for second bunhc only)
#
df2$Consistency1 <- df2$S1.R4 - df2$S1.R4.Duplicate
df2$Consistency2 <- df2$S2.R1 - df2$S2.R1.Duplicate

# Clean - Up
df2$S1.R4.Duplicate <- NULL
df2$S2.R1.Duplicate <- NULL


#
# Find Preferred Model from Number
#

dfM <- melt(dfModels,id.vars = c("Structure","Model"))
dfM$variable <- as.character(dfM$variable)
for (i in c(1,2,3,5,6,7,8,9,10)) {
  for (j in 1:4) {
    colu <- paste0("S",i,".R",j)
    df2[,paste0(colu,".v2")] <- as.character(lapply(df2[,colu], FUN = function(x) dfM[(dfM$Structure == i) & (dfM$Model == j) & (round(dfM$value,2) == round(x,2)),c("variable")]))
  }
}


#
# Consistency Filtering (2nd version only)
#
# Two of the models (S1R4 and S2S1) are repeated. We filter responses 
# for which we have a greater than .2 total distance between the replicated 
# responses
df2<-df2[df2$Consistency1 + df2$Consistency2<0.2,]


#
# Merge the two data sets
#

# Clean Up to prepare for rbind
df2$Consistency1 <- NULL
df2$Consistency2 <- NULL


## Prepare trailing questions for rbind
df1$Meth1 <- "0"
df1$Meth.Details <- ""

df2$Str1Calculation <- ""
df2$Str1Calculation.Other. <- ""

df2$Response.ID <- df2$Response.ID + 2000

df<-rbind(df1, df2)

# Recode Various fields

dfOriginal <- df

# Map The Degree
df[df$Degre == "Master degree","Degree"] = "MSc"
df[df$Degre == "Bachelor degree","Degree"] = "BSc"
df[df$Degre == "PhD degree","Degree"] = "PhD"
df[df$Degre == "Other(MD)","Degree"] = "PhD"

# Map the Field of Study
df[df$FieldOfStudy == "Humanities (e.g. English, Philosophy, History)","FieldOfStudy"] = "Humanities"
df[df$FieldOfStudy == "Social Sciences (e.g. Sociology, Political Science, Communication)","FieldOfStudy"] = "Social Sciences"
df[df$FieldOfStudy == "Science, Technology and Engineering","FieldOfStudy"] = "Science and Tech."
df[df$FieldOfStudy == "Business and Economics","FieldOfStudy"] = "Business and Econ."
df[df$FieldOfStudy == "Other(Two of the above: social sciences and humanities)","FieldOfStudy"] = "Social Sciences"
df[df$FieldOfStudy == "Other(journalism)","FieldOfStudy"] = "Social Sciences"


#Re-code the trailing part
df[df$Str1Calculation == "0.8*0.8/(0.8+0.8) + 0.4*0.9/(0.4+0.9) = 0.64/1.8 + 0.36/1.3 = 0.63","StatedMethod"] = "SerPar"
df[df$Str1Calculation == "max(0.8*0.8, 0.4*0.9) = max(0.64, 0.36) = 0.64","StatedMethod"] = "Bayes"
df[df$Str1Calculation == "0.8*0.8 + 0.4*0.9 = 0.64 + 0.36 = 1.0","StatedMethod"] = "Linear"
df[df$Str1Calculation == "max ( min(0.8*0.8) , min(0.4*0.9) ) = max(0.8 , 0.4) = 0.8","StatedMethod"] = "MinMax"
df[df$Str1Calculation == "min(0.8,0.8) + min(0.4,0.9) = 0.8 + 0.4 = 1.2","StatedMethod"] = "Other"
df[df$Str1Calculation == "Other","StatedMethod"] = "Other"

# Based on last author's qualitative judgement of the "Other" field.
df[df$Response.ID %in% c(78,60),"StatedMethod"] = "Don't know"
df[df$Response.ID %in% c(2022,2031),"StatedMethod"] = "Linear"
df[df$Response.ID %in% c(2015,2021,2035),"StatedMethod"] = "Other"

# Recode for levels, styles.

levels <- list(L1 = c(1,2,3), L2 = c(5,6,7), L3 = c(8,9,10))
typ <- list(Rest = c(1,2), Free = c(3,4))
measures <- c("Bayes","Linear","MinMax","SerPar")

# Create the colunns
colist = NULL
for (l in c("L1","L2","L3")){
  for (r in c("Free","Rest")){
    for (m in measures){
      colist = c(colist,paste0(l,".",r,".",m))
      df[,c(paste0(l,".",r,".",m))] = 0
    }
  }
}

# Fill in the columns
for (i in 1:nrow(df)) { # For each case
  for (l in c("L1","L2","L3")){ # For each Size
    for (r in c("Free","Rest")){ # For each Style
      for (m in measures){
        targetColumn = paste0(l,".",r,".",m)
        bag <- NULL
        for (x in levels[[l]]){
          for (y in typ[[r]]){
            bag = c(bag,df[i,paste0("S",x,".R",y,".v2")])
          }
        }
        bag <- factor(bag,levels = measures)
        t <- table(bag)
        df[i,targetColumn] = t[[m]]
      }
    }
  }
}

dfW <- df[,c("Response.ID","Age","Gender","Degree","FieldOfStudy",colist)]

dfL <- melt(dfW, id=c("Response.ID", "Age","Gender","Degree","FieldOfStudy"))
dfL$Level = substr(dfL$variable,1,2)
dfL$Style = substr(dfL$variable,4,7)
dfL$Model = substring(dfL$variable,9)

dfL$Level = factor(dfL$Level)
dfL$Style = factor(dfL$Style)
dfL$Model = factor(dfL$Model)

dd <- dfL

Participant Demographics

x<-xtabs( ~ FieldOfStudy + Gender,data = dfW)
addmargins(x)

##                                        Gender
## FieldOfStudy                            Female Male Sum
##   Business and Econ.                         8    5  13
##   Education                                  3    3   6
##   Fine Arts (e.g. Music, Theater, Film)      2    3   5
##   Health Sciences                            1    1   2
##   Humanities                                 8    3  11
##   Science and Tech.                          3    7  10
##   Social Sciences                            6    3   9
##   Sum                                       31   25  56

Model Characteristics

ra <- t(apply(dfModels[,c("Bayes","MinMax","SerPar","Linear")],1,rank))
r<-apply(ra,MARGIN = 2, FUN = table)
Size = c("Small","Small","Small","Medium","Medium","Medium","Large","Large","Large")
Depth = c(1,1,1,2,2,2,3,3,3)
Goals = c(3,4,5,5,7,6,7,9,10)
Contributions = c(2,3,4,4,6,5,6,8,9)
s <- cbind(Size, Depth, Goals, Contributions)

Model attributes

ftable(s[order(Depth),])

##      Size  Depth  Goals Contributions
##                                      
## A  Small  1      3             2     
## B  Small  1      4             3     
## C  Small  1      5             4     
## D  Medium 2      5             4     
## E  Medium 2      7             6     
## F  Medium 2      6             5     
## G  Large  3      7             6     
## H  Large  3      9             8     
## I  Large  3      10            9

#xtable(s[order(Depth),])

Number rankings versus theories

Each row in the table below is each of the model. Each column is each theory. The cell represents the ranking of the number resulting from applying the corresponding theory. For example if for a model SerPar gives 0.2, Bayes 0.4, MinMax 0.5 and Linear 0.3, (an unlikely contigence), the rankings will be respectivelly 1,3,4,2.

ra

##       Bayes MinMax SerPar Linear
##  [1,]     2      3      1      4
##  [2,]     2      3      1      4
##  [3,]     2      3      1      4
##  [4,]     2      3      1      4
##  [5,]     2      3      1      4
##  [6,]     2      3      1      4
##  [7,]     2      3      1      4
##  [8,]     2      3      1      4
##  [9,]     2      3      1      4
## [10,]     2      4      1      3
## [11,]     2      3      1      4
## [12,]     2      3      1      4
## [13,]     2      4      1      3
## [14,]     2      3      1      4
## [15,]     2      3      1      4
## [16,]     2      3      1      4
## [17,]     2      3      1      4
## [18,]     2      3      1      4
## [19,]     2      3      1      4
## [20,]     2      3      1      4
## [21,]     2      4      1      3
## [22,]     2      3      1      4
## [23,]     2      3      1      4
## [24,]     2      3      1      4
## [25,]     2      3      1      4
## [26,]     2      4      1      3
## [27,]     2      3      1      4
## [28,]     2      3      1      4
## [29,]     2      3      1      4
## [30,]     2      3      1      4
## [31,]     2      3      1      4
## [32,]     2      3      1      4
## [33,]     2      4      1      3
## [34,]     2      3      1      4
## [35,]     2      3      1      4
## [36,]     2      3      1      4

So we observe that Linear is always the largest number the vast majority of the times, MinMax is the second largest also for the vast majority of times, Bayes is always the second smallest and Serial-Parallel is aways the smallest.

Analysis

Do participants chose randomly?

par(mfrow=c(1,2))
dfN <- df
rec<-data.frame()

for (alt in c("greater","less","two.sided")) { # for each test type
  for (j in c(1,2,3,5,6,7,8,9,10)){ # For each structure
    for (k in c(1,2,3,4)) { # For each numberset
      col <- paste0("S",j,".R",k,".v2")
      c <- table(factor(dfN[, col], levels = c("Bayes","Linear","MinMax","SerPar")))
      res <- lapply(c,function(x) binom.test(x,nrow(dfN),1/4,alternative = alt))
      rec <- rbind(rec,cbind(Model = col, 
                             Size = ifelse(j %in% c(1,2,3), 
                                           "Small", ifelse(j %in% c(5,6,7),"Medium","Large")), 
                             Style = ifelse(k %in% c(1,2), 
                                            "Restricted", "Free"), 
                             Direction = ifelse(alt=="two.sided","two.sided",ifelse(alt=="greater","More Preferred","Less Preferred")),
                             Bayes_p  = as.numeric(as.character(res[[1]]$p.value)), 
                             Linear_p = as.numeric(res[[2]]$p.value), 
                             MinMax_p = as.numeric(res[[3]]$p.value), 
                             SerPar_p = as.numeric(res[[4]]$p.value)
      )
      )
    } # Numberset
  } # Structure
} # Type of binomial


rec$Bayes_p <- as.numeric(as.character(rec$Bayes_p))
rec$Linear_p <- as.numeric(as.character(rec$Linear_p))
rec$MinMax_p <- as.numeric(as.character(rec$MinMax_p))
rec$SerPar_p <- as.numeric(as.character(rec$SerPar_p))

alpha = 0.05
rec$Bayes <- ifelse(rec$Bayes_p < alpha,1,0)
rec$Linear <- ifelse(rec$Linear_p < alpha,1,0)
rec$MinMax <- ifelse(rec$MinMax_p < alpha,1,0)
rec$SerPar <- ifelse(rec$SerPar_p < alpha,1,0)
rec$None <- ifelse((rec$Bayes==0)&(rec$Linear==0)&(rec$MinMax==0)&(rec$SerPar==0),1,0)
rec$Some <- ifelse((rec$Bayes==0)&(rec$Linear==0)&(rec$MinMax==0)&(rec$SerPar==0),0,1)


rec$Bayes_p <- NULL
rec$Linear_p <- NULL
rec$MinMax_p <- NULL
rec$SerPar_p <- NULL

    
recL = melt(rec,id.vars = c("Model","Size","Style","Direction"))

colnames(recL)[6]  <- "Models"
colnames(recL)[5]  <- "Theory"

d1 <- recL[recL$Theory %in% c("Some","None"),]
d1 <- d1[d1$Direction == "two.sided",]

d2 <- recL[recL$Theory %in% c("Bayes","Linear","MinMax","SerPar"),]
d2 <- d2[d2$Direction != "two.sided",]

Total Deviations from Uniformity by Style and Size.

ggplot(data = d1, aes(x=Size, fill=Theory, y=Models)) + 
  geom_bar(stat="identity") +
  facet_wrap(~ Style) + ylab("# non-random choices") + 
  theme(text = element_text(size=20))

Deviations from Uniformity by Theory and Style and Size.

ggplot(data = d2, aes(x=Size, fill=Theory, y=Models)) + 
  geom_bar(stat="identity") +
  facet_wrap(~ Direction + Style) + 
  ylab("# of non-random choices") + 
  theme(text = element_text(size=20))

Are participants consistent?

getBinomP <- function(x,n,p,alt) {
  r<-binom.test(x,n,p,alternative = alt)
  return(r$p.value)
}


a<-aggregate(value ~ Response.ID + Model,data = dd, FUN = sum)
a$BinomTS_p <- lapply(a$value, FUN = getBinomP,36,1/4,"two.sided")
a$BinomG_p <- lapply(a$value, FUN = getBinomP,36,1/4,"g")
a$BinomL_p <- lapply(a$value, FUN = getBinomP,36,1/4,"l")


a$BinomTS <- ifelse(a$BinomTS_p < 0.05,1,0)
a$BinomG <- ifelse(a$BinomG_p < 0.05,1,0)
a$BinomL <- ifelse(a$BinomL_p < 0.05,1,0)
a$BinomN <- ifelse(a$BinomL+a$BinomG+a$BinomTS == 0,1,0)

b<-aggregate(BinomG ~ Response.ID + Model,data = a, FUN = sum)
b1<-table(b[b$BinomG==1,])
great<-apply(b1[,,1],2,sum)

c<-aggregate(BinomL ~ Response.ID + Model,data = a, FUN = sum)
c1<-table(c[c$BinomL==1,])
less<-apply(c1,2,sum)

n<-aggregate(BinomN ~ Response.ID,data = a, FUN = sum)
none<- nrow(n[n$BinomN==4,])

aS <- na.omit(merge(a,df[,c("Response.ID","StatedMethod")]))
agreed <- nrow(aS[(aS$StatedMethod == aS$Model) & (aS$BinomG ==1),])/length(unique(aS$Response.ID))
opposite <- nrow(aS[(aS$StatedMethod == aS$Model) & (aS$BinomL ==1),])/length(unique(aS$Response.ID))

Higher than random concentrations

great

##  Bayes Linear MinMax SerPar 
##      5     13     27      2

Lower than random concentrations

less

##  Bayes Linear MinMax SerPar 
##      9     12      4     35

No concentrations

none

## [1] 7

Stated Method and Aggrement

At the end of the first version of the instrument the participants are asked what method they follow through an example mode and calculations on the example model representing each of the theories, with the additional option to provide their own method. At the end of the second version of the instrument the participants are instead asked whether “[they] used a specific calculation method”, which they had to describe in their own words or if instead “[they] did not use a specific calculation method, [but] just used [their] intuition”.

Of the total of 30 responses coming from the first version and the 26 responses coming from the second one we identify 35 participants who forcibly (first instrument) or voluntarily (second instrument) identify a specific way of working through the aggregations. Most of those responses were the result of selection from the corresponding multiple choice question (1st version) while a smaller number is described verbally (1st and second version). The latter (8 cases total) are classified under one of the theories or special “Other” and “Don’t Know” categories through qualitative analysis of the response. The table below represents the distribution of the stated methods.

table(df$StatedMethod)

## 
##      Bayes Don't know     Linear     MinMax      Other     SerPar 
##          4          2          8          2          8         11

There is a surprising perception that the Serial Parallel model is used, when it is not. As a matter of fact a mere 20% of those 35 participants states that they use a method that also happens to be the one they actually used with statistically significant consistency. A 25.71% actually states that they follow a theory they actually use less with statistically significant consistency.

The finding becomes less surprising by focusing on the second version of the instrument in which participants are not forced to choose a calculation method but are asked if they use their intuition rather than performing calculations. Out of the 26 responses 21 actually say they just use their intuition (no specific method).

Mosaics

Effect of Number Style

In aggregate, we can observe below that free style (weights adding up to more than 1.5) there is a tenancy for participants to select Bayesian or MinMax model and not Linear and SerPar. For restricted style models on the other hand, participants tend to choose Linear much more than other theories, whose choice frequency is much less than expected.

dd$Theory <- dd$Model
x<-xtabs(value ~ Style + Theory,data = dd)
interp <- function(x) pmin(x/6,1)
residuals = (x - 1008/4)/sqrt(1008/4)
mosaic(x,shade = TRUE,gp_args = list(interpolate = interp),labeling = labeling_values, residuals =  residuals, gp_text = gpar(fontsize = 18), gp_labels = gpar(fontsize = 12))

CrossTable(x,format="SPSS",prop.chisq = FALSE, prop.c = FALSE)

## 
##    Cell Contents
## |-------------------------|
## |                   Count |
## |             Row Percent |
## |           Total Percent |
## |-------------------------|
## 
## Total Observations in Table:  2016 
## 
##              | Theory 
##        Style |    Bayes  |   Linear  |   MinMax  |   SerPar  | Row Total | 
## -------------|-----------|-----------|-----------|-----------|-----------|
##         Free |      301  |      175  |      430  |      102  |     1008  | 
##              |   29.861% |   17.361% |   42.659% |   10.119% |   50.000% | 
##              |   14.931% |    8.681% |   21.329% |    5.060% |           | 
## -------------|-----------|-----------|-----------|-----------|-----------|
##         Rest |      208  |      358  |      295  |      147  |     1008  | 
##              |   20.635% |   35.516% |   29.266% |   14.583% |   50.000% | 
##              |   10.317% |   17.758% |   14.633% |    7.292% |           | 
## -------------|-----------|-----------|-----------|-----------|-----------|
## Column Total |      509  |      533  |      725  |      249  |     2016  | 
## -------------|-----------|-----------|-----------|-----------|-----------|
## 
##

Effect of Model Depth

The model depth does not seem to affect the choice.

dd1 <- dd
dd1$Size <- dd1$Level
levels(dd1$Size) <- c("Small","Medium","Large")

interp <- function(x) pmin(x/6,1)
x<-xtabs(value ~ Size + Theory,data = dd1)
residuals = (x - 672/4)/sqrt(672/4)
mosaic(x,shade = TRUE,gp_args = list(interpolate = interp),labeling = labeling_values, residuals = residuals, gp_text = gpar(fontsize = 18), gp_labels = gpar(fontsize = 12))

CrossTable(x,format="SPSS",prop.chisq = FALSE, prop.c = FALSE)

## 
##    Cell Contents
## |-------------------------|
## |                   Count |
## |             Row Percent |
## |           Total Percent |
## |-------------------------|
## 
## Total Observations in Table:  2016 
## 
##              | Theory 
##         Size |    Bayes  |   Linear  |   MinMax  |   SerPar  | Row Total | 
## -------------|-----------|-----------|-----------|-----------|-----------|
##        Small |      174  |      155  |      200  |      143  |      672  | 
##              |   25.893% |   23.065% |   29.762% |   21.280% |   33.333% | 
##              |    8.631% |    7.688% |    9.921% |    7.093% |           | 
## -------------|-----------|-----------|-----------|-----------|-----------|
##       Medium |      190  |      187  |      238  |       57  |      672  | 
##              |   28.274% |   27.827% |   35.417% |    8.482% |   33.333% | 
##              |    9.425% |    9.276% |   11.806% |    2.827% |           | 
## -------------|-----------|-----------|-----------|-----------|-----------|
##        Large |      145  |      191  |      287  |       49  |      672  | 
##              |   21.577% |   28.423% |   42.708% |    7.292% |   33.333% | 
##              |    7.192% |    9.474% |   14.236% |    2.431% |           | 
## -------------|-----------|-----------|-----------|-----------|-----------|
## Column Total |      509  |      533  |      725  |      249  |     2016  | 
## -------------|-----------|-----------|-----------|-----------|-----------|
## 
##

Free Style Weighting

x<-xtabs(value ~ Size + Model,data = dd1[dd1$Style == "Free",])
residuals = (x - 336/4)/sqrt(336/4)
mosaic(x,shade = TRUE,gp_args = list(interpolate = interp),labeling = labeling_values, residuals = residuals,gp_text = gpar(fontsize = 18), gp_labels = gpar(fontsize = 12))

CrossTable(x,format="SPSS",prop.chisq = FALSE, prop.c = FALSE)

## 
##    Cell Contents
## |-------------------------|
## |                   Count |
## |             Row Percent |
## |           Total Percent |
## |-------------------------|
## 
## Total Observations in Table:  1008 
## 
##              | Model 
##         Size |    Bayes  |   Linear  |   MinMax  |   SerPar  | Row Total | 
## -------------|-----------|-----------|-----------|-----------|-----------|
##        Small |      114  |       61  |      112  |       49  |      336  | 
##              |   33.929% |   18.155% |   33.333% |   14.583% |   33.333% | 
##              |   11.310% |    6.052% |   11.111% |    4.861% |           | 
## -------------|-----------|-----------|-----------|-----------|-----------|
##       Medium |      108  |       54  |      142  |       32  |      336  | 
##              |   32.143% |   16.071% |   42.262% |    9.524% |   33.333% | 
##              |   10.714% |    5.357% |   14.087% |    3.175% |           | 
## -------------|-----------|-----------|-----------|-----------|-----------|
##        Large |       79  |       60  |      176  |       21  |      336  | 
##              |   23.512% |   17.857% |   52.381% |    6.250% |   33.333% | 
##              |    7.837% |    5.952% |   17.460% |    2.083% |           | 
## -------------|-----------|-----------|-----------|-----------|-----------|
## Column Total |      301  |      175  |      430  |      102  |     1008  | 
## -------------|-----------|-----------|-----------|-----------|-----------|
## 
##

Restricted Weighting

x<-xtabs(value ~ Size + Model,data = dd1[dd1$Style == "Rest",])
residuals = (x - 336/4)/sqrt(336/4)
mosaic(x,shade = TRUE,gp_args = list(interpolate = interp),labeling = labeling_values, residuals = residuals,gp_text = gpar(fontsize = 18), gp_labels = gpar(fontsize = 12))

CrossTable(x,format="SPSS",prop.chisq = FALSE, prop.c = FALSE)

## 
##    Cell Contents
## |-------------------------|
## |                   Count |
## |             Row Percent |
## |           Total Percent |
## |-------------------------|
## 
## Total Observations in Table:  1008 
## 
##              | Model 
##         Size |    Bayes  |   Linear  |   MinMax  |   SerPar  | Row Total | 
## -------------|-----------|-----------|-----------|-----------|-----------|
##        Small |       60  |       94  |       88  |       94  |      336  | 
##              |   17.857% |   27.976% |   26.190% |   27.976% |   33.333% | 
##              |    5.952% |    9.325% |    8.730% |    9.325% |           | 
## -------------|-----------|-----------|-----------|-----------|-----------|
##       Medium |       82  |      133  |       96  |       25  |      336  | 
##              |   24.405% |   39.583% |   28.571% |    7.440% |   33.333% | 
##              |    8.135% |   13.194% |    9.524% |    2.480% |           | 
## -------------|-----------|-----------|-----------|-----------|-----------|
##        Large |       66  |      131  |      111  |       28  |      336  | 
##              |   19.643% |   38.988% |   33.036% |    8.333% |   33.333% | 
##              |    6.548% |   12.996% |   11.012% |    2.778% |           | 
## -------------|-----------|-----------|-----------|-----------|-----------|
## Column Total |      208  |      358  |      295  |      147  |     1008  | 
## -------------|-----------|-----------|-----------|-----------|-----------|
## 
##

Effect of Sex

Nothing very substantial. Females prefer MinMax over Linear compared to Men.

interp <- function(x) pmin(x/6,1)
x<-xtabs(value ~ Gender + Model,data = dd)

res1 = (x[1,] - sum(x[1,])/4)/sqrt(sum(x[1,])/4)
res2 = (x[2,] - sum(x[2,])/4)/sqrt(sum(x[2,])/4)

mosaic(x,shade = TRUE,gp_args = list(interpolate = interp),labeling = labeling_values, residuals = rbind(res1,res2),gp_text = gpar(fontsize = 18), gp_labels = gpar(fontsize = 12))

Effect of Background

interp <- function(x) pmin(x/6,1)
x<-xtabs(value ~ FieldOfStudy + Model ,data = dd)
res1 = (x[1,] - sum(x[1,])/4)/sqrt(sum(x[1,])/4)
res2 = (x[2,] - sum(x[2,])/4)/sqrt(sum(x[2,])/4)
res3 = (x[3,] - sum(x[3,])/4)/sqrt(sum(x[3,])/4)
res4 = (x[4,] - sum(x[4,])/4)/sqrt(sum(x[4,])/4)
res5 = (x[5,] - sum(x[5,])/4)/sqrt(sum(x[5,])/4)
res6 = (x[6,] - sum(x[6,])/4)/sqrt(sum(x[6,])/4)
res7 = (x[7,] - sum(x[7,])/4)/sqrt(sum(x[7,])/4)

mosaic(x, 
       shade = TRUE,
       gp_args = list(interpolate = interp),
       labeling = labeling_values, 
       rot_labels = c(left = -20), 
       residuals = rbind(res1,res2,res3,res4,res5,res6,res6),
       labeling_args = list(offset_valnames = c(left = -4, top=0)),
       gp_text = gpar(fontsize = 18), 
       gp_labels = gpar(fontsize = 12))

xtabs(value ~ FieldOfStudy + Model,data = dd)

##                                        Model
## FieldOfStudy                            Bayes Linear MinMax SerPar
##   Business and Econ.                      103    133    178     54
##   Education                                70     56     70     20
##   Fine Arts (e.g. Music, Theater, Film)    44     57     46     33
##   Health Sciences                          27      5     31      9
##   Humanities                              102     89    142     63
##   Science and Tech.                        79    114    133     34
##   Social Sciences                          84     79    125     36

Effect of Background – Restricted

x<-xtabs(value ~ FieldOfStudy + Theory ,data = dd[dd$Style =="Rest",])
res1 = (x[1,] - sum(x[1,])/4)/sqrt(sum(x[1,])/4)
res2 = (x[2,] - sum(x[2,])/4)/sqrt(sum(x[2,])/4)
res3 = (x[3,] - sum(x[3,])/4)/sqrt(sum(x[3,])/4)
res4 = (x[4,] - sum(x[4,])/4)/sqrt(sum(x[4,])/4)
res5 = (x[5,] - sum(x[5,])/4)/sqrt(sum(x[5,])/4)
res6 = (x[6,] - sum(x[6,])/4)/sqrt(sum(x[6,])/4)
res7 = (x[7,] - sum(x[7,])/4)/sqrt(sum(x[7,])/4)

mosaic(x, 
       shade = TRUE,
       gp_args = list(interpolate = interp),
       labeling = labeling_values, 
       rot_labels = c(left = -20), 
       residuals = rbind(res1,res2,res3,res4,res5,res6,res6),
       labeling_args = list(offset_valnames = c(left = -4, top=0), 
                            abbreviate_varnames = TRUE),
       gp_text = gpar(fontsize = 18), 
       gp_labels = gpar(fontsize = 12))

xtabs(value ~ FieldOfStudy + Model,data = dd[dd$Style == "Rest",])

##                                        Model
## FieldOfStudy                            Bayes Linear MinMax SerPar
##   Business and Econ.                       42     86     72     34
##   Education                                29     41     25     13
##   Fine Arts (e.g. Music, Theater, Film)    15     35     28     12
##   Health Sciences                          16      2      9      9
##   Humanities                               47     62     51     38
##   Science and Tech.                        25     78     59     18
##   Social Sciences                          34     54     51     23

Effect of Background – Free Sytle

x<-xtabs(value ~ FieldOfStudy + Theory,data = dd[dd$Style =="Free",])
res1 = (x[1,] - sum(x[1,])/4)/sqrt(sum(x[1,])/4)
res2 = (x[2,] - sum(x[2,])/4)/sqrt(sum(x[2,])/4)
res3 = (x[3,] - sum(x[3,])/4)/sqrt(sum(x[3,])/4)
res4 = (x[4,] - sum(x[4,])/4)/sqrt(sum(x[4,])/4)
res5 = (x[5,] - sum(x[5,])/4)/sqrt(sum(x[5,])/4)
res6 = (x[6,] - sum(x[6,])/4)/sqrt(sum(x[6,])/4)
res7 = (x[7,] - sum(x[7,])/4)/sqrt(sum(x[7,])/4)

mosaic(x, 
       shade = TRUE,
       gp_args = list(interpolate = interp),
       labeling = labeling_values, 
       rot_labels = c(left = -20), 
       residuals = rbind(res1,res2,res3,res4,res5,res6,res6),
       labeling_args = list(offset_valnames = c(left = -4, top=0)),
       gp_text = gpar(fontsize = 18), 
       gp_labels = gpar(fontsize = 12))

xtabs(value ~ FieldOfStudy + Model,data = dd[dd$Style == "Free",])

##                                        Model
## FieldOfStudy                            Bayes Linear MinMax SerPar
##   Business and Econ.                       61     47    106     20
##   Education                                41     15     45      7
##   Fine Arts (e.g. Music, Theater, Film)    29     22     18     21
##   Health Sciences                          11      3     22      0
##   Humanities                               55     27     91     25
##   Science and Tech.                        54     36     74     16
##   Social Sciences                          50     25     74     13

… which is due to…

interp <- function(x) pmin(x/6,1)
x<-xtabs(value ~ FieldOfStudy + Gender,data = dd)
mosaic(x,
       shade = TRUE,
       gp_args = list(interpolate = interp),
       labeling = labeling_values,
       gp_text = gpar(fontsize = 18), 
       gp_labels = gpar(fontsize = 12),
       rot_labels = c(left = -20)
       )

##                                        Gender
## FieldOfStudy                            Female Male
##   Business and Econ.                       288  180
##   Education                                108  108
##   Fine Arts (e.g. Music, Theater, Film)     72  108
##   Health Sciences                           36   36
##   Humanities                               288  108
##   Science and Tech.                        108  252
##   Social Sciences                          216  108

Effect of Age

interp <- function(x) pmin(x/6,1)
x<-xtabs(value ~ Age + Model + Style,data = dd)
cotabplot(~ Age + Model | Style, 
          data = x, 
          panel = cotab_coindep, 
          n = 5000,
          rot_labels = c(left = -0),
          labeling = labeling_values,
          gp_text = gpar(fontsize = 20), 
          gp_labels = gpar(fontsize = 18),
          labeling_args = list(offset_valnames = c(left = -4, top=0))
          )

## , , Style = Free
## 
##              Model
## Age           Bayes Linear MinMax SerPar
##   21-29          53     13     61     17
##   30-39         136    110    224     34
##   40-49          76     49    109     36
##   50-59          22      3     33     14
##   60 or older    14      0      3      1
## 
## , , Style = Rest
## 
##              Model
## Age           Bayes Linear MinMax SerPar
##   21-29          34     44     50     16
##   30-39          95    207    140     62
##   40-49          56     77     85     52
##   50-59          22     18     15     17
##   60 or older     1     12      5      0