tutoring2

source(here::here("R/load.R"))

# Load the workbook; its first column has no header, so it is imported
# under the auto-generated name `...1` (see the "New names" message below).
df <- read_excel(path = "refs/Omega2.xlsx")

## New names:
## * `` -> ...1

# Move the label column `...1` into the row names and convert to a matrix.
mtr <- df %>%
    column_to_rownames("...1") %>%
    as.matrix()

# Blank out the upper triangle and the diagonal, so only the cells
# strictly below the diagonal keep their values.
mtr[upper.tri(mtr, diag = TRUE)] <- NA

# Reshape to long format: one row per remaining cell, with the row label
# in `lhs`, the column label in `rhs`, and the cell content in `value`.
# The blanked (NA) cells are dropped by `na.omit()`.
df <- mtr %>%
    as.data.frame() %>%
    rownames_to_column("lhs") %>%
    gather(rhs, value, -lhs) %>%
    na.omit()

# Preview the first rows of the long-format table.
head(df)

##        lhs      rhs        value
## 2 F010201A F010101A 8.556040e-01
## 3 F010401A F010101A 2.439833e-02
## 4 F010601A F010101A 7.244093e-09
## 5 F011201A F010101A 9.276003e-03
## 6 F011401A F010101A 6.595367e-03
## 7 F011601A F010101A 1.843249e-15

# Count the distinct `lhs` values among the rows whose `value` is strictly
# greater than `cutoff`.
#
# cutoff: threshold compared against the `value` column (default 0.01).
# df:     data frame with at least the columns `lhs` and `value`.
#
# Returns the result of `n_distinct(lhs)` computed on the filtered rows.
get_n_unique <- function(cutoff = 0.01, df) {
    above_cutoff <- filter(df, value > cutoff)
    distinct_count <- summarise(above_cutoff, n_distinct(lhs))
    # With no column argument, `pull()` extracts the last column of the
    # summary, which here is the count itself.
    pull(distinct_count)
}

# Count the distinct `lhs` values at each candidate cutoff.
map_int(
    c(0.01, 0.05, 0.09),
    function(cutoff) get_n_unique(cutoff, df = df)
)

## [1] 31 22 18

选出使不同 lhs 个数最多的那个 cutoff 即可（上面的结果中是 0.01，对应 31 个）。

This site is open source. Improve this page.