Venn Calculator

How to use the Venn Calculator?

ggVennDiagram provides a series of set operation functions, which together can be used as a Venn calculator.

library(ggVennDiagram)
set.seed(20231225)
y = list(
  A = sample(letters, 8) |> sort(),
  B = sample(letters, 8) |> sort(),
  C = sample(letters, 8) |> sort(),
  D = sample(letters, 8) |> sort())

y
#> $A
#> [1] "a" "e" "g" "o" "p" "s" "t" "v"
#> 
#> $B
#> [1] "a" "d" "f" "i" "k" "s" "y" "z"
#> 
#> $C
#> [1] "b" "g" "k" "o" "r" "s" "u" "w"
#> 
#> $D
#> [1] "b" "c" "e" "h" "k" "q" "s" "y"

First of all, we need to construct a Venn class object from this list. Printing the object shows its metadata.

venn_y = Venn(y)

venn_y
#> An object of class 'Venn':
#>    Slots: sets, names;
#>    No. Sets: 4   SetNames: A, B, C, D.
  • Find the overlapping members of two or more sets.

    overlap(venn_y, 1:2) # members in both the first two sets
    #> [1] "a" "s"
    overlap(venn_y) # members in all the sets
    #> [1] "s"
  • Find the members that are in one set (or union of sets) but not in others

    discern(venn_y, 1)  # members in set 1, but not in the remaining sets
    #> [1] "p" "t" "v"
    discern(venn_y, c("A","B"), 3) # members in set A or B (their union), but not in the 3rd set
    #>  [1] "a" "e" "p" "t" "v" "d" "f" "i" "y" "z"
  • Find the specific members in one or more sets

    discern_overlap(venn_y, 1)  # specific items in set 1
    #> [1] "p" "t" "v"
    discern_overlap(venn_y, 1:2)  # items shared by set 1 and set 2, and by no other set
    #> [1] "a"
  • Find the union of two or more sets

    unite(venn_y, 1:2) # union of set 1 and 2
    #>  [1] "a" "e" "g" "o" "p" "s" "t" "v" "d" "f" "i" "k" "y" "z"
    unite(venn_y, "all") # union of all four sets
    #>  [1] "a" "e" "g" "o" "p" "s" "t" "v" "d" "f" "i" "k" "y" "z" "b" "r" "u" "w" "c"
    #> [20] "h" "q"
    unite(venn_y, c("A", "B", "C"))
    #>  [1] "a" "e" "g" "o" "p" "s" "t" "v" "d" "f" "i" "k" "y" "z" "b" "r" "u" "w"

Combined results are provided as a VennPlotData object.

pd = process_data(venn_y)
pd
#> Class VennPlotData - '401f'
#>   Type: ellipse; No. sets: 4; No. regions: 15.
#>   To view this shape, use `plot_shape_edge(get_shape_by_id('401f'))`.
#>   To view its components, use `venn_setedge()`, `venn_setlabel()`, etc.
  • venn_set(): get set data from the object.

    venn_set(pd)
    #> # A tibble: 4 × 4
    #>   id    name  item         count
    #>   <chr> <chr> <named list> <int>
    #> 1 1     A     <chr [8]>        8
    #> 2 2     B     <chr [8]>        8
    #> 3 3     C     <chr [8]>        8
    #> 4 4     D     <chr [8]>        8
  • venn_region(): get subset (region) data from the object.

    venn_region(pd)
    #> # A tibble: 15 × 4
    #>    id      name    item      count
    #>    <chr>   <chr>   <list>    <int>
    #>  1 1       A       <chr [3]>     3
    #>  2 2       B       <chr [4]>     4
    #>  3 3       C       <chr [3]>     3
    #>  4 4       D       <chr [3]>     3
    #>  5 1/2     A/B     <chr [1]>     1
    #>  6 1/3     A/C     <chr [2]>     2
    #>  7 1/4     A/D     <chr [1]>     1
    #>  8 2/3     B/C     <chr [0]>     0
    #>  9 2/4     B/D     <chr [1]>     1
    #> 10 3/4     C/D     <chr [1]>     1
    #> 11 1/2/3   A/B/C   <chr [0]>     0
    #> 12 1/2/4   A/B/D   <chr [0]>     0
    #> 13 1/3/4   A/C/D   <chr [0]>     0
    #> 14 2/3/4   B/C/D   <chr [1]>     1
    #> 15 1/2/3/4 A/B/C/D <chr [1]>     1

Please note that, in order to keep the result concise, the contained items are nested in a list column. You may use the following methods to further process it.

  • Method 1

    venn_region(pd) |> tidyr::unnest(item)
    #> # A tibble: 21 × 4
    #>    id    name  item  count
    #>    <chr> <chr> <chr> <int>
    #>  1 1     A     p         3
    #>  2 1     A     t         3
    #>  3 1     A     v         3
    #>  4 2     B     d         4
    #>  5 2     B     f         4
    #>  6 2     B     i         4
    #>  7 2     B     z         4
    #>  8 3     C     r         3
    #>  9 3     C     u         3
    #> 10 3     C     w         3
    #> # ℹ 11 more rows
  • Method 2

    venn_region(pd) |> dplyr::rowwise() |> dplyr::mutate(item = paste0(item, collapse = ", "))
    #> # A tibble: 15 × 4
    #> # Rowwise: 
    #>    id      name    item         count
    #>    <chr>   <chr>   <chr>        <int>
    #>  1 1       A       "p, t, v"        3
    #>  2 2       B       "d, f, i, z"     4
    #>  3 3       C       "r, u, w"        3
    #>  4 4       D       "c, h, q"        3
    #>  5 1/2     A/B     "a"              1
    #>  6 1/3     A/C     "g, o"           2
    #>  7 1/4     A/D     "e"              1
    #>  8 2/3     B/C     ""               0
    #>  9 2/4     B/D     "y"              1
    #> 10 3/4     C/D     "b"              1
    #> 11 1/2/3   A/B/C   ""               0
    #> 12 1/2/4   A/B/D   ""               0
    #> 13 1/3/4   A/C/D   ""               0
    #> 14 2/3/4   B/C/D   "k"              1
    #> 15 1/2/3/4 A/B/C/D "s"              1

Reference

Some of the above-mentioned functions were originally developed by Turgut Yigit Akyol in RVenn.