import numpy as np
import matplotlib.pyplot as plt
import warnings
from string import ascii_uppercase
from collections import Counter
from scipy.spatial.distance import pdist, squareform
from scipy.optimize import minimize, NonlinearConstraint
from shapely import intersection_all, union_all
from shapely.geometry import Point
from shapely.ops import polylabel
from matplotlib.colors import to_rgba
from matplotlib.path import Path
from matplotlib.collections import PolyCollection
from wordcloud import WordCloud
from ._utils import (
get_subset_ids,
get_subsets,
blend_colors,
rgba_to_grayscale,
get_text_alignment,
)
# type hinting
from typing import (
Any,
Tuple,
Optional,
Callable,
Mapping,
Union,
)
from numpy.typing import NDArray
from matplotlib.typing import ColorType
from matplotlib.image import AxesImage
from shapely import (
Polygon as ShapelyPolygon,
MultiPolygon as ShapelyMultiPolygon,
)
class SetDiagram:
    """Draw a diagram visualising the relationships between two or
    more sets using two or more overlapping circles.

    Parameters
    ----------
    origins : NDArray
        The circle origins.
    radii : NDArray
        The circle radii.
    subset_labels : Optional[Mapping[Tuple[bool, ...], str]]
        A dictionary mapping subsets to their labels or None. If None, no subset labels are created.
        Subsets are represented by tuples of booleans using the inclusion/exclusion nomenclature, i.e.
        each entry in the tuple indicates if the corresponding set is a superset of the subset.
        For example, given the sets A, B, C, the subset (1, 1, 1) corresponds to the intersection of all three sets,
        whereas (1, 1, 0) is the subset formed by the difference between the intersection of A with B, and C.
    set_labels : Optional[list[str]]
        A list of set labels.
        If None, no set labels are created.
    set_colors : Optional[list[ColorType]]
        A corresponding list of matplotlib colors.
        If None, defaults to the default matplotlib color cycle.
    ax : Optional[plt.Axes]
        The matplotlib axis instance to draw onto.
        If none provided, a new figure with a single axis is instantiated.

    Attributes
    ----------
    subset_geometries : dict[Tuple[bool, ...], shapely.geometry.polygon.Polygon]
        The dictionary mapping each subset to its shapely geometry.
    subset_artists : dict[tuple[bool, ...], plt.Polygon]
        The matplotlib Polygon patches representing each subset.
    subset_label_artists : dict[tuple[bool, ...], plt.Text]
        The matplotlib text objects used to label each subset.
        Empty if no subset labels were requested.
    set_label_artists : list[plt.Text]
        The matplotlib text objects used to label each set.
        Empty if no set labels were requested.
    ax : plt.Axes
        The matplotlib axis instance.

    """

    def __init__(
            self,
            origins : NDArray,
            radii : NDArray,
            subset_labels : Optional[Mapping[Tuple[bool, ...], str]] = None,
            set_labels : Optional[list[str]] = None,
            set_colors : Optional[list] = None,
            ax : Optional[plt.Axes] = None,
    ) -> None:
        total_sets = len(origins)
        subset_ids = get_subset_ids(total_sets)

        self.subset_geometries : dict[Tuple[bool, ...], ShapelyPolygon] = \
            self._get_subset_geometries(subset_ids, origins, radii)

        if set_colors is None:
            # Wrap around the default color cycle so that there is a
            # color for every set, even if there are more sets than
            # colors in the cycle.
            default_colors = plt.rcParamsDefault['axes.prop_cycle'].by_key()['color']
            set_colors = [default_colors[ii % len(default_colors)] for ii in range(total_sets)]
        else:
            set_colors = set_colors[:total_sets]
        self.set_colors = set_colors
        self.subset_colors = self._get_subset_colors(subset_ids, self.set_colors)

        self.ax = self._initialize_axis(ax=ax)
        self.subset_artists = self._draw_subsets(
            self.subset_geometries, self.subset_colors, self.ax)
        self.set_artists = self._draw_sets(
            origins, radii, self.set_colors, self.ax)

        # The label attributes are always defined, such that
        # downstream code can rely on their existence.
        self.subset_label_artists : dict[Tuple[bool, ...], plt.Text] = dict()
        if subset_labels:
            self.subset_label_artists = self._draw_subset_labels(
                subset_labels, self.subset_geometries, self.subset_colors, self.ax)

        self.set_label_artists : list[plt.Text] = []
        if set_labels:
            self.set_label_artists = self._draw_set_labels(
                set_labels, origins, radii, self.ax)

    def _get_subset_geometries(
            self,
            subsets : list[Tuple[bool, ...]],
            origins : NDArray,
            radii : NDArray
    ) -> dict[Tuple[bool, ...], ShapelyPolygon]:
        """Compute each subset polygon as a shapely geometry object.

        Each set is a circle (a buffered point). A subset geometry is
        the intersection of all circles flagged in the subset id,
        minus the union of all circles that are not flagged.

        """
        set_geometries = [Point(*origin).buffer(radius) for origin, radius in zip(origins, radii)]
        subset_geometries = dict()
        for subset in subsets:
            included = [set_geometries[ii] for ii, is_member in enumerate(subset) if is_member]
            excluded = [set_geometries[ii] for ii, is_member in enumerate(subset) if not is_member]
            subset_geometries[subset] = intersection_all(included).difference(union_all(excluded))
        return subset_geometries

    def _get_subset_colors(
            self,
            subsets : list[Tuple[bool, ...]],
            set_colors : list[ColorType],
    ) -> dict[Tuple[bool, ...], NDArray]:
        """Determine the color of each subset patch based on the colors of the overlapping sets."""
        subset_colors = dict()
        for subset in subsets:
            superset_colors = [set_colors[ii] for ii, is_superset in enumerate(subset) if is_superset]
            subset_colors[subset] = blend_colors(superset_colors)
        return subset_colors

    def _initialize_axis(self, ax : Optional[plt.Axes] = None) -> plt.Axes:
        """Initialize the axis if none provided. Ensure that the
        aspect is equal such that circles are circles and not
        ellipses.

        """
        if ax is None:
            fig, ax = plt.subplots()
        ax.set_aspect("equal")
        ax.axis("off")
        return ax

    def _draw_subsets(
            self,
            subset_geometries : Mapping[Tuple[bool, ...], ShapelyPolygon],
            subset_colors : Mapping[Tuple[bool, ...], NDArray],
            ax : plt.Axes,
    ) -> dict[Tuple[bool, ...], Union[plt.Polygon, PolyCollection]]:
        """Draw each subset with a non-zero area as a separate polygon patch.

        Raises
        ------
        TypeError
            If a subset geometry with non-zero area is neither a
            shapely Polygon nor a shapely MultiPolygon.

        """
        # NOTE: only the exterior ring of each polygon is drawn;
        # interior rings (holes) are not cut out of the patch.
        subset_artists : dict[Tuple[bool, ...], Union[plt.Polygon, PolyCollection]] = dict()
        for subset, geometry in subset_geometries.items():
            if geometry.area > 0:
                if isinstance(geometry, ShapelyPolygon):
                    polygon = plt.Polygon(geometry.exterior.coords, color=subset_colors[subset])
                    ax.add_patch(polygon)
                    subset_artists[subset] = polygon
                elif isinstance(geometry, ShapelyMultiPolygon):
                    polygon_collection = PolyCollection(
                        [geom.exterior.coords for geom in geometry.geoms],
                        color=subset_colors[subset],
                    )
                    ax.add_collection(polygon_collection)
                    subset_artists[subset] = polygon_collection
                else:
                    raise TypeError(f"Shapely returned neither a Polygon or MultiPolygon but instead {type(geometry)} object!")
        ax.autoscale_view()
        return subset_artists

    def _draw_subset_labels(
            self,
            subset_labels : Mapping[Tuple[bool, ...], str],
            subset_geometries : Mapping[Tuple[bool, ...], ShapelyPolygon],
            subset_colors : Mapping[Tuple[bool, ...], NDArray],
            ax : plt.Axes,
            polylabel_tolerance : float = 1e-2,
    ) -> dict[Tuple[bool, ...], plt.Text]:
        """Place subset labels centred on the point of inaccessibility
        (POI) of the corresponding polygon.

        Subsets with a zero-area geometry are not labelled. The font
        color is black on light patches and white on dark patches.

        Raises
        ------
        TypeError
            If a subset geometry with non-zero area is neither a
            shapely Polygon nor a shapely MultiPolygon.

        """
        subset_label_artists = dict()
        for subset, label in subset_labels.items():
            geometry = subset_geometries[subset]
            if geometry.area > 0:
                if isinstance(geometry, ShapelyPolygon):
                    poi = polylabel(geometry, tolerance=polylabel_tolerance)
                elif isinstance(geometry, ShapelyMultiPolygon):
                    # use largest sub-geometry
                    largest = max(geometry.geoms, key=lambda geom: geom.area)
                    poi = polylabel(largest, tolerance=polylabel_tolerance)
                else:
                    raise TypeError(f"Shapely returned neither a Polygon or MultiPolygon but instead {type(geometry)} object!")

                fontcolor = "black" if rgba_to_grayscale(*subset_colors[subset]) > 0.5 else "white"
                subset_label_artists[subset] = ax.text(
                    poi.x, poi.y, label,
                    fontsize="small", color=fontcolor, va="center", ha="center"
                )
        return subset_label_artists

    def _draw_set_labels(
            self,
            set_labels : list[str],
            origins : NDArray,
            radii : NDArray,
            ax : plt.Axes,
            offset : float = 0.1,
    ) -> list[plt.Text]:
        """Place the set label on the side opposite to the centroid of all other sets.

        The label is placed at a distance of :code:`(1 + offset) * radius`
        from the circle origin. If there are no other sets, or if the
        circle origin coincides with the centroid of the other sets,
        there is no preferred direction, and the label is placed
        above the circle instead.

        """
        origins = np.asarray(origins, dtype=float)
        fallback_direction = np.array([0., 1.])

        set_label_artists = []
        for ii, label in enumerate(set_labels):
            other_origins = np.delete(origins, ii, axis=0)
            if len(other_origins) > 0:
                delta = origins[ii] - np.mean(other_origins, axis=0)
            else:
                delta = fallback_direction

            norm = np.linalg.norm(delta)
            if norm == 0:
                # Avoid a division by zero, which would place the label at NaN.
                delta = fallback_direction
                norm = 1.

            x, y = origins[ii] + (1 + offset) * radii[ii] * delta / norm
            ha, va = get_text_alignment(*delta)
            set_label_artists.append(ax.text(x, y, label, fontsize="large", ha=ha, va=va))
        return set_label_artists

    def _draw_sets(
            self,
            origins : NDArray,
            radii : NDArray,
            set_colors : list[ColorType],
            ax : plt.Axes,
    ) -> list[plt.Circle]:
        """Add an unfilled circle for each set to the axis.

        The circles are added as invisible artists; callers can make
        them visible via the returned handles.

        """
        set_artists = []
        for origin, radius, color in zip(origins, radii, set_colors):
            artist = plt.Circle(origin, radius, color=color, fill=False)
            ax.add_patch(artist)
            artist.set_visible(False)
            set_artists.append(artist)
        return set_artists
class EulerDiagram(SetDiagram):
    """Create an area-proportional Euler diagram visualising the relationships
    between two or more sets given the subset sizes.

    Sets are represented through overlapping circles, and the relative
    arrangement of these circles is determined through a minimisation
    procedure that attempts to match subset sizes to the corresponding
    areas formed by circle overlaps in the diagram.

    Parameters
    ----------
    subset_sizes : Mapping[Tuple[bool, ...], int | float]
        A dictionary mapping each subset to its desired size.
        Subsets are represented by tuples of booleans using the inclusion/exclusion nomenclature, i.e.
        each entry in the tuple indicates if the corresponding set is a superset of the subset.
        For example, given the sets A, B, C, the subset (1, 1, 1) corresponds to the intersection of all three sets,
        whereas (1, 1, 0) is the subset formed by the difference between the intersection of A with B, and C.
    subset_labels : Optional[Mapping[Tuple[bool, ...], str]]
        A dictionary mapping each subset to its desired label or None. If None,
        the subset_label_formatter is used to create subset labels based on the subset sizes.
    subset_label_formatter : Callable[[Tuple[bool, ...], int | float], str]
        The formatter used to create subset labels based on the subset sizes.
        The argument is ignored if subset_labels are not None.
    set_labels : Optional[list[str]]
        A list of set labels.
        If None, defaults to the letters of the alphabet (capitalized).
    set_colors : Optional[list[ColorType]]
        A corresponding list of matplotlib colors.
        If None, defaults to the default matplotlib color cycle.
    cost_function_objective : str
        The cost function objective; one of:

        - 'simple' : :code:`|x - y|`
        - 'squared' : :code:`(x - y)^2`
        - 'logarithmic' : :code:`|log(x + 1) - log(y + 1)|`
        - 'relative' : :code:`1 - min(x/y, y/x)`
        - 'inverse' : :code:`|1 / (x + epsilon) - 1 / (y + epsilon)|`

    verbose : bool
        Print a report of the optimisation process.
    ax : Optional[plt.Axes]
        The matplotlib axis instance to draw onto.
        If none provided, a new figure with a single axis is instantiated.

    Attributes
    ----------
    origins : NDArray
        The circle origins.
    radii : NDArray
        The circle radii.
    subset_geometries : dict[Tuple[bool, ...], shapely.geometry.polygon.Polygon]
        The dictionary mapping each subset to its shapely geometry.
    subset_artists : dict[tuple[bool, ...], plt.Polygon]
        The matplotlib Polygon patches representing each subset.
    subset_label_artists : dict[tuple[bool, ...], plt.Text]
        The matplotlib text objects used to label each subset.
    set_label_artists : list[plt.Text]
        The matplotlib text objects used to label each set.
    ax : plt.Axes
        The matplotlib axis instance.

    """

    def __init__(
            self,
            subset_sizes : Mapping[Tuple[bool, ...], Union[int, float]],
            subset_labels : Optional[Mapping[Tuple[bool, ...], str]] = None,
            subset_label_formatter : Callable[[Tuple[bool, ...], Union[int, float]], str] = lambda subset, size : str(size),
            set_labels : Optional[list[str]] = None,
            set_colors : Optional[list[ColorType]] = None,
            cost_function_objective : str = "inverse",
            verbose : bool = False,
            ax : Optional[plt.Axes] = None,
    ) -> None:
        self.origins, self.radii = self._get_layout(
            subset_sizes, cost_function_objective, verbose)

        if subset_labels is None:
            subset_labels = self._get_subset_labels(
                subset_sizes, subset_label_formatter)

        if set_labels is None:
            set_labels = self._get_set_labels(len(self.origins))

        super().__init__(
            self.origins, self.radii,
            subset_labels = subset_labels,
            set_labels = set_labels,
            set_colors = set_colors,
            ax = ax,
        )
        self._hide_empty_subsets(subset_sizes)

    def _get_layout(
            self,
            subset_sizes : Mapping[Tuple[bool, ...], Union[int, float]],
            cost_function_objective : str,
            verbose : bool
    ) -> Tuple[NDArray, NDArray]:
        """Initialize and then optimize the circle arrangement, and
        warn if the result cannot display all non-empty subsets.

        """
        origins, radii = self._initialize_layout(subset_sizes)
        origins, radii = self._optimize_layout(
            subset_sizes, origins, radii, cost_function_objective, verbose=verbose)
        self._raise_warning_if_there_are_more_nonempty_subsets_than_can_be_displayed(
            len(origins), subset_sizes
        )
        self._raise_warning_if_there_are_zero_area_non_empty_subsets(
            subset_sizes, origins, radii
        )
        return origins, radii

    def _initialize_layout(self, subset_sizes : Mapping[Tuple[bool, ...], Union[int, float]]) -> Tuple[NDArray, NDArray]:
        """Derive the circle radii from the set sizes, and compute an initial arrangement of circle origins."""
        set_sizes = self._get_set_sizes(subset_sizes)
        radii = self._initialize_radii(set_sizes)
        origins = self._initialize_origins(radii)
        return origins, radii

    def _get_set_sizes(self, subset_sizes : Mapping[Tuple[bool, ...], Union[int, float]]) -> NDArray:
        """Compute the size of each set based on the sizes of its constituent sub-sets"""
        contributions = [size * np.array(subset) for subset, size in subset_sizes.items()]
        return np.sum(contributions, axis=0)

    def _initialize_radii(self, areas : NDArray) -> NDArray:
        """Map set sizes onto circle radii."""
        return np.array([np.sqrt(area / np.pi) for area in areas])

    def _initialize_origins(self, radii : NDArray) -> NDArray:
        """The optimisation procedure uses gradient descent to find
        the circle arrangement that best matches the desired subset
        areas. If a subset area is zero, there is no gradient to
        follow. It is hence paramount that all subset areas exist at
        initialization.

        Here, we evenly space the circle origins around the center of
        the diagram, such that their circumferences touch. We then
        shift each circle origin towards that center, such that all
        circles overlap.

        """
        x0, y0 = 0, 0 # diagram center
        total_sets = len(radii)
        angles = 2 * np.pi * np.linspace(0, 1 - 1/total_sets, total_sets)
        angles += np.pi # place origin of first set on the left, not the right
        overlap = 0.5 * np.min(radii)
        distances = radii - overlap
        x = x0 + distances * np.cos(angles)
        y = y0 + distances * np.sin(angles)
        return np.c_[x, y]

    def _optimize_layout(
            self,
            subset_sizes : Mapping[Tuple[bool, ...], Union[int, float]],
            origins : NDArray,
            radii : NDArray,
            objective : str,
            verbose : bool
    ) -> Tuple[NDArray, NDArray]:
        """Optimize the placement of circle origins according to the
        given cost function objective.

        The radii are kept fixed. The distance between each pair of
        origins is constrained such that two circles neither separate
        by more than the sum of their radii, nor approach each other
        by more than the difference of their radii (each with a small
        tolerance).

        Raises
        ------
        ValueError
            If the objective is not one of the implemented objectives.

        """
        available_objectives = ("simple", "squared", "logarithmic", "relative", "inverse")
        if objective not in available_objectives:
            msg = f"The provided cost function objective is not implemented: {objective}."
            msg += "\nAvailable objectives are: 'simple', 'squared', 'logarithmic', 'relative', and 'inverse'."
            raise ValueError(msg)

        if len(radii) < 2:
            # The subset areas of a single circle do not depend on its
            # position; there is nothing to optimise.
            return origins, radii

        subsets = list(subset_sizes.keys())
        desired_areas = np.array(list(subset_sizes.values()))
        inverse_eps = 1e-2 * np.sum(desired_areas) # regulariser for the 'inverse' objective

        def cost_function(flattened_origins):
            origins = flattened_origins.reshape(-1, 2)
            subset_areas = np.array(
                [geometry.area for geometry in self._get_subset_geometries(subsets, origins, radii).values()]
            )

            if objective == "simple":
                cost = subset_areas - desired_areas
            elif objective == "squared":
                cost = (subset_areas - desired_areas)**2
            elif objective == "relative":
                # Divisions by zero are expected whenever an area or a
                # desired area is zero; the resulting inf / nan values
                # are handled by the minimum and the equality mask.
                with np.errstate(divide="ignore", invalid="ignore"):
                    ratios = np.minimum(subset_areas / desired_areas, desired_areas / subset_areas)
                cost = np.where(subset_areas == desired_areas, 0., 1 - ratios)
            elif objective == "logarithmic":
                cost = np.log(subset_areas + 1) - np.log(desired_areas + 1)
            else: # objective == "inverse"
                cost = 1 / (subset_areas + inverse_eps) - 1 / (desired_areas + inverse_eps)

            return np.sum(np.abs(cost))

        # constraints:
        eps = np.min(radii) * 0.01

        lower_bounds = np.abs(radii[np.newaxis, :] - radii[:, np.newaxis]) - eps
        lower_bounds[lower_bounds < 0] = 0
        lower_bounds = squareform(lower_bounds)

        upper_bounds = radii[np.newaxis, :] + radii[:, np.newaxis] + eps
        upper_bounds -= np.diag(np.diag(upper_bounds)) # squareform requires zeros on diagonal
        upper_bounds = squareform(upper_bounds)

        def constraint_function(flattened_origins):
            origins = np.reshape(flattened_origins, (-1, 2))
            return pdist(origins)

        distance_between_origins = NonlinearConstraint(
            constraint_function, lb=lower_bounds, ub=upper_bounds)

        result = minimize(
            cost_function,
            origins.flatten(),
            method='SLSQP',
            constraints=[distance_between_origins],
            options=dict(disp=verbose, eps=eps)
        )

        if not result.success:
            feedback = "Could not optimise layout for the given subsets. Try a different cost function objective."
            warnings.warn(f"{result.message}. {feedback}")

        origins = result.x.reshape((-1, 2))
        return origins, radii

    def _raise_warning_if_there_are_more_nonempty_subsets_than_can_be_displayed(
            self,
            total_sets : int,
            subset_sizes : Mapping[Tuple[bool, ...], Union[int, float]],
    ):
        """Compute the theoretical maximum number of plane divisions
        by circles, and raise a warning, if the number of non-empty
        subsets exceeds the number of areas.

        References
        ----------
        [1] https://mathworld.wolfram.com/PlaneDivisionbyCircles.html

        """
        maximum_number_of_regions = total_sets**2 - total_sets + 1
        total_non_empty_subsets = np.sum(np.array(list(subset_sizes.values())) > 0)
        if total_non_empty_subsets > maximum_number_of_regions:
            msg = f"The number of non-empty subsets is {total_non_empty_subsets}."
            msg += f" However, plane division by {total_sets} circles can only create {maximum_number_of_regions} enclosed regions."
            msg += f" Some subsets cannot be visualized."
            warnings.warn(msg)

    def _raise_warning_if_there_are_zero_area_non_empty_subsets(
            self,
            subset_sizes : Mapping[Tuple[bool, ...], Union[int, float]],
            origins : NDArray,
            radii : NDArray,
    ):
        """Raise a warning if the layout routine failed to find a solution that displays all non-empty subsets."""
        subset_geometries = self._get_subset_geometries(subset_sizes.keys(), origins, radii)
        for subset, size in subset_sizes.items():
            if np.isclose(subset_geometries[subset].area, 0) and size > 0:
                warnings.warn(f"Layout engine failed to find a solution that displays the non-empty subset {subset} with size {size}")

    def _get_set_labels(self, total_sets : int) -> list[str]:
        """Return the first `total_sets` capital letters of the alphabet."""
        return list(ascii_uppercase[:total_sets])

    def _get_subset_labels(
            self,
            subset_sizes : Mapping[Tuple[bool, ...], Union[int, float]],
            formatter : Callable[[Tuple[bool, ...], Union[int, float]], str],
    ) -> dict[Tuple[bool, ...], str]:
        """Map subset sizes to strings using the provided formatter."""
        return {subset : formatter(subset, size) for subset, size in subset_sizes.items()}

    def _hide_empty_subsets(self, subset_sizes : Mapping[Tuple[bool, ...], Union[int, float]]) -> None:
        """If the layout routine assigned a non-zero area to a zero-size subset, hide it."""
        # Labels may be missing for some or all subsets, e.g. if the
        # user provided labels only for a selection of subsets.
        subset_label_artists = getattr(self, "subset_label_artists", dict())
        for subset, size in subset_sizes.items():
            if size == 0 and self.subset_geometries[subset].area > 0:
                self.subset_artists[subset].set_visible(False)
                label_artist = subset_label_artists.get(subset)
                if label_artist is not None:
                    label_artist.set_visible(False)

    @classmethod
    def from_sets(cls, sets, *args, **kwargs):
        """Instantiate the set diagram from a list of sets, rather than subset sizes.

        Parameters
        ----------
        sets : list[set]
            The sets.
        subset_labels : Optional[Mapping[Tuple[bool, ...], str]]
            A dictionary mapping each subset to its desired label or None. If None,
            the subset_label_formatter is used to create subset labels based on the subset sizes.
        subset_label_formatter : Callable[[Tuple[bool, ...], int | float], str]
            The formatter used to create subset labels based on the subset sizes.
            The argument is ignored if subset_labels are not None.
        set_labels : Optional[list[str]]
            A list of set labels.
            If None, defaults to the letters of the alphabet (capitalized).
        set_colors : Optional[list[ColorType]]
            A corresponding list of matplotlib colors.
            If None, defaults to the default matplotlib color cycle.
        cost_function_objective : str
            The cost function objective; one of:

            - 'simple' : :code:`|x - y|`
            - 'squared' : :code:`(x - y)^2`
            - 'logarithmic' : :code:`|log(x + 1) - log(y + 1)|`
            - 'relative' : :code:`1 - min(x/y, y/x)`
            - 'inverse' : :code:`|1 / (x + epsilon) - 1 / (y + epsilon)|`

            Only applicable when instantiating an :code:`EulerDiagram`.
        verbose : bool
            Print a report of the optimisation process.
            Only applicable when instantiating an :code:`EulerDiagram`.
        ax : Optional[plt.Axes]
            The matplotlib axis instance to draw onto.
            If none provided, a new figure with a single axis is instantiated.

        Attributes
        ----------
        origins : NDArray
            The circle origins.
        radii : NDArray
            The circle radii.
        subset_geometries : dict[Tuple[bool, ...], shapely.geometry.polygon.Polygon]
            The dictionary mapping each subset to its shapely geometry.
        subset_artists : dict[tuple[bool, ...], plt.Polygon]
            The matplotlib Polygon patches representing each subset.
        subset_label_artists : dict[tuple[bool, ...], plt.Text]
            The matplotlib text objects used to label each subset.
        set_label_artists : list[plt.Text]
            The matplotlib text objects used to label each set.
        ax : plt.Axes
            The matplotlib axis instance.

        """
        subsets = get_subsets(sets)
        subset_sizes = {subset_id : len(subset) for subset_id, subset in subsets.items()}
        return cls(subset_sizes, *args, **kwargs)

    @classmethod
    def as_wordcloud(cls, sets, minimum_resolution=300, wordcloud_kwargs=None, *args, **kwargs):
        """Generate a set diagram with word clouds displaying the subset items.

        Parameters
        ----------
        sets : list[set]
            The sets.
        minimum_resolution : int
            The minimum extent of the wordcloud image in pixels (i.e. :code:`min(width, height)`).
            Larger images take significantly longer to generate.
        wordcloud_kwargs : Optional[dict[str, Any]]
            Key word arguments passed through to wordcloud.WordCloud.
            If None, no additional arguments are passed.
            Consult the wordcloud documentation [1]_ for a complete list.
            However, the following arguments are reserved:

            - :code:`mask`
            - :code:`mode = 'RGBA'`
            - :code:`background_color = None`
            - :code:`color_func = lambda *args, **kwargs : subset_color`

        subset_labels : Optional[Mapping[Tuple[bool, ...], str]]
            A dictionary mapping each subset to its desired label or None. If None,
            the subset_label_formatter is used to create subset labels based on the subset sizes.
        subset_label_formatter : Callable[[Tuple[bool, ...], int | float], str]
            The formatter used to create subset labels based on the subset sizes.
            The argument is ignored if subset_labels are not None.
        set_labels : Optional[list[str]]
            A list of set labels.
            If None, defaults to the letters of the alphabet (capitalized).
        set_colors : Optional[list[ColorType]]
            A corresponding list of matplotlib colors.
            If None, defaults to the default matplotlib color cycle.
        cost_function_objective : str
            The cost function objective; one of:

            - 'simple' : :code:`|x - y|`
            - 'squared' : :code:`(x - y)^2`
            - 'logarithmic' : :code:`|log(x + 1) - log(y + 1)|`
            - 'relative' : :code:`1 - min(x/y, y/x)`
            - 'inverse' : :code:`|1 / (x + epsilon) - 1 / (y + epsilon)|`

            Only applicable when instantiating an :code:`EulerDiagram`.
        verbose : bool
            Print a report of the optimisation process.
            Only applicable when instantiating an :code:`EulerDiagram`.
        ax : Optional[plt.Axes]
            The matplotlib axis instance to draw onto.
            If none provided, a new figure with a single axis is instantiated.

        Attributes
        ----------
        origins : NDArray
            The circle origins.
        radii : NDArray
            The circle radii.
        subset_geometries : dict[Tuple[bool, ...], shapely.geometry.polygon.Polygon]
            The dictionary mapping each subset to its shapely geometry.
        subset_artists : dict[tuple[bool, ...], plt.Polygon]
            The matplotlib Polygon patches representing each subset.
        subset_label_artists : dict[tuple[bool, ...], plt.Text]
            The matplotlib text objects used to label each subset.
        set_label_artists : list[plt.Text]
            The matplotlib text objects used to label each set.
        ax : plt.Axes
            The matplotlib axis instance.
        wordcloud : matplotlib.image.AxesImage
            The WordCloud image.

        References
        ----------
        .. [1] https://amueller.github.io/word_cloud/generated/wordcloud.WordCloud.html

        """
        if wordcloud_kwargs is None:
            # Instantiate per call rather than sharing a mutable default argument.
            wordcloud_kwargs = dict()

        subsets = get_subsets(sets)
        subset_sizes = {subset_id : len(subset) for subset_id, subset in subsets.items()}
        class_instance = cls(subset_sizes, *args, **kwargs)
        class_instance._make_subsets_transparent()
        class_instance.wordcloud = class_instance._generate_wordcloud(
            subsets,
            subset_geometries = class_instance.subset_geometries,
            minimum_resolution = minimum_resolution,
            wordcloud_kwargs = wordcloud_kwargs,
            ax = class_instance.ax,
        )
        return class_instance

    def _make_subsets_transparent(self) -> None:
        """Make subset faces and subset labels transparent, as they
        would overlap with the word cloud text otherwise.

        """
        # We don't use artist.set_alpha(0), as this would also make the artist
        # edge transparent as well.
        for artist in self.subset_artists.values():
            red, green, blue, _ = to_rgba(artist.get_facecolor()) # type: ignore
            artist.set_facecolor((red, green, blue, 0.))

        # The attribute is missing if no subset labels were drawn.
        for label in getattr(self, "subset_label_artists", dict()).values():
            label.set_visible(False)

    def _generate_wordcloud(
            self,
            subsets : dict[Tuple[bool, ...], set[str]],
            subset_geometries : dict[Tuple[bool, ...], ShapelyPolygon],
            minimum_resolution : int,
            wordcloud_kwargs : dict[str, Any],
            ax : plt.Axes,
    ) -> AxesImage:
        """Create a word cloud for each non-empty subset within the
        bounds of its geometry, and display the combined image on the
        axis.

        """
        subset_masks = self._get_subset_masks(
            subset_geometries, minimum_resolution, ax)

        subset_images = [
            self._generate_subset_wordcloud(
                subset = subsets[subset_id],
                mask = mask,
                rgba = self.subset_colors[subset_id],
                wordcloud_kwargs = wordcloud_kwargs,
            ) for subset_id, mask in subset_masks.items() if subsets.get(subset_id)
        ]

        if subset_images:
            combined_image = np.sum(subset_images, axis=0) / 255
        else:
            # There is nothing to display; fall back to a single transparent pixel.
            combined_image = np.zeros((1, 1, 4))

        return ax.imshow(combined_image, interpolation="bilinear", extent=ax.axis())

    def _get_subset_masks(self, subset_geometries, minimum_resolution, ax):
        """Rasterize each subset geometry with a non-zero area into a
        boolean image mask covering the current axis limits.

        The shorter image side has `minimum_resolution` pixels; the
        longer side is scaled according to the axis limits.

        """
        xmin, xmax = ax.get_xlim()
        ymin, ymax = ax.get_ylim()
        dx = xmax - xmin
        dy = ymax - ymin

        if dx < dy:
            width_in_pixel = minimum_resolution
            height_in_pixel = int(dy / dx * minimum_resolution)
        else:
            width_in_pixel = int(dx / dy * minimum_resolution)
            height_in_pixel = minimum_resolution

        X, Y = np.meshgrid(np.linspace(xmin, xmax, width_in_pixel),
                           np.linspace(ymin, ymax, height_in_pixel))
        XY = np.c_[X.ravel(), Y.ravel()]

        subset_masks = dict()
        for subset_id, geometry in subset_geometries.items():
            if geometry.area > 0:
                # A subset may consist of several disconnected polygons.
                if isinstance(geometry, ShapelyMultiPolygon):
                    polygons = geometry.geoms
                else:
                    polygons = [geometry]

                mask = np.zeros(len(XY), dtype=bool)
                for polygon in polygons:
                    mask |= self._get_polygon_mask(polygon, XY)

                mask = mask.reshape((height_in_pixel, width_in_pixel))
                mask = np.flipud(mask) # image origin is in the upper left
                subset_masks[subset_id] = mask

        return subset_masks

    def _get_polygon_mask(self, polygon, points):
        """Determine which points are inside the polygon, i.e. within
        its exterior ring but not within any of its interior rings (holes).

        """
        is_inside = Path(np.asarray(polygon.exterior.coords)).contains_points(points)
        for interior in polygon.interiors:
            is_inside &= ~Path(np.asarray(interior.coords)).contains_points(points)
        return is_inside

    def _generate_subset_wordcloud(
            self,
            subset : set[str],
            mask : NDArray,
            rgba : NDArray,
            wordcloud_kwargs : dict[str, Any],
    ) -> NDArray:
        """Generate the word cloud image for a single subset, with
        all words drawn in the subset color.

        """
        mask = 255 * np.invert(mask).astype(np.uint8) # black is filled by WordCloud
        rgba_as_tuple = tuple(int(255 * channel) for channel in rgba)
        wc = WordCloud(
            mask = mask,
            mode = "RGBA",
            background_color = None,
            color_func = lambda *args, **kwargs : rgba_as_tuple,
            **wordcloud_kwargs
        )
        return wc.generate_from_frequencies(Counter(subset)).to_array()
class VennDiagram(EulerDiagram):
    """Create a Venn diagram visualising the relationships between
    two or more sets.

    Sets are represented through overlapping circles. The size of a
    subset is indicated by the label of the corresponding patch; the
    size of the patch, however, is not indicative of the size of the
    subset, such that even zero-size subsets can be represented.

    Parameters
    ----------
    subset_sizes : Mapping[Tuple[bool], int | float]
        The dictionary mapping each subset to its size.
        Subsets are represented by tuples of booleans using the inclusion/exclusion nomenclature, i.e.
        each entry in the tuple indicates if the corresponding set is a superset of the subset.
        For example, given the sets A, B, C, the subset (1, 1, 1) corresponds to the intersection of all three sets,
        whereas (1, 1, 0) is the subset formed by the difference between the intersection of A with B, and C.
    subset_labels : Optional[Mapping[Tuple[bool], str]]
        A dictionary mapping each subset to its desired label.
        If None, the subset_label_formatter is used to create subset labels based on the subset sizes.
    subset_label_formatter : Callable[[Tuple[bool], int | float], str]
        The formatter used to create subset labels based on the subset sizes.
        The argument is ignored if subset_labels are not None.
    set_labels : Optional[list[str]]
        A list of set labels.
        If None, defaults to the letters of the alphabet (capitalized).
    set_colors : Optional[list[ColorType]]
        A corresponding list of matplotlib colors.
        If None, defaults to the default matplotlib color cycle.
    ax : Optional[plt.Axes]
        The matplotlib axis instance to draw onto.
        If none provided, a new figure with a single axis is instantiated.

    Attributes
    ----------
    subset_areas : Mapping[Tuple[bool], int | float]
        The dictionary mapping each subset to a desired area size.
    origins : NDArray
        The circle origins.
    radii : NDArray
        The circle radii.
    subset_geometries : dict[Tuple[bool], shapely.geometry.polygon.Polygon]
        The dictionary mapping each subset to its shapely geometry.
    subset_artists : dict[tuple[bool], plt.Polygon]
        The matplotlib Polygon patches representing each subset.
    subset_label_artists : dict[tuple[bool], plt.Text]
        The matplotlib text objects used to label each subset.
    set_label_artists : list[plt.Text]
        The matplotlib text objects used to label each set.
    ax : plt.Axes
        The matplotlib axis instance.

    """

    def __init__(
            self,
            subset_sizes : Mapping[Tuple[bool], Union[int, float]],
            subset_labels : Optional[Mapping[Tuple[bool], str]] = None,
            subset_label_formatter : Callable[[Tuple[bool], Union[int, float]], str] = lambda subset, size : str(size),
            set_labels : Optional[list[str]] = None,
            set_colors : Optional[list[ColorType]] = None,
            ax : Optional[plt.Axes] = None,
    ) -> None:
        # The labels report the actual subset sizes ...
        if subset_labels is None:
            subset_labels = self._get_subset_labels(
                subset_sizes, subset_label_formatter)

        # ... whereas the patch areas are chosen independently of them.
        subset_ids = list(subset_sizes.keys())
        self.subset_areas = self._get_subset_areas(subset_ids)

        # The layout is computed from the patch areas, not the subset sizes.
        layout_options = dict(
            cost_function_objective = "simple",
            verbose = False,
        )
        super().__init__(
            self.subset_areas,
            subset_labels = subset_labels,
            set_labels = set_labels,
            set_colors = set_colors,
            ax = ax,
            **layout_options,
        )

    def _get_subset_areas(self, subsets : list[Tuple[bool]]) -> dict[Tuple[bool], float]:
        """Assign a target area to each subset. The areas do not
        depend on the subset sizes: a subset belonging to a single
        set is assigned an area of one, and the area halves with each
        additional superset.

        """
        return {
            subset_id : 1 / 2**(np.sum(subset_id) - 1)
            for subset_id in subsets
        }