Source code for teemi.learn.plotting

#!/usr/bin/env python
# MIT License
# Copyright (c) 2024, Technical University of Denmark (DTU)
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

""" Module used for producing/reproducing plots"""

import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
from scipy import stats
import pandas as pd
import matplotlib.patches as mpatches


# for phylo
from Bio.Phylo.TreeConstruction import DistanceCalculator
from Bio import Phylo
import pylab
from Bio.Phylo.TreeConstruction import DistanceTreeConstructor


[docs]def carpet_barplot( pd_dataframe_cross_tab_prop, colorDict: dict, save_pdf=True, path="", xlabel="", ylabel="", size_height: int = 10, size_length: int = 20, bar_width=1.0, ylim=[0, 25], ) -> None: """Plotting stacked barplots from a pandas dataframe cross tab df Parameters ---------- pd_dataframe_cross_tab_prop : pd.DataFrames pd.crosstab colorDict : Dict key as coloumn name, value hex color code save_pdf : bool path : string Returns ------- stacked barplot """ #### How can I export a matplotlib figure as a vector graphic with editable text fields? mpl.rcParams["pdf.fonttype"] = 42 mpl.rcParams["ps.fonttype"] = 42 # Plot ax = pd_dataframe_cross_tab_prop.plot( kind="bar", stacked=True, color=colorDict, width=bar_width ) plt.legend(loc="upper right", ncol=2) plt.xlabel(xlabel, size=25, fontname="Helvetica", fontweight="bold") plt.ylabel(ylabel, size=25, fontname="Helvetica", fontweight="bold") plt.legend(bbox_to_anchor=(1.02, 1), loc="upper left", borderaxespad=0) plt.xscale("linear") # make axis integers plt.locator_params(axis="both", integer=True, tight=True) # remove y axis labes plt.yticks([]) plt.xticks(rotation=0, fontweight="bold", size=20) # changing x scale by own ## size matters fig = mpl.pyplot.gcf() fig.set_size_inches(size_length, size_height) # tight layout plt.tight_layout() # remove spines ax.spines["right"].set_visible(False) ax.spines["top"].set_visible(False) ax.spines["left"].set_visible(False) ax.spines["bottom"].set_visible(False) if save_pdf and path != "": ## save pdf plt.savefig(path + ".pdf", format="pdf", dpi=300, bbox_inches="tight") plt.show()
[docs]def plot_ml_learning_curve( x_partitioned_data: list, y_training: list, y_cv: list, training_sd, cv_sd: list, save_pdf=True, path="", size_height: int = 10, size_length: int = 10, title="", linewidth: int = 1.5, y_axis_range: list = [0, 25], ) -> None: """Plotting a learning curve from partitioned dataframes. Parameters ---------- x_partitioned_data : list x coordinates y_training : list y coordinates for trained models y_cv : list y coordinates for cross-validated models training_sd : list standard-deviation for the models cv_sd : list standard-deviation for cross-validated models save_pdf : bool path : str Returns ------- Learning curve """ # making sd into numpy array training_sd = np.array(training_sd) cv_sd = np.array(cv_sd) # Create Figure and Axes instances fig, ax = plt.subplots(1) # CV_mae plt.plot(x_partitioned_data, y_cv, color="#986e42", linewidth=linewidth) plt.fill_between(x_partitioned_data, y_cv - cv_sd, y_cv + cv_sd, color="#ffe7b5") # Model plotted plt.plot(x_partitioned_data, y_training, color="Blue", linewidth=linewidth) plt.fill_between( x_partitioned_data, y_training - training_sd, y_training + training_sd, color="#ADD8E6", ) # Remove spines ax.spines["right"].set_visible(False) ax.spines["top"].set_visible(False) ax.spines["left"].set_visible(False) ax.spines["bottom"].set_visible(False) # Add labels and titel ax.set_xlabel( "Length of the partitioned data", size=20, fontname="Helvetica", fontweight="bold", ) ax.set_ylabel("MAE", size=20, fontname="Helvetica") ax.set_title(title, size=30, fontname="Helvetica") ax.legend( [ "Cross-validation mean MAE", "Cross-validation standard-deviation", "Model performance MAE", "Model standard-deviation", ], loc="upper right", shadow=True, fontsize="small", ) # Set color ax.set_facecolor("white") # SIze matters fig = mpl.pyplot.gcf() fig.set_size_inches(size_length, size_height) # change y-axis range plt.ylim(y_axis_range) if save_pdf and path != "": ## save pdf plt.savefig(path + ".pdf", format="pdf", dpi=300, bbox_inches="tight") # show plt.show()
[docs]def bar_plot( x: list, y: list, error_bar: list = None, horisontal_line=True, save_pdf=True, color="white", path="", title=None, x_label=None, y_label=None, size_height: int = 25, size_length: int = 15, ) -> None: """Plotting a bar_plot . Parameters ---------- x : list x coordinates y : list y coordinates error_bar : list (Optional) lits of errorbars matching the x coordinates horisontal_line : bool save_pdf : bool path : str color : str can be matplotlib color names or hex color codes title : str x_label : str y_label : str Returns ------- bar_plot""" #### How can I export a matplotlib figure as a vector graphic with editable text fields? mpl.rcParams["pdf.fonttype"] = 42 mpl.rcParams["ps.fonttype"] = 42 # Create Figure and Axes instances fig, ax = plt.subplots(1) # Plot plt.bar(x, y, edgecolor="black", color=color) # white # Errorbar if error_bar != None: plt.errorbar( x, y, yerr=error_bar, fmt="o", color="black", ) # ms = 2) # add horisontal line if horisontal_line: plt.axhline(y=100, color="black", linestyle="dashed") # Title and labels if title is not None: ax.set_title(title, size=30, fontname="Helvetica") if x_label is not None: ax.set_xlabel(x_label, size=20, fontname="Helvetica") if y_label is not None: ax.set_ylabel(y_label, size=20, fontname="Helvetica") # Set color ax.set_facecolor("white") # remove spines ax.spines["right"].set_visible(False) ax.spines["top"].set_visible(False) ax.spines["left"].set_visible(False) ax.spines["bottom"].set_visible(False) # SIze matters fig = mpl.pyplot.gcf() fig.set_size_inches(size_length, size_height) if save_pdf and path != "": plt.savefig(path + ".pdf", format="pdf", dpi=300, bbox_inches="tight") plt.show()
[docs]def horisontal_bar_plot( x: list, y: list, vertical_line: bool = True, save_pdf: bool = True, path: str = "", color="white", title=None, x_label=None, y_label=None, size_height: int = 10, size_length: int = 8, legend=False, ) -> None: """Plotting a horisontal_bar_plot . Parameters ---------- x : list x coordinates y : list y coordinates vertical_line : bool save_pdf : bool path : str color : str can be matplotlib color names or hex color codes title : str x_label : str y_label : str Returns ------- horisontal_bar_plot""" #### How can I export a matplotlib figure as a vector graphic with editable text fields? mpl.rcParams["pdf.fonttype"] = 42 mpl.rcParams["ps.fonttype"] = 42 # Create Figure and Axes instances fig, ax = plt.subplots(1) # Plot plt.barh(x, y, edgecolor="black", color=color) # white # '#deebf7' # Change x labels rotation ax.tick_params(rotation=90) # LEGEND if legend: (lineOne,) = ax.plot([], label="dbtl_2") (lineTwo,) = ax.plot([], label="dbtl_1") ax.legend(handles=[lineOne, lineTwo], loc="best") # remove gridlines ax.grid(False) # Title and labels if title is not None: ax.set_title(title, size=30, fontname="Helvetica") if x_label is not None: ax.set_xlabel(x_label, size=20, fontname="Helvetica") if y_label is not None: ax.set_ylabel(y_label, size=20, fontname="Helvetica") # Background ax.set_facecolor("white") # add horisontal line if vertical_line: plt.axvline(x=100, color="black", linestyle="dashed") # rotate sticks ax.tick_params(rotation=0) # ## adding the labels on the bar for c in ax.containers: ax.bar_label(c, padding=10) # remove spines ax.spines["right"].set_visible(False) ax.spines["top"].set_visible(False) ax.spines["left"].set_visible(False) ax.spines["bottom"].set_visible(False) # SIze matters fig = mpl.pyplot.gcf() fig.set_size_inches(size_height, size_length) if save_pdf and path != "": plt.savefig(path + ".pdf", format="pdf", dpi=300, bbox_inches="tight") plt.show()
[docs]def correlation_plot( dataframe, x: str, y: str, save_pdf: bool = True, path: str = "", title: str = "", size_height: int = 10, size_length: int = 10, y_axis_range: list = [0, 1], x_axis_range: list = [0, 1], ) -> None: """Plotting a correlation_plot. Parameters ---------- dataframe : pd.DataFrame x : str x coordinates y : str y coordinates save_pdf : bool path : str size_height : int size_length : int x_axis_range : list y_axis_range : list Returns ------- correlation_plot""" # set seaborn plotting aesthetics as default sns.set_context("paper", font_scale=2.0, rc={"lines.linewidth": 1.5}) dataframe["color"] = "black" g = sns.lmplot( data=dataframe, x=x, y=y, hue="color", palette=["#000000"], fit_reg=True, height=10, line_kws={"color": "black"}, ci=False, legend=False, ) r, p = stats.pearsonr(dataframe[x], dataframe[y]) # add white background ax = plt.gca() ax.set_facecolor("white") # add R and P values plt.suptitle( f"R-squared = {r:.3f} \n P-value = {p:.3E}", y=0.8, x=0.3, size=15, fontname="Helvetica", fontweight="bold", ) # add title ax.set_title(title) # SIze matters fig = mpl.pyplot.gcf() fig.set_size_inches(size_height, size_length) # change y-axis range plt.xlim(x_axis_range) plt.ylim(y_axis_range) if save_pdf and path != "": plt.savefig(path + ".pdf", format="pdf", dpi=300, bbox_inches="tight") plt.show()
[docs]def bar_plot_w_hue( dataframe, x: str, y: str, save_pdf=True, path="", hue: str = "category", palette="dark", title="", x_label="", y_label="", horisontal_line: bool = True, size_height: int = 10, size_length: int = 10, ) -> None: """Plotting a correlation_plot. Parameters ---------- dataframe : pd.DataFrame Dataframe with categories in seperate column from x and y x : str x coordinates y : str y coordinates save_pdf : bool path : str path to folder with name of the file title : str x_label : str y_label : str Returns ------- bar_plot_w_hue""" #### How can I export a matplotlib figure as a vector graphic with editable text fields? mpl.rcParams["pdf.fonttype"] = 42 mpl.rcParams["ps.fonttype"] = 42 ax = sns.barplot(x=x, y=y, hue=hue, data=dataframe, palette=palette) # Remove spines sns.despine() # add labels ax = plt.gca() ax.set_xlabel(x_label, size=20, fontname="Helvetica", fontweight="bold") ax.set_ylabel(y_label, size=20, fontname="Helvetica", fontweight="bold") ax.set_title(title, size=30, fontname="Helvetica", fontweight="bold") # white background ax.set_facecolor("white") plt.xscale("linear") # SIze matters fig = mpl.pyplot.gcf() fig.set_size_inches(size_length, size_height) if horisontal_line: # normalized line ax.axhline(100, color="black", linestyle="dashed") if save_pdf == True and path != "": plt.savefig(path + ".pdf", format="pdf", dpi=300) plt.show()
[docs]def color_range_dict() -> dict: """Returns a dictionary of color ranges. Returns ------- dict A dictionary of color ranges containing keys 'yellow', 'orange', 'blue' and 'green' """ return { "yellow": [ "#a59a00", "#aa9e00", "#b0a300", "#b5a800", "#bbad00", "#c0b200", "#c5b700", "#cbbc00", "#d0c100", "#d6c600", "#dbcb03", "#e1d011", "#e6d51b", "#ecda24", "#f2df2b", "#f7e432", "#fde938", "#ffef40", "#fff647", ], "orange": [ "#cd6511", "#d26916", "#d76d1b", "#dc7120", "#e17624", "#e67a28", "#eb7e2d", "#f08231", "#f58635", "#fa8a39", "#fe8f3e", "#ff9544", "#ff9c4b", "#ffa351", "#ffaa58", "#ffb15e", "#ffb764", "#ffbd6a", "#ffc470", "#ffca76", "#ffd07c", "#ffd682", ], "blue": [ "#2d89bc", "#348dc0", "#3b92c5", "#4197ca", "#479bcf", "#4da0d4", "#52a5d9", "#58aade", "#5daee3", "#63b3e8", "#68b8ee", "#6ebdf3", "#73c2f8", "#78c7fd", "#7eccff", "#84d1ff", "#8ad7ff", "#8fdcff", "#95e2ff", "#9be7ff", "#a1edff", "#a6f3ff", "#acf8ff", "#b2feff", ], "green": [ "#24b161", "#2bb565", "#32ba69", "#38bf6d", "#3ec371", "#44c876", "#4acd7a", "#4fd27e", "#54d683", "#5adb87", "#5fe08b", "#64e590", "#69ea94", "#6eee99", "#73f39d", "#78f8a2", "#7efda7", "#91ffb9", ], }
[docs]def plot_phylo_tree(alignment_file, save_pdf=True, path="", height=10, wideness=8): """Plotting a phylogenetic tree. Parameters ---------- alignment_file : Bio.Align.MultipleSeqAlignment A multiple sequence alignemt made into a Bio.Align.MultipleSeqAlignment obejct Dataframe with categories in seperate column from x and y save_pdf : bool path : str path to folder with name of the file Returns ------- plot_phylo_tree""" # calculate distances in protein identety: calculator = DistanceCalculator("identity") dm = calculator.get_distance(alignment_file) # Construct tree constructor = DistanceTreeConstructor() tree = constructor.upgma(dm) # Plot tree # set the size of the figure plt.rc("font", family="Helvetica") mpl.rcParams["pdf.fonttype"] = 42 mpl.rcParams["ps.fonttype"] = 42 fig = plt.figure(figsize=(height, wideness), dpi=300) plt.rcParams.update({"font.size": 10}) axes = fig.add_subplot(1, 1, 1) fig1 = plt.gcf() Phylo.draw(tree, axes=axes, do_show=False, branch_labels=None) pylab.axis("off") # homologs plt.rcParams.update({"font.size": 10}) # Saving the plot if save_pdf and path != "": ## save pdf plt.savefig(path + ".pdf", format="pdf", dpi=300, bbox_inches="tight") plt.show()
[docs]def plot_stacked_barplot_with_labels( df: pd.DataFrame, colors: list, title="", y_label="", x_label="", path="", size_length: int = 20, size_heigth: int = 10, ): """Plots a stacked barplot from a dataframe. Parameters ---------- df : pd.DataFrame The dataframe containing the data to be plotted. colors : list A list of colors for the different bars in the plot. title : str, optional The title of the plot. Default is an empty string. y_label : str, optional The label for the y-axis of the plot. Default is an empty string. x_label : str, optional The label for the x-axis of the plot. Default is an empty string. path : str, optional The path to the directory where the plot will be saved. Default is an empty string. """ # Initialize plt.rc("font", family="Helvetica") mpl.rcParams["pdf.fonttype"] = 42 mpl.rcParams["ps.fonttype"] = 42 ax = df.plot( kind="bar", stacked=True, figsize=(size_length, size_heigth), color=colors, edgecolor="Black", ) # , cmap="coolwarm" # Add Title and Labels plt.title(title, fontsize=40) plt.xlabel(y_label, fontsize=25, weight="bold") plt.ylabel(x_label, fontsize=25, weight="bold") ax.tick_params(axis="both", which="major", labelsize=25) # removes the borders around the plot sns.despine(bottom=True, left=True) ax.legend([], [], frameon=False) # around the legend # adding laves to each box for c in ax.containers: # this one writes label and percent labels_for_bars = [f"{c.get_label()} \n{round(v.get_height(),2)} %" for v in c] # remove the labels parameter if it's not needed for customized labels ax.bar_label( c, labels=labels_for_bars, label_type="center", fmt="str", size=20, weight="bold", ) if path != "": name = "Occurences of each part sampled" plt.savefig(path + name + ".pdf", format="pdf", dpi=300)
[docs]def grouped_bar_plot( x: list, y: list, colors: list, category_labels: list, title="", y_label="", x_label="", path="", axhline=True, size_height: int = 10, size_length: int = 10, ): """ Create a grouped bar plot from input data and save the plot in a pdf format Parameters ---------- x : list A list of x-coordinates of the bars in the plot. y : list A list of y-coordinates of the bars in the plot. colors : list A list of color codes for the bars in the plot. category_labels : list A list of labels for the categories represented by the bars in the plot. title : str, optional The title of the plot. Default is an empty string. y_label : str, optional The label for the y-axis of the plot. Default is an empty string. x_label : str, optional The label for the x-axis of the plot. Default is an empty string. path : str, optional The path to the directory where the plot will be saved. Default is an empty string. axhline : bool, optional The flag to show an horisontal line """ #### How can I export a matplotlib figure as a vector graphic with editable text fields? mpl.rcParams["pdf.fonttype"] = 42 mpl.rcParams["ps.fonttype"] = 42 # Create Figure and Axes instances fig, ax = plt.subplots(1) # Plot plt.bar(x, y, edgecolor="black", color=colors) # white - orange = #fee6ce # Add labels and titel ax.set_ylabel(y_label, size=20, fontname="Helvetica") ax.set_xlabel(x_label, size=30, fontname="Helvetica") ax.set_title(title, size=30, fontname="Helvetica") if axhline: # add horisontal line plt.axhline(y=100, color="black", linestyle="dashed") # Set color ax.set_facecolor("white") white_patch = mpatches.Patch(color="white", label=category_labels[0]) black_patch = mpatches.Patch(color="black", label=category_labels[1]) ax.legend(handles=[white_patch, black_patch], fontsize=20, frameon=False) # remove spines ax.spines["right"].set_visible(False) ax.spines["top"].set_visible(False) ax.spines["left"].set_visible(False) ax.spines["bottom"].set_visible(False) # SIze matters # SIze matters fig = mpl.pyplot.gcf() fig.set_size_inches(size_height, size_length) if path != "": name = "grouped_bar_plot" plt.savefig(path + name + ".pdf", format="pdf", dpi=300) plt.show()