% This work is licensed under a % Creative Commons Attribution-ShareAlike 4.0 International License. % This code should be attributed to Dr David Werner, Newcastle University. % It cannot be guaranteed that the code is error free. If you identify an % issue, please contact: david.werner@ncl.ac.uk. Last reviewed: 12/04/2020 clear all; % Load your OTU table OTU_Table = readtable('Genus_OTUTableRoot_Akaki.xlsx','Sheet',4); % Load a table describing how you want to label your samples in your plots % and up to 2 factors for grouping your samples. DataAnalysisInput_Table = readtable('DataAnalysisInputs_NOV2019_Akaki.xlsx'); % Define the OTU_Table column which contains the labels for your variables % (or example genus or root) Variable_Column = 1; % Define the first OTU_Table column which contains data FirstData_Column = 2; % Define how many arrows/OTUs you want to include in the PCA plot NofArrows = 10; % Define which principal components you want to display in the PCA plots PCA_display = [1,2]; % Define the plot font size FontSz = 20; % Define the plot line width LineSz = 1.5; % Define the plot marker size MarkerSz = 160; % Define where to place the legend (northeast for top right corner, northwest, % for top left corner, southeast for bottom right corner or southwest for % bottomleft corner) LegendPosition = 'northeast'; % Define the data you want to cluster Data = OTU_Table(:,FirstData_Column:end); DataforAnalysisRaw=table2array(Data(:,... find(DataAnalysisInput_Table.Include))); % Find and remove rows with all zero values Index_allzerorows = all(DataforAnalysisRaw==0,2); DataforAnalysis = DataforAnalysisRaw(~Index_allzerorows,:)'; % Squareroot transform the data DataforAnalysisSqrtTransf = DataforAnalysis.^0.5; % Retain only the information for the samples included in the LabelsAndFactors_Table = DataAnalysisInput_Table(DataAnalysisInput_Table.Include == 1,:); % Define the labels for your observations ObservationLabel = LabelsAndFactors_Table.samplelabel; % Define the labels for your variables VariableLabel = table2array(OTU_Table(~Index_allzerorows,Variable_Column))'; % Define the symbols for factor 1, and colors for factor 2. There can be % at most 15 classifications. Factor 3 will be represented by filled and empty % markers. If there are 3 factors, factor 1 can have up to 15 classification, % factor 2 can have up to 4 classifications and factor 3 can have 2 % classifications (filled vs. empty). Factor_Colors = ... [1 0 0;0 0 1;0 0 0;0 1 0;0 1 1;1 0 1;1 1 0;0.3412 0.5882 0.0196;... 0.6510 0.5725 0.3412;0.8314 0.7020 0.7843;0.9100 0.4100 0.1700;... 0.4667 0.6745 0.1882;0.635 0.0784 0.1843;0.1529 0.1373 0.6000;... 0.4000 0.2196 0.2196]; Factor_Markers = {'o','^','s','d','x','*','+'}; % Define basic figure properties close all; set(0,'DefaultFigureWindowStyle','docked'); set(groot, ... 'DefaultFigureColor', 'w', ... 'DefaultAxesLineWidth', LineSz, ... 'DefaultAxesFontSize', FontSz, ... 'DefaultLineLineWidth', LineSz, ... 'DefaultTextFontSize', FontSz); f1 = figure; f2 = figure; f3 = figure; f4 = figure; % Principal component analysis % Generate the principal component coefficients (loadings), scores and % variances, Hotelling's T-squared statistic, the percentage of the total % variance explained by each principal component and mu, the estimated mean % of each variable in X. [PCA_Coefficients,PCA_score,PCA_variances,PCA_tsquared,PCA_explainedvariance,... PCA_variablemean] = pca(DataforAnalysisSqrtTransf); % Create a PCA plot % Plot data for each factor combination with distinct symbology figure(f2); Factor1 = unique(LabelsAndFactors_Table.factor1,'stable'); Factor2 = unique(LabelsAndFactors_Table.factor2,'stable'); Factor3 = unique(LabelsAndFactors_Table.factor3,'stable'); Factor1label = unique(LabelsAndFactors_Table.factor1label,'stable'); Factor2label = unique(LabelsAndFactors_Table.factor2label,'stable'); Factor3label = unique(LabelsAndFactors_Table.factor3label,'stable'); % Rearrange so that factors will be plotted in ascending order (this % enables user to define the order of samples in the legend of the PCA plot) [~,Factor1sort]=sort(Factor1); [~,Factor2sort]=sort(Factor2); [~,Factor3sort]=sort(Factor3); Factor1 = Factor1(Factor1sort); Factor1label = Factor1label(Factor1sort); Factor2 = Factor2(Factor2sort); Factor2label = Factor2label(Factor2sort); Factor3 = Factor3(Factor3sort); Factor3label = Factor3label(Factor3sort); NumberofFactors = nnz([size(unique(DataAnalysisInput_Table.factor1),1) > 1,... size(unique(DataAnalysisInput_Table.factor2),1) > 1,... size(unique(DataAnalysisInput_Table.factor3),1) > 1]); if NumberofFactors == 1 k=1; for i = 1:size(Factor1) condition = LabelsAndFactors_Table.factor1 == Factor1(i); scatter(PCA_score(condition,PCA_display(1,1)),PCA_score(condition,... PCA_display(1,2)),MarkerSz,'MarkerEdgeColor',Factor_Colors(i,:),... 'LineWidth',LineSz); Legend_Labels(k) = Factor1label(i); hold on; k=k+1; end end if NumberofFactors == 2 k=1; for i = 1:size(Factor1) for j = 1:size(Factor2) condition = LabelsAndFactors_Table.factor1 == Factor1(i) & ... LabelsAndFactors_Table.factor2 == Factor2(j); if any(condition) scatter(PCA_score(condition,PCA_display(1,1)),PCA_score(condition,... PCA_display(1,2)),MarkerSz,char(Factor_Markers(j)),'MarkerEdgeColor',... Factor_Colors(i,:),'LineWidth',LineSz); Legend_Labels(k) = strcat(Factor1label(i),Factor2label(j)); hold on; k=k+1; end end end end if NumberofFactors == 3 k=1; for i = 1:size(Factor1) for j = 1:size(Factor2) for l = 1:size(Factor3) condition = LabelsAndFactors_Table.factor1 == Factor1(i) & ... LabelsAndFactors_Table.factor2 == Factor2(j) & ... LabelsAndFactors_Table.factor3 == Factor3(l); if Factor3(l) == 1 if any(condition) scatter(PCA_score(condition,PCA_display(1,1)),PCA_score(condition,... PCA_display(1,2)),MarkerSz,char(Factor_Markers(j)),'MarkerEdgeColor',... Factor_Colors(i,:),'LineWidth',LineSz); else scatter(PCA_score(condition,PCA_display(1,1)),PCA_score(condition,... PCA_display(1,2)),MarkerSz,char(Factor_Markers(j)),'MarkerFaceColor',... Factor_Colors(i,:),'MarkerEdgeColor',... Factor_Colors(i,:),'LineWidth',LineSz); end Legend_Labels(k) = strcat(Factor1label(i),Factor2label(j),Factor3label(l)); hold on; k=k+1; end end end end end % Plot top 10 species in the 2-dimensional PCA space PCA_VectorLength = (PCA_Coefficients(:,PCA_display(1,1)).^2+... PCA_Coefficients(:,PCA_display(1,2)).^2).^0.5; Index_PC1and2_V = find(ismember(PCA_VectorLength,maxk(PCA_VectorLength,NofArrows))); quiver(zeros(NofArrows,1),zeros(NofArrows,1),... PCA_Coefficients(Index_PC1and2_V,PCA_display(1,1)),... PCA_Coefficients(Index_PC1and2_V,PCA_display(1,2)),0,'MaxHeadSize',0.1,... 'LineWidth',LineSz); text(PCA_Coefficients(Index_PC1and2_V,PCA_display(1,1)),... PCA_Coefficients(Index_PC1and2_V,PCA_display(1,2)),... VariableLabel(1,Index_PC1and2_V)','FontWeight','bold'); title('PCA plot','FontWeight','bold'); [~, objh]=legend(Legend_Labels,'Fontsize',FontSz,'FontWeight','bold',... 'Location',LegendPosition); objhl = findobj(objh, 'type', 'patch'); set(objhl, 'Markersize', MarkerSz/15,'Linewidth',LineSz); legend boxoff; xlabel(['Component ',num2str(PCA_display(1,1)),': ',... num2str(round(PCA_explainedvariance(PCA_display(1,1)),2)),'%'],... 'FontWeight','bold'); ylabel(['Component ',num2str(PCA_display(1,2)),': ',... num2str(round(PCA_explainedvariance(PCA_display(1,2)),2)),'%'],... 'FontWeight','bold'); set(gca,'FontWeight','bold'); hold off; % Plot the top 10 variables contributing to the principal components 1 & 2 Index_PC1_V = find(ismember(abs(PCA_Coefficients(:,PCA_display(1,1))),... maxk(abs(PCA_Coefficients(:,PCA_display(1,1))),10))); Index_PC2_V = find(ismember(abs(PCA_Coefficients(:,PCA_display(1,2))),... maxk(abs(PCA_Coefficients(:,PCA_display(1,2))),10))); T_Barchart_PC1_V = [VariableLabel(Index_PC1_V)' ... array2table(PCA_Coefficients(Index_PC1_V,PCA_display(1,1)))]; T_Barchart_PC1_V = sortrows(T_Barchart_PC1_V,2); T_Barchart_PC1_V.Properties.VariableNames = {'Label' 'Values'}; T_Barchart_PC2_V = [VariableLabel(Index_PC2_V)' ... array2table(PCA_Coefficients(Index_PC2_V,PCA_display(1,2)))]; T_Barchart_PC2_V = sortrows(T_Barchart_PC2_V,2); T_Barchart_PC2_V.Properties.VariableNames = {'Label' 'Values'}; figure(f3); barh(T_Barchart_PC1_V.Values); yticklabels(T_Barchart_PC1_V.Label); title(['PC',num2str(PCA_display(1,1))],'FontWeight','bold'); xlabel('Top10 loadings','FontWeight','bold'); set(gca,'FontWeight','bold'); figure(f4); barh(T_Barchart_PC2_V.Values); yticklabels(T_Barchart_PC2_V.Label); title(['PC',num2str(PCA_display(1,2))],'FontWeight','bold'); xlabel('Top10 loadings','FontWeight','bold'); set(gca,'FontWeight','bold'); % Cluster analysis % Generate the linkage tree with the specific distance metric tree = linkage(DataforAnalysisSqrtTransf,'average','euclidean'); % Plot the dendrogram figure(f1); Clust = dendrogram(tree,0,'label',ObservationLabel); xtickangle(90); set(gca,'FontWeight','bold'); title('Cluster analysis','Fontsize',FontSz,'FontWeight','bold'); ylabel('Euclidean distance','FontWeight','bold'); % Conduct ANOSIM to evaluate significance of factors dis = f_dis(DataforAnalysisSqrtTransf,'euc'); if size(Factor1,1) > 1 ANOSIMresult_Factor1 = f_anosim(dis,LabelsAndFactors_Table.factor1,1,1000,1,0,1); end if size(Factor2,1) > 1 ANOSIMresult_Factor2 = f_anosim(dis,LabelsAndFactors_Table.factor2,1,1000,1,0,1); end if size(Factor3,1) > 1 ANOSIMresult_Factor3 = f_anosim(dis,LabelsAndFactors_Table.factor3,1,1000,1,0,1); end