% This work is licensed under a % Creative Commons Attribution-ShareAlike 4.0 International License. % This code should be attributed to Dr David Werner, Newcastle University. % It cannot be guaranteed that the code is error free. If you identify an % issue, please contact: david.werner@ncl.ac.uk. Last reviewed: 11/03/2020 % Define the total number of reads per sample for the rarefication. This % will typically be a number smaller than the minimum total number of % classified reads for sample with the lowest number of reads amongst % the samples you want to compare. For samples with less than the specified % number or reads, an all zeros column will be returned Tot_reads_rarified = 90000; % Define the name of the excel workbook for the output. The name needs to % include the .xlsx extension. % Make sure the destination is not opened in another application and does % not yet already contain data Workbook_Name = 'OTUTableRoot_Akaki.xlsx'; % Reducing the data file to exclude unnecessary columns and rows if size(data,2) > 2 % Deleting reads below the quality threshold data(data(:,3)==-1,:)=[]; % Deleting the accuracy column data(:,3) = []; % Deleting rows with missing data data = rmmissing(data); end % Creating the list of barcodes and species Barcodes = unique(data(:,2)); Species = unique(data(:,1)); % Matching species ID with taxonomy load('NCBItxidlineage.mat'); SpeciesLibraryIndex = arrayfun(@(x)(find(NCBItxidlineage.txid==x)),Species',... 'UniformOutput',false); SpeciesNotInLibraryIndex = find(cellfun('isempty',SpeciesLibraryIndex)); SpeciesNotInLibraryTaxids = Species(SpeciesNotInLibraryIndex); SpeciesLibraryIndex(SpeciesNotInLibraryIndex) = {1}; SpeciesLibraryIndexFinal = cell2mat(SpeciesLibraryIndex'); SpeciesLineage = NCBItxidlineage(SpeciesLibraryIndexFinal',:); % Creating the complete OTU table and saving it as excel worksheet SpeciesOTUTable_Reads = zeros(size(Species,1),size(Barcodes,1)); SpeciesOTUTable_RelAb = zeros(size(Species,1),size(Barcodes,1)); SpeciesOTUTable_Reads_Rarefied = zeros(size(Species,1),size(Barcodes,1)); SpeciesOTUTable_RelAb_Rarefied = zeros(size(Species,1),size(Barcodes,1)); for i = 1:size(Barcodes,1) % Extracting the species data for the barcode BCSpecies = data(data(:,2)==Barcodes(i),1); % Total reads above the quality threshold BCReadsTot = size(BCSpecies,1); % Counting the reads per species for each barcode SpeciesOTUTable_Reads(:,i) = hist(BCSpecies,Species)'; % Calculating the relative abundance by dividing by the total reads SpeciesOTUTable_RelAb(:,i) = SpeciesOTUTable_Reads(:,i)./BCReadsTot; % Extract and count rarefied number of species for the barcode if size(BCSpecies,1) >= Tot_reads_rarified BCSpecies_Rarefied = datasample(BCSpecies,Tot_reads_rarified,... 'Replace',false); SpeciesOTUTable_Reads_Rarefied(:,i) = hist(BCSpecies_Rarefied,Species)'; SpeciesOTUTable_RelAb_Rarefied(:,i) = ... SpeciesOTUTable_Reads_Rarefied(:,i)./Tot_reads_rarified; end end % Creating the label for the column headings Labels = readtable('LabelBarcodes_AKAKI.xlsx','Sheet',1); Index_Labels = arrayfun(@(x)(find(Labels.barcode==x)),Barcodes'); Label_Final = cellstr(["NCBItxid";Labels.labels(Index_Labels)]); T_Reads = [SpeciesLineage(:,2:end) array2table([Species SpeciesOTUTable_Reads],... 'VariableNames',Label_Final)]; T_RelAb = [SpeciesLineage(:,2:end) array2table([Species SpeciesOTUTable_RelAb],... 'VariableNames',Label_Final)]; % Remove all zero rows (species not present in rarified OTU table) % from the rarified reads OTU table T_Reads_Rarefied = [SpeciesLineage(:,2:end) array2table([Species SpeciesOTUTable_Reads_Rarefied],... 'VariableNames',Label_Final)]; Index_allzerorows = all(SpeciesOTUTable_Reads_Rarefied==0,2); T_Reads_Rarefied=T_Reads_Rarefied(~Index_allzerorows,:); T_RelAb_Rarefied = [SpeciesLineage(:,2:end) array2table([Species SpeciesOTUTable_RelAb_Rarefied],... 'VariableNames',Label_Final)]; Index_allzerorows = all(SpeciesOTUTable_RelAb_Rarefied==0,2); T_RelAb_Rarefied=T_RelAb_Rarefied(~Index_allzerorows,:); % Saving the data to excel, OTU reads table in the first, and rarified OTU % reads table in the second worksheet writetable(T_Reads,Workbook_Name,'WriteRowNames',true,'Sheet',1,'Range','A1'); writetable(T_RelAb,Workbook_Name,'WriteRowNames',true,'Sheet',2,'Range','A1'); writetable(T_Reads_Rarefied,Workbook_Name,'WriteRowNames',true,'Sheet',3,'Range','A1'); writetable(T_RelAb_Rarefied,Workbook_Name,'WriteRowNames',true,'Sheet',4,'Range','A1');