This repository has been archived by the owner on Dec 22, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
/
lineage_Taxo.m
67 lines (56 loc) · 1.96 KB
/
lineage_Taxo.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
%% lineage_Taxo
% gets lineage of a species in Taxonomicon
%%
function [lineage, rank] = lineage_Taxo(my_pet)
% created 2018/01/30 by Bas Kooijman
%% Syntax
% [lineage rank] = <../lineage_Taxo.m *lineage_Taxo*>(my_pet)
%% Description
% Gets lineage of species from the Taxonomicon:
% looks up the Taxonomicon id of the entry with get_id_Taxo, downloads the TaxonTree page for that id
% and extracts the taxon names and their ranks from the classification listing in the page source.
%
% Input:
%
% * my_pet: character string with name of an entry
%
% Output:
%
% * lineage: (n,1) cell array with lineage; the last element is my_pet
% * rank: (n,1) cell array with ranks; the last element is 'Species', taxa without rank have an empty element
%
% Both outputs are empty ([]) if no id is found, if the website reports an error,
% or if no classification can be located in the page.
%% Remarks
% <lineage.html *lineage*> gives a similar result for AmP entries, and <lineage_CoL.html *lineage_CoL*> for the Catalog of Life.
% Taxa that precede Animalia are removed from the output;
% if Animalia is not part of the lineage, a warning is printed and the complete lineage is returned.
% Errors raised by urlread (e.g. no network connection) are not caught.
%% Example of use
% [lin rank] = lineage_Taxo('Daphnia_magna')

  id_Taxo = get_id_Taxo(my_pet);
  if isempty(id_Taxo) % no identifier obtained for this entry
    lineage = []; rank = [];
    return
  end

  % urlread returns the page source as a character string
  url = urlread(['http://taxonomicon.taxonomy.nl/TaxonTree.aspx?id=', id_Taxo]);
  if ~isempty(strfind(url, 'This unexpected error'))
    lineage = []; rank = [];
    fprintf('Warning from lineage_Taxo: website Taxonomicon is presently not working properly\n')
    return
  end

  % marker that delimits the taxa in the classification listing
  % NOTE(review): in this copy of the file the marker is a single blank, which matches every blank
  %   in the page source; presumably the page separates taxa with a non-breaking-space entity and
  %   the literal got lost in transcription -- confirm against the page source before relying on it
  sep = ' ';

  % remove all stuff around classification
  i_head = strfind(url, sep);
  if ~isempty(i_head)
    url(1:i_head(1) - 1) = []; % drop everything in front of the first marker
  end
  i_tail = strfind(url, '<br /></p></div>');
  if ~isempty(i_tail)
    url(i_tail(1):end) = []; % drop everything from the end of the listing onwards
  end

  ind = strfind(url, sep); % find lines for all ranks
  n = length(ind);
  if n == 0 % nothing to parse; indexing with "end" below would fail on empty cells
    lineage = []; rank = [];
    fprintf('Warning from lineage_Taxo: no classification found for %s\n', my_pet)
    return
  end
  lineage = cell(n,1); rank = cell(n,1);

  for i = 1:n-1 % scan lineage; element n is reserved for the entry itself
    res_i = url(ind(i):ind(i+1)); % substring for 1 taxon
    rank_i = text_between(res_i, '>', '<b>'); % rank sits between the first '>' and the first '<b>'
    if strcmp(rank_i, '"') || isempty(rank_i)
      rank_i = []; % replace empty char string by empty (there is a difference when converting to cells)
    else
      rank_i = strrep(rank_i, sep, ''); % remove all markers from the rank
    end
    rank(i) = {rank_i}; % convert char str to cell
    lineage(i) = {text_between(res_i, '<i>', '</i>')}; % taxon name is set in italics
  end
  lineage(end) = {my_pet}; rank(end) = {'Species'};

  % start the lineage at Animalia
  [is_animal, i_animal] = ismember('Animalia', lineage);
  if ~is_animal
    % my_pet is passed as argument, not as part of the format, so that '%' or '\' in it are printed literally
    fprintf('Warning from lineage_Taxo: %s is not classified as belonging to Animalia\n', my_pet)
  else
    rank(1:i_animal-1) = []; lineage(1:i_animal-1) = [];
  end

function txt = text_between(str, open_tag, close_tag)
% text in str that follows the first open_tag and precedes the first close_tag;
% (1,0) empty char if a tag is missing or if close_tag does not come after open_tag

  i_open = strfind(str, open_tag); i_close = strfind(str, close_tag);
  if isempty(i_open) || isempty(i_close)
    txt = str(1:0);
  else
    txt = str(i_open(1) + length(open_tag):i_close(1) - 1);
  end