-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfishtree_script.py
More file actions
69 lines (43 loc) · 1.93 KB
/
fishtree_script.py
File metadata and controls
69 lines (43 loc) · 1.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import csv
import json

import dendropy
import requests
from opentree import OT
# Load the taxon table. The first row is a header; in every other row the
# field at index 1 is used as the taxon name and the field at index 2 as the
# OpenTree (OTT) id.
taxa_csv = "../FishTree/alfaro_fishset/189taxa_main.csv"
# `with` closes the file even if parsing fails; newline="" is what the csv
# module expects so it can handle line endings and quoted fields itself.
with open(taxa_csv, newline="") as taxa_file:
    reader = csv.reader(taxa_file)
    next(reader, None)  # skip the header (tolerates an empty file)
    # Ignore blank rows (e.g. a trailing empty line), which would otherwise
    # raise IndexError when the id column is read.
    taxa_rows = [row for row in reader if any(field.strip() for field in row)]

ott_ids = [int(row[2]) for row in taxa_rows]  # grabs all opentree ids
# Maps ott ids (formatted as node ids, i.e. "ott<id>") back to names.
# Fields are trimmed so stray whitespace or a line ending can never end up
# inside a node id or a tip label.
translation_dict = {"ott" + row[2].strip(): row[1].strip() for row in taxa_rows}
treefile = "alfaro_fishset.tre"
# Request the synthetic tree induced by our OTT ids from OpenTree, asking
# for name labels (label_format="name").
output = OT.synth_induced_tree(
    ott_ids=list(ott_ids),
    label_format="name",
)
synth_tree = output.tree
# Save the tree as newick, then draw it on the console.
synth_tree.write(path=treefile, schema="newick")
synth_tree.print_plot(width=100)

# Show whatever the response lists under 'broken'.
print("broken taxa:")
broken_taxa = output.response_dict["broken"]
print(broken_taxa)
## Get citations for the studies listed under 'supporting_studies'
cites = OT.get_citations(output.response_dict['supporting_studies'])
# The context manager closes the file even if the write fails. UTF-8 is set
# explicitly because citation text may contain non-ASCII characters (e.g.
# accented author names) that a platform-default codec could reject.
with open("topology_citations.txt", "w", encoding="utf-8") as cite_fi:
    cite_fi.write(cites)
## Get Dated synth tree
url = 'https://dates.opentreeoflife.org/v4/dates/dated_tree'
## Requires node ids - which for ott ids are just 'ott' + the id
payload = {"node_ids": list(translation_dict)}
# Bound the wait so a stalled connection cannot hang the script forever:
# (connect timeout, read timeout) in seconds. The read timeout is kept
# generous in case the service is slow to answer.
resp = requests.post(url=url, data=json.dumps(payload), timeout=(10, 600))
# Explicit check instead of `assert`: asserts are stripped under `python -O`
# and report nothing about what the server actually answered.
if resp.status_code != 200:
    raise RuntimeError(
        "dated_tree request failed with HTTP status {}: {}".format(
            resp.status_code, resp.text[:500]
        )
    )
resp_dict = resp.json()
## Somewhat annoyingly, the response always has a 'list' of trees, even where there is only one tree
dated_tree = dendropy.Tree.get(string=resp_dict['dated_trees_newick_list'][0], schema="newick")
# The dated tree labels are all ott ids - convenient for data analysis but
# annoying for interpretability. Translate them back to "<name>_<ottid>"
# using the translation_dict lookup table (no API call is involved).
untranslated = []
for taxon in dated_tree.taxon_namespace:
    ottid = taxon.label
    name = translation_dict.get(ottid)
    if name is None:
        # A label with no entry in the table is left as-is and reported
        # below, rather than aborting the whole run with a KeyError.
        untranslated.append(ottid)
        continue
    taxon.label = name + "_" + ottid
if untranslated:
    print("labels without a translation (left unchanged):")
    print(untranslated)
dated_tree.write(path="labelled_dated_fish.tre", schema="newick")
## Pull the citations for the dates
date_cites = OT.get_citations(resp_dict['date_sources'])
# The context manager closes the file even if the write fails. UTF-8 is set
# explicitly because citation text may contain non-ASCII characters that a
# platform-default codec could reject.
with open("date_citations.txt", "w", encoding="utf-8") as date_cite_fi:
    date_cite_fi.write(date_cites)