-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathassign-colors
More file actions
executable file
·96 lines (88 loc) · 4.17 KB
/
assign-colors
File metadata and controls
executable file
·96 lines (88 loc) · 4.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#!/usr/bin/env python3
"""
Generate colors.tsv for augur export based on ordering, color schemes, and
traits that exist in the metadata.
"""
import argparse
import pandas as pd
if __name__ == '__main__':
parser = argparse.ArgumentParser(
description="Assign colors based on defined ordering of traits.",
formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
parser.add_argument('--ordering', type=str, required=True,
help="""Input TSV file defining the color ordering where the first
column is the field and the second column is the trait in that field.
Blank lines are ignored. Lines starting with '#' will be ignored as comments.""")
parser.add_argument('--color-schemes', type=str, required=True,
help="Input color schemes where each line is a different color scheme separated by tabs.")
parser.add_argument('--metadata', type=str,
help="""If provided, restrict colors to only those traits found in
metadata. If the metadata includes a 'focal' column that only contains
boolean values, then restrict colors to traits for rows where 'focal'
is set to True.""")
parser.add_argument('--output', type=str, required=True,
help="Output colors TSV file to be passed to augur export.")
args = parser.parse_args()
assignment = {}
with open(args.ordering) as f:
for line in f.readlines():
array = line.strip().split("\t")
# Ignore empty lines or commented lines
if not array or not array[0] or array[0].startswith('#'):
continue
# Throw a warning if encountering a line not matching the expected number of columns, ignore line
elif len(array)!=2:
print(f"WARNING: Could not decode color ordering line: {line}")
continue
# Otherwise, process color ordering where we expect 2 columns: name, traits
else:
name = array[0]
trait = array[1]
if name not in assignment:
assignment[name] = [trait]
else:
assignment[name].append(trait)
# if metadata supplied, go through and
# 1. remove assignments that don't exist in metadata
# 2. remove assignments that have 'focal' set to 'False' in metadata
if args.metadata:
metadata = pd.read_csv(args.metadata, delimiter='\t')
for name, trait in assignment.items():
if name in metadata:
if 'focal' in metadata and metadata['focal'].dtype == 'bool':
focal_list = metadata.loc[metadata['focal'], name].unique()
subset_focal = [x for x in assignment[name] if x in focal_list]
assignment[name] = subset_focal
else: # no 'focal' present
subset_present = [x for x in assignment[name] if x in metadata[name].unique()]
assignment[name] = subset_present
schemes = {}
counter = 0
with open(args.color_schemes) as f:
for line in f.readlines():
counter += 1
array = line.lstrip().rstrip().split("\t")
schemes[counter] = array
with open(args.output, 'w') as f:
for trait_name, trait_array in assignment.items():
if len(trait_array)==0:
print(f"No traits found for {trait_name}")
continue
if len(schemes)<len(trait_array):
print(f"WARNING: insufficient colours available for trait {trait_name} - reusing colours!")
remain = len(trait_array)
color_array = []
while(remain>0):
if (remain>len(schemes)):
color_array = [*color_array, *schemes[len(schemes)]]
remain -= len(schemes)
else:
color_array = [*color_array, *schemes[remain]]
remain = 0
else:
color_array = schemes[len(trait_array)]
zipped = list(zip(trait_array, color_array))
for trait_value, color in zipped:
f.write(trait_name + "\t" + trait_value + "\t" + color + "\n")
f.write("\n")