-
Notifications
You must be signed in to change notification settings - Fork 0
/
mole_tree.py
150 lines (136 loc) · 6.27 KB
/
mole_tree.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# sort all tuples (moles) in Ms considering values of MM(e)
def sort_tuple(Ms, MM):
Ms_sorted = []
for mole in Ms:
mole_dict = {} # subdictionary of MM that contains only values of MM for elements in mole tuple
mole = sorted(mole, reverse=True) # it grants ordering if MM values are equal: simple numeric order
for item in mole:
mole_dict[item] = MM[item]
# sort mole_dict by value
mole_dict = dict(sorted(mole_dict.items(), key=lambda item: item[1], reverse=True))
Ms_sorted.append(tuple(mole_dict.keys()))
return Ms_sorted
class ScoreList:
def __init__(self,MM_value,IL_value,link):
self.MM = MM_value
self.IL = IL_value
self.node_link = link # replace linking all nodes in tree
class MoleTree:
def __init__(self, level, Ms, label, mole_num: int, father):
self.level = level # level of the node in tree
self.Ms = Ms # list of visible moles for the node
self.children = [] # (list of nodes)
self.father = father
self.label = label
self.mole_num = mole_num
#self.node_link = node_link # replaced by a list of nodes in score table
def print_tree(self):
print(" " * self.level, self.label,":",self.mole_num)
for child in self.children:
child.print_tree()
# build the subtree from the node (recursive)
def build_tree(self):
for mole in self.Ms:
# add children to the node based on level element
if self.level != len(mole): # exit recursion: we reached a leaf
new_label = mole[self.level]
if new_label in [node.label for node in self.children]: # child node already exists
for child in self.children:
if new_label == child.label:
child.mole_num += 1
else: # add new child node
# create Ms for the child
new_Ms = []
for mtuple in self.Ms:
if mtuple[self.level] == new_label:
new_Ms.append(mtuple)
# create child node
new_child = MoleTree(self.level+1, new_Ms, new_label, 1, self)
self.children.append(new_child)
# recursion by levels
for child in self.children:
child.build_tree() # recursion
def get_ancestors(self):
ancestors = []
node = self
while node.father.label != "null":
ancestors.append(node.father)
node = node.father
return ancestors
def build_link(self,label, ret:list):
for child in self.children:
if child.label == label:
ret.append(child)
else:
child.build_link(label,ret)
def build_score_table(self,MM:dict,IL:dict):
score_table = {}
labels = MM.keys()
for label in labels:
l = []
self.build_link(label,l)
score_table[label] = ScoreList(MM[label],IL[label],l)
return score_table
# remove the subtree having 'self' as root, the root and his link in score table
def remove_subtree(self, root, score_table):
if self.label != root:
score_table[self.label].node_link.remove(self) # remove from score table links
for child in self.children:
child.remove_subtree(root,score_table) #recursively remove subtree
self.children.clear() # remove all children from self
if self.label == root:
# we don't need to update the children of root, because at this point they don't exists anymore
for ancestor in self.get_ancestors(): # update ancestors of root
ancestor.mole_num -= self.mole_num
score_table[ancestor.label].MM -= self.mole_num
self.father.children.remove(self) # remove root
score_table[self.label].MM -= self.mole_num
del self
def suppress_moles(self,MM,IL,method): # method param change how we select the element to remove
score_table = self.build_score_table(MM,IL)
# TODO: da stamapre con logging
print("tree:")
self.print_tree()
print("\nscore table:")
print("item MM IL node_links")
for label, score in score_table.items():
print(label, " ", score.MM, " ", score.IL, " ", [i.label for i in score.node_link])
supp_item = set()
keys = list(score_table)
while keys: # while score table is not empty
score = []
if method == "mmil":
for _,e in score_table.items():
score.append(e.MM/e.IL) # compute all MM/IL for all label
elif method == "mm":
for _,e in score_table.items():
score.append(e.MM) # compute all MM for all label
elif method == "1il":
for _,e in score_table.items():
score.append(1/e.IL) # compute all 1/IL for all label
else:
raise ValueError('Suppressing method not recognised')
e = keys[score.index(max(score))]
print("\nto remove (max MM/IL): ", e, "\n")
#TODO: da stamapre con logging
supp_item.add(e) # select the label with the max value of MM/IL
for link in score_table[e].node_link:
link.remove_subtree(link.label,score_table)
score_table[e].node_link.clear()
#remove e from score table
_ = score_table.pop(e)
keys.remove(e)
for k in list(score_table): # check if we have MM == 0 in score table
if score_table[k].MM == 0:
for link in score_table[k].node_link: # remove node with MM == 0 in tree
link.remove_subtree(link.label,score_table)
_ = score_table.pop(k)
keys.remove(k)
#TODO: da stamapre con logging
print("-------------------")
print("tree:")
self.print_tree()
print("score table:")
for label,score in score_table.items():
print(label, " ", score.MM, " ", score.IL, " ", [i.label for i in score.node_link])
return supp_item