Quest 9: Encoded in the Scales
- Keep top level comments as only solutions, if you want to say something other than a solution put it in a new post. (replies to comments can be whatever)
- You can send code in code blocks by using three backticks, the code, and then three backticks or use something such as https://topaz.github.io/paste/ if you prefer sending it through a URL
Link to participate: https://everybody.codes/


Python
# returns parents of child if found, else None def get_parents(dnas: list[list[str]], child: int): candidates = len(dnas) seq_len = len(dnas[0]) for p1 in range(candidates): if p1 == child: continue for p2 in range(p1 + 1, candidates): if p2 == child: continue for idx in range(seq_len): if dnas[child][idx] not in [dnas[p1][idx], dnas[p2][idx]]: # mismatch found break else: # no-break => all matched return p1, p2 return None # yields all children with their parents from a collection of dnas def yield_children(dnas: list[list[str]]): for child in range(len(dnas)): parents = get_parents(dnas, child) if parents is not None: yield child, parents def part1(data: str): # parse input data into list of DNA sequences dnas = [list(dna.split(":")[1]) for dna in data.splitlines()] seq_len = len(dnas[0]) child, parents = next(yield_children(dnas)) similarities = [0, 0] for idx in range(seq_len): for pi in range(2): if dnas[child][idx] == dnas[parents[pi]][idx]: similarities[pi] += 1 return similarities[0] * similarities[1] assert ( part1("""1:CAAGCGCTAAGTTCGCTGGATGTGTGCCCGCG 2:CTTGAATTGGGCCGTTTACCTGGTTTAACCAT 3:CTAGCGCTGAGCTGGCTGCCTGGTTGACCGCG""") == 414 ) def part2(data: str): # parse input data into list of DNA sequences dnas = [list(dna.split(":")[1]) for dna in data.splitlines()] seq_len = len(dnas[0]) children_gen = yield_children(dnas) all_similarity = 0 for child, parents in children_gen: similarities = [0, 0] for idx in range(seq_len): for pi in range(2): if dnas[child][idx] == dnas[parents[pi]][idx]: similarities[pi] += 1 all_similarity += similarities[0] * similarities[1] return all_similarity assert ( part2("""1:GCAGGCGAGTATGATACCCGGCTAGCCACCCC 2:TCTCGCGAGGATATTACTGGGCCAGACCCCCC 3:GGTGGAACATTCGAAAGTTGCATAGGGTGGTG 4:GCTCGCGAGTATATTACCGAACCAGCCCCTCA 5:GCAGCTTAGTATGACCGCCAAATCGCGACTCA 6:AGTGGAACCTTGGATAGTCTCATATAGCGGCA 7:GGCGTAATAATCGGATGCTGCAGAGGCTGCTG""") == 1245 ) # Disjoint Set Union (Union-Find) data structure # with path compression and union by rank class DSU: def __init__(self, size): self.parent = list(range(size)) self.rank = [1] * size def find(self, x): # path compression if self.parent[x] != x: self.parent[x] = self.find(self.parent[x]) return self.parent[x] def union(self, x, y): rootX = self.find(x) rootY = self.find(y) if rootX == rootY: return False # union by rank # attach smaller rank tree under root of higher rank tree if self.rank[rootX] < self.rank[rootY]: self.parent[rootX] = rootY elif self.rank[rootX] > self.rank[rootY]: self.parent[rootY] = rootX else: # ranks are same, so make one as root and increment its rank by one self.parent[rootY] = rootX self.rank[rootX] += 1 return True def part3(data: str): # parse input data into list of DNA sequences dnas = [list(dna.split(":")[1]) for dna in data.splitlines()] candidates = len(dnas) dsu = DSU(candidates) children_gen = yield_children(dnas) # union children with their parents for child, (p1, p2) in children_gen: dsu.union(child, p1) dsu.union(child, p2) # record [size, scale_sum] for each group groups = {} for scale_idx in range(candidates): # find the group of the current candidate group = dsu.find(scale_idx) # update group's size and scale sum entry = groups.setdefault(group, [0, 0]) entry[0] += 1 entry[1] += scale_idx + 1 # return the maximum scale sum among all groups return max(groups.values())[1] assert ( part3("""1:GCAGGCGAGTATGATACCCGGCTAGCCACCCC 2:TCTCGCGAGGATATTACTGGGCCAGACCCCCC 3:GGTGGAACATTCGAAAGTTGCATAGGGTGGTG 4:GCTCGCGAGTATATTACCGAACCAGCCCCTCA 5:GCAGCTTAGTATGACCGCCAAATCGCGACTCA 6:AGTGGAACCTTGGATAGTCTCATATAGCGGCA 7:GGCGTAATAATCGGATGCTGCAGAGGCTGCTG""") ) == 12 assert ( part3("""1:GCAGGCGAGTATGATACCCGGCTAGCCACCCC 2:TCTCGCGAGGATATTACTGGGCCAGACCCCCC 3:GGTGGAACATTCGAAAGTTGCATAGGGTGGTG 4:GCTCGCGAGTATATTACCGAACCAGCCCCTCA 5:GCAGCTTAGTATGACCGCCAAATCGCGACTCA 6:AGTGGAACCTTGGATAGTCTCATATAGCGGCA 7:GGCGTAATAATCGGATGCTGCAGAGGCTGCTG 8:GGCGTAAAGTATGGATGCTGGCTAGGCACCCG""") ) == 36union find: nice! I was too lazy to use union find, so I brute forced merging of the families, it was fast enough :)
Yeah I’ve got the DSU algorithm ingrained because of the number of the times I had to practice it for coding rounds. I didn’t need to do path compression and union by rank either but might as well.
Wow, your interviews were tough. I was never asked anything even remotely this difficult.