Quest 9: Encoded in the Scales

  • Keep top level comments as only solutions, if you want to say something other than a solution put it in a new post. (replies to comments can be whatever)
  • You can send code in code blocks by using three backticks, the code, and then three backticks or use something such as https://topaz.github.io/paste/ if you prefer sending it through a URL

Link to participate: https://everybody.codes/

  • Pyro
    link
    fedilink
    arrow-up
    2
    ·
    1 个月前

    Python

    # returns parents of child if found, else None
    def get_parents(dnas: list[list[str]], child: int):
        candidates = len(dnas)
        seq_len = len(dnas[0])
    
        for p1 in range(candidates):
            if p1 == child:
                continue
            for p2 in range(p1 + 1, candidates):
                if p2 == child:
                    continue
    
                for idx in range(seq_len):
                    if dnas[child][idx] not in [dnas[p1][idx], dnas[p2][idx]]:
                        # mismatch found
                        break
                else:
                    # no-break => all matched
                    return p1, p2
        return None
    
    # yields all children with their parents from a collection of dnas
    def yield_children(dnas: list[list[str]]):
        for child in range(len(dnas)):
            parents = get_parents(dnas, child)
            if parents is not None:
                yield child, parents
    
    
    def part1(data: str):
        # parse input data into list of DNA sequences
        dnas = [list(dna.split(":")[1]) for dna in data.splitlines()]
        seq_len = len(dnas[0])
    
        child, parents = next(yield_children(dnas))
        similarities = [0, 0]
        for idx in range(seq_len):
            for pi in range(2):
                if dnas[child][idx] == dnas[parents[pi]][idx]:
                    similarities[pi] += 1
    
        return similarities[0] * similarities[1]
    
    
    assert (
        part1("""1:CAAGCGCTAAGTTCGCTGGATGTGTGCCCGCG
    2:CTTGAATTGGGCCGTTTACCTGGTTTAACCAT
    3:CTAGCGCTGAGCTGGCTGCCTGGTTGACCGCG""")
        == 414
    )
    
    
    def part2(data: str):
        # parse input data into list of DNA sequences
        dnas = [list(dna.split(":")[1]) for dna in data.splitlines()]
        seq_len = len(dnas[0])
    
        children_gen = yield_children(dnas)
    
        all_similarity = 0
        for child, parents in children_gen:
            similarities = [0, 0]
            for idx in range(seq_len):
                for pi in range(2):
                    if dnas[child][idx] == dnas[parents[pi]][idx]:
                        similarities[pi] += 1
            all_similarity += similarities[0] * similarities[1]
        return all_similarity
    
    
    assert (
        part2("""1:GCAGGCGAGTATGATACCCGGCTAGCCACCCC
    2:TCTCGCGAGGATATTACTGGGCCAGACCCCCC
    3:GGTGGAACATTCGAAAGTTGCATAGGGTGGTG
    4:GCTCGCGAGTATATTACCGAACCAGCCCCTCA
    5:GCAGCTTAGTATGACCGCCAAATCGCGACTCA
    6:AGTGGAACCTTGGATAGTCTCATATAGCGGCA
    7:GGCGTAATAATCGGATGCTGCAGAGGCTGCTG""")
        == 1245
    )
    
    
    # Disjoint Set Union (Union-Find) data structure
    #   with path compression and union by rank
    class DSU:
        def __init__(self, size):
            self.parent = list(range(size))
            self.rank = [1] * size
    
        def find(self, x):
            # path compression
            if self.parent[x] != x:
                self.parent[x] = self.find(self.parent[x])
            return self.parent[x]
    
        def union(self, x, y):
            rootX = self.find(x)
            rootY = self.find(y)
    
            if rootX == rootY:
                return False
    
            # union by rank
            #   attach smaller rank tree under root of higher rank tree
            if self.rank[rootX] < self.rank[rootY]:
                self.parent[rootX] = rootY
            elif self.rank[rootX] > self.rank[rootY]:
                self.parent[rootY] = rootX
            else:
                # ranks are same, so make one as root and increment its rank by one
                self.parent[rootY] = rootX
                self.rank[rootX] += 1
    
            return True
    
    
    def part3(data: str):
        # parse input data into list of DNA sequences
        dnas = [list(dna.split(":")[1]) for dna in data.splitlines()]
        candidates = len(dnas)
    
        dsu = DSU(candidates)
        children_gen = yield_children(dnas)
    
        # union children with their parents
        for child, (p1, p2) in children_gen:
            dsu.union(child, p1)
            dsu.union(child, p2)
        
        # record [size, scale_sum] for each group
        groups = {}
        for scale_idx in range(candidates):
            # find the group of the current candidate
            group = dsu.find(scale_idx)
            # update group's size and scale sum
            entry = groups.setdefault(group, [0, 0])
            entry[0] += 1
            entry[1] += scale_idx + 1
        
        # return the maximum scale sum among all groups
        return max(groups.values())[1]
    
    
    assert (
        part3("""1:GCAGGCGAGTATGATACCCGGCTAGCCACCCC
    2:TCTCGCGAGGATATTACTGGGCCAGACCCCCC
    3:GGTGGAACATTCGAAAGTTGCATAGGGTGGTG
    4:GCTCGCGAGTATATTACCGAACCAGCCCCTCA
    5:GCAGCTTAGTATGACCGCCAAATCGCGACTCA
    6:AGTGGAACCTTGGATAGTCTCATATAGCGGCA
    7:GGCGTAATAATCGGATGCTGCAGAGGCTGCTG""")
    ) == 12
    
    assert (
        part3("""1:GCAGGCGAGTATGATACCCGGCTAGCCACCCC
    2:TCTCGCGAGGATATTACTGGGCCAGACCCCCC
    3:GGTGGAACATTCGAAAGTTGCATAGGGTGGTG
    4:GCTCGCGAGTATATTACCGAACCAGCCCCTCA
    5:GCAGCTTAGTATGACCGCCAAATCGCGACTCA
    6:AGTGGAACCTTGGATAGTCTCATATAGCGGCA
    7:GGCGTAATAATCGGATGCTGCAGAGGCTGCTG
    8:GGCGTAAAGTATGGATGCTGGCTAGGCACCCG""")
    ) == 36
    
    • hadesOPM
      link
      fedilink
      arrow-up
      2
      ·
      1 个月前

      union find: nice! I was too lazy to use union find, so I brute forced merging of the families, it was fast enough :)

      • Pyro
        link
        fedilink
        arrow-up
        1
        ·
        1 个月前

        Yeah I’ve got the DSU algorithm ingrained because of the number of the times I had to practice it for coding rounds. I didn’t need to do path compression and union by rank either but might as well.

        • hadesOPM
          link
          fedilink
          arrow-up
          1
          ·
          1 个月前

          Wow, your interviews were tough. I was never asked anything even remotely this difficult.