Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1import gzip 

2import uuid 

3from pathlib import Path 

4from typing import List 

5 

6import numpy as np 

7 

8import proteinsolver 

9from proteinsolver.dashboard.msa_view import MSASeq 

10 

11 

def generate_random_sequence(length=80, seed=None):
    """Return a random string built from ``proteinsolver.utils.AMINO_ACIDS``.

    Args:
        length: Number of symbols to draw.
        seed: When ``None``, numpy's global random state is used. Otherwise a
            dedicated ``np.random.RandomState(seed)`` is created, so the same
            seed reproduces the same sequence and the global state is not
            consumed.

    Returns:
        The drawn symbols concatenated into one ``str``.
    """
    # NOTE(review): presumably AMINO_ACIDS is a list of one-letter codes -- confirm.
    alphabet = np.array(proteinsolver.utils.AMINO_ACIDS)
    # The ``np.random`` module exposes ``choice`` backed by the global state,
    # so it can stand in for a RandomState instance here.
    rng = np.random if seed is None else np.random.RandomState(seed)
    residues = rng.choice(alphabet, length)
    return "".join(residues)

19 

20 

def save_sequences(sequences: List[MSASeq], output_folder: Path) -> Path:
    """Write *sequences* to a new gzip-compressed FASTA file.

    The records are rendered with ``sequences_to_fasta``, encoded as UTF-8 and
    gzip-compressed in memory, then written to
    ``<output_folder>/<random uuid4>.fasta.gz``.

    Args:
        sequences: Records to serialise.
        output_folder: Directory that receives the file. It is not created
            here, so it must already exist.

    Returns:
        Path of the file that was written.
    """
    payload = gzip.compress(sequences_to_fasta(sequences).encode("utf-8"))
    # A fresh UUID per call keeps repeated saves from overwriting each other.
    target = output_folder / f"{uuid.uuid4()}.fasta.gz"
    target.write_bytes(payload)
    return target

30 

31 

def sequences_to_fasta(sequences: List[MSASeq], line_width=80) -> str:
    """Render *sequences* as FASTA-formatted text.

    Each record produces a header line
    ``>{id}|{name}|{proba}|{logproba}`` followed by its ``seq`` wrapped into
    lines of at most ``line_width`` characters. Every line, including the
    last, ends with a newline. A record whose ``seq`` is empty contributes
    only its header line.

    Args:
        sequences: Iterable of records exposing ``id``, ``name``, ``proba``,
            ``logproba`` and ``seq`` attributes.
        line_width: Maximum number of sequence characters per output line.

    Returns:
        The concatenated FASTA text; an empty string when *sequences* is empty.

    Raises:
        ValueError: If ``line_width`` is smaller than 1.
    """
    # A negative width would make ``range`` empty and silently drop every
    # residue, yielding headers without sequences; fail loudly instead.
    if line_width < 1:
        raise ValueError(f"line_width must be a positive integer, got {line_width!r}")

    # Collect pieces and join once rather than growing a string with ``+=``.
    parts = []
    for sequence in sequences:
        parts.append(
            f">{sequence.id}|{sequence.name}|{sequence.proba}|{sequence.logproba}\n"
        )
        residues = sequence.seq
        parts.extend(
            residues[start : start + line_width] + "\n"
            for start in range(0, len(residues), line_width)
        )
    return "".join(parts)