Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from typing import List
3import numpy as np
4from numba import njit
# One-letter amino-acid codes. A residue's position in this list is the
# integer index used to encode it.
AMINO_ACIDS: List[str] = [
    "G",
    "V",
    "A",
    "L",
    "I",
    "C",
    "M",
    "F",
    "W",
    "P",
    "D",
    "E",
    "S",
    "T",
    "Y",
    "Q",
    "N",
    "K",
    "R",
    "H",
]

# Code points of the letters above, in the same order. Kept as literals so
# they can be compared by eye with the integer list hard-coded inside
# ``seq_to_tensor``.
AMINO_ACIDS_ORD: List[int] = [
    71,
    86,
    65,
    76,
    73,
    67,
    77,
    70,
    87,
    80,
    68,
    69,
    83,
    84,
    89,
    81,
    78,
    75,
    82,
    72,
]

# Import-time sanity check. Comparing whole lists also catches a length
# mismatch between the two tables, not only a wrong value at a shared position.
assert AMINO_ACIDS_ORD == [ord(aa) for aa in AMINO_ACIDS], (
    "AMINO_ACIDS_ORD is out of sync with AMINO_ACIDS"
)

# Letter -> index lookup; the gap character "-" takes the slot right after
# the last amino acid (index len(AMINO_ACIDS)).
AMINO_ACID_TO_IDX = {aa: i for i, aa in enumerate(AMINO_ACIDS + ["-"])}
@njit
def seq_to_tensor(seq: bytes) -> np.ndarray:
    """Encode a byte sequence as an array of amino-acid indices.

    Each byte is compared against a fixed table of 20 code points; a match
    stores the table position at the corresponding output slot. Bytes that
    match nothing keep the fill value 20.

    Args:
        seq: the sequence as bytes (iterated byte by byte).

    Returns:
        A 1-D array of ``len(seq)`` elements. It is built from ``np.ones``,
        so the indices are stored as floats.
    """
    # Same values, in the same order, as the module-level AMINO_ACIDS_ORD;
    # written out as a local literal inside the jitted function.
    codes = [71, 86, 65, 76, 73, 67, 77, 70, 87, 80, 68, 69, 83, 84, 89, 81, 78, 75, 82, 72]
    unknown_idx = 20
    encoded = np.ones(len(seq)) * unknown_idx
    for pos, byte in enumerate(seq):
        # Linear scan of the table; stop at the first hit.
        for idx in range(len(codes)):
            if codes[idx] == byte:
                encoded[pos] = idx
                break
    return encoded
def array_to_seq(array: np.ndarray) -> str:
    """Decode an array of residue indices back into a sequence string.

    An index ``i`` below ``len(AMINO_ACIDS)`` maps to ``AMINO_ACIDS[i]``.
    The index ``len(AMINO_ACIDS)`` maps to ``"-"``, the slot that
    ``AMINO_ACID_TO_IDX`` assigns to the gap character (and the fill value
    ``seq_to_tensor`` leaves for bytes it does not recognise).

    Every element is converted with ``int()`` before the lookup, so float
    arrays holding whole numbers decode instead of raising ``TypeError``.
    ``seq_to_tensor`` builds its output from ``np.ones`` and therefore
    returns floats. Fractional values are truncated by ``int()``.

    Args:
        array: one-dimensional collection of indices.

    Returns:
        The decoded sequence, one character per element of ``array``.

    Raises:
        IndexError: if an index falls outside the range accepted by
            ``AMINO_ACIDS`` and is not the gap index.
    """
    gap_idx = len(AMINO_ACIDS)
    return "".join(
        "-" if idx == gap_idx else AMINO_ACIDS[idx]
        for idx in (int(value) for value in array)
    )