Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1from typing import List 

2 

3import numpy as np 

4from numba import njit 

5 

# One-letter amino-acid codes. The position of a letter in this list is the
# integer index used to encode it.
AMINO_ACIDS: List[str] = [
    "G",
    "V",
    "A",
    "L",
    "I",
    "C",
    "M",
    "F",
    "W",
    "P",
    "D",
    "E",
    "S",
    "T",
    "Y",
    "Q",
    "N",
    "K",
    "R",
    "H",
]

# ASCII code points of the letters in AMINO_ACIDS, in the same order.
AMINO_ACIDS_ORD: List[int] = [
    71,
    86,
    65,
    76,
    73,
    67,
    77,
    70,
    87,
    80,
    68,
    69,
    83,
    84,
    89,
    81,
    78,
    75,
    82,
    72,
]

# Import-time sanity check that the two tables above agree. Comparing the
# whole lists (rather than indexing one by the length of the other) also
# catches a length mismatch between them.
assert [ord(aa) for aa in AMINO_ACIDS] == AMINO_ACIDS_ORD


# Letter -> index lookup. "-" is appended after the amino acids, so it gets
# the first index past the end of AMINO_ACIDS (20).
# NOTE(review): "-" is presumably the alignment-gap symbol -- confirm.
AMINO_ACID_TO_IDX = {aa: i for i, aa in enumerate(AMINO_ACIDS + ["-"])}

56 

57 

@njit
def seq_to_tensor(seq: bytes) -> np.ndarray:
    """Encode a byte sequence as an array of amino-acid indices.

    Each byte of ``seq`` is looked up in a fixed table of ASCII codes; the
    position of the first match becomes the value stored for that byte.
    Bytes that match no table entry keep the fill value 20.

    Args:
        seq: Sequence to encode, one byte per residue.

    Returns:
        A 1-D floating-point array with one entry per byte of ``seq``.
    """
    # The code table is spelled out inside the function body rather than
    # read from a module-level list.
    # NOTE(review): presumably done so the jitted code does not depend on a
    # global Python list -- confirm.
    codes = [71, 86, 65, 76, 73, 67, 77, 70, 87, 80, 68, 69, 83, 84, 89, 81, 78, 75, 82, 72]
    # Start every position at 20 (one past the last table index), then
    # overwrite the positions whose byte is found in the table.
    encoded = np.full(len(seq), 20.0)
    for pos, byte in enumerate(seq):
        for idx, code in enumerate(codes):
            if byte == code:
                encoded[pos] = idx
                break
    return encoded

69 

70 

def array_to_seq(array: np.ndarray) -> str:
    """Decode an array of amino-acid indices back into a string.

    Each element is converted to ``int`` before the lookup, so
    floating-point index arrays (such as those returned by
    ``seq_to_tensor``) are accepted as well as integer ones. The index one
    past the end of ``AMINO_ACIDS`` (20) is decoded as ``"-"``, matching the
    entry that ``AMINO_ACID_TO_IDX`` assigns to ``"-"``.

    Args:
        array: 1-D iterable of indices into ``AMINO_ACIDS``.

    Returns:
        The decoded sequence, one character per element of ``array``.

    Raises:
        IndexError: If an index is outside the range ``AMINO_ACIDS`` accepts
            and is not the ``"-"`` index.
    """
    extra_idx = len(AMINO_ACIDS)  # index that AMINO_ACID_TO_IDX gives to "-"
    # int() is needed because NumPy float scalars cannot index a list.
    indices = (int(value) for value in array)
    return "".join("-" if idx == extra_idx else AMINO_ACIDS[idx] for idx in indices)