@FrankZhang6/

BuzzingAntiqueDeveloper-1

Python

No description

fork
loading
Files
  • main.py
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import sys
import re

"""
This program takes one command-line argument, which must be a valid, uppercase DNA sequence.
It should print all six possible amino acid translations, one per line.
"""

"""
revcomp computes the reverse complement of a DNA sequence.
It takes one argument, a string called s.
Currently, it just returns s.
Change the function AND THIS COMMENT so it returns the reverse complement of s.
"""
def revcomp(s):
    """Return the reverse complement of the DNA sequence s.

    Every base is swapped for its pairing partner (A <-> T, C <-> G) and
    the resulting strand is read back to front. A character other than
    A, C, G, or T raises KeyError.
    """
    pairs = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}
    # Complement the bases in their original order first, then flip the
    # whole strand in one step.
    complemented = [pairs[base] for base in s]
    return "".join(reversed(complemented))


"""
valid takes one argument, a string called s.
It returns true if and only if s contains no characters other than A, C, T, and G.
It returns false otherwise.
"""
def valid(s):
    """Return True if s consists solely of the characters A, C, T, and G.

    The empty string counts as valid because it contains no disallowed
    character. Lowercase letters and whitespace are rejected.
    """
    # The pattern matches any single character that is NOT an allowed base.
    invalid_base = re.compile(r'[^ACTG]')
    # Compare against None by identity, not equality.
    return invalid_base.search(s) is None

"""
translate takes one argument, a string called s.
It generates all six possible translations of s:
three reading frames for the forward direction and
three for the reverse complement.
"""
def translate(seq):
    """Return the six reading-frame translations of the DNA sequence seq.

    The forward strand is translated starting at offsets 0, 1, and 2, and
    then the reverse complement (obtained from revcomp) is translated at
    the same three offsets.

    Args:
        seq: DNA sequence; expected to contain only uppercase A, C, G, T.

    Returns:
        A list of six strings of one-letter amino acid codes: the three
        forward frames followed by the three reverse-complement frames.
        Each translation ends just before its first stop codon (the '_'
        marker is never included), and trailing bases that do not form a
        full codon are ignored. A frame may therefore be an empty string.
    """
    genecode = {
        "ATA": 'I', "ATC": 'I', "ATT": 'I', "ATG": 'M',
        "ACA": 'T', "ACC": 'T', "ACG": 'T', "ACT": 'T',
        "AAC": 'N', "AAT": 'N', "AAA": 'K', "AAG": 'K',
        "AGC": 'S', "AGT": 'S', "AGA": 'R', "AGG": 'R',
        "CTA": 'L', "CTC": 'L', "CTG": 'L', "CTT": 'L',
        "CCA": 'P', "CCC": 'P', "CCG": 'P', "CCT": 'P',
        "CAC": 'H', "CAT": 'H', "CAA": 'Q', "CAG": 'Q',
        "CGA": 'R', "CGC": 'R', "CGG": 'R', "CGT": 'R',
        "GTA": 'V', "GTC": 'V', "GTG": 'V', "GTT": 'V',
        "GCA": 'A', "GCC": 'A', "GCG": 'A', "GCT": 'A',
        "GAC": 'D', "GAT": 'D', "GAA": 'E', "GAG": 'E',
        "GGA": 'G', "GGC": 'G', "GGG": 'G', "GGT": 'G',
        "TCA": 'S', "TCC": 'S', "TCG": 'S', "TCT": 'S',
        "TTC": 'F', "TTT": 'F', "TTA": 'L', "TTG": 'L',
        "TAC": 'Y', "TAT": 'Y', "TAA": '_', "TAG": '_',
        "TGC": 'C', "TGT": 'C', "TGA": '_', "TGG": 'W',
    }

    results = []
    # Both strands are handled identically: three frames each.
    for strand in (seq, revcomp(seq)):
        for offset in range(3):
            protein = []
            # Stopping at len - 2 guarantees every slice is a complete
            # three-base codon; leftover bases at the end are skipped.
            for start in range(offset, len(strand) - 2, 3):
                amino = genecode[strand[start:start + 3]]
                if amino == '_':
                    # Stop codon: truncate here without recording it.
                    break
                protein.append(amino)
            results.append("".join(protein))
    return results


# Script entry point: translate the sequence given on the command line and
# print each of its translations on its own line.
if __name__ == "__main__":
    # Explicit checks are used instead of assert because assertions are
    # removed when Python runs with -O, which would let bad input through.
    if len(sys.argv) != 2:
        sys.exit("usage: {} DNA_SEQUENCE".format(sys.argv[0]))

    sequence = sys.argv[1]
    if not valid(sequence):
        sys.exit("error: the sequence may contain only the uppercase "
                 "letters A, C, G, and T")

    for s in translate(sequence):
        print(s)