Commit c1d735bb3f915dbda5fc3140911b8d027077b861

Authored by Jean-Michel Garant
1 parent 4193d2c1

Default to stdin and range detection without ID

LICENSE
... ... @@ -631,8 +631,8 @@ to attach them to the start of each source file to most effectively
631 631 state the exclusion of warranty; and each file should have at least
632 632 the "copyright" line and a pointer to where the full notice is found.
633 633  
634   - {one line to give the program's name and a brief idea of what it does.}
635   - Copyright (C) {year} {name of author}
  634 + G4RNA screener, identification of potential RNA G-quadruplexes.
  635 + Copyright (C) 2017 Jean-Michel Garant
636 636  
637 637 This program is free software: you can redistribute it and/or modify
638 638 it under the terms of the GNU General Public License as published by
... ... @@ -652,7 +652,7 @@ Also add information on how to contact you by electronic and paper mail.
652 652 If the program does terminal interaction, make it output a short
653 653 notice like this when it starts in an interactive mode:
654 654  
655   - {project} Copyright (C) {year} {fullname}
  655 + G4RNA screener Copyright (C) 2017 Jean-Michel Garant
656 656 This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 657 This is free software, and you are welcome to redistribute it
658 658 under certain conditions; type `show c' for details.
... ...
MANUAL.md
... ... @@ -36,15 +36,15 @@ usherbrooke.ca/G4RNA/). It also provides the previously described:
36 36  
37 37 > Show version information and exit.
38 38  
39   -**_-a, --ann_**
  39 +**_-a, --ann_** = G4RNA_2016-11-07.pkl
40 40  
41 41 > Path to the pickled ANN (.pkl) which will provide the program a particular
42 42 pattern to evaluate each sequence or window of a sequence.
43 43  
44   -**_-f, --fasta_**
  44 +**_-f, --fasta_** = STDIN
45 45  
46 46 > Path to the fasta file to analyze. Will support string value as long as it
47   -respects the fasta format. Use /dev/stdin to feed standard input to -f argument.
  47 +respects the fasta format. Use "STDIN" to feed standard input to the -f argument.
48 48  
49 49 **_-w, --window_**
50 50  
... ...
g4base.py
... ... @@ -139,8 +139,8 @@ def gen_G4RNA_df(
139 139 content['start'] = int(nd-no*window_step-len(s)+1)
140 140 content['end'] = int(nd-no*window_step)
141 141 else:
142   - content['start'] = int(no*window_step)
143   - content['end'] = int(no*window_step+len(s)-1)
  142 + content['start'] = int(no*window_step+1)
  143 + content['end'] = int(no*window_step+len(s))
144 144 content['length'] = len(s)
145 145 content['sequence'] = s
146 146 content['cGcC'] = cgcc_scorer(s)
... ...
screen.py
... ... @@ -120,8 +120,9 @@ def main():
120 120 #Default values here in option_dict
121 121 option_dict = {"--columns":"description,sequence,G4NN",
122 122 "--window":False,
  123 + "--ann":"G4RNA_2016-11-07.pkl",
123 124 "--step":10,
124   - "--fasta":False}
  125 + "--fasta":"STDIN"}
125 126 for no, arg in enumerate(sys.argv):
126 127 if arg[0] == "-":
127 128 if arg in ["-?","--help"]:
... ... @@ -148,10 +149,11 @@ def main():
148 149 screen_usage('No value provided for option "%s"'%arg)
149 150 else:
150 151 screen_usage('Argument "%s" not recognized'%arg)
151   - if len(sys.argv) == 1 \
152   - or ("-c" in option_dict.keys() and option_dict["-c"] == "list") \
  152 + if ("-c" in option_dict.keys() and option_dict["-c"] == "list") \
153 153 or (option_dict["--columns"] == "list"):
154 154 screen_usage()
  155 + if len(sys.argv) == 1 and sys.stdin.isatty():
  156 + screen_usage("no arguments detected")
155 157 if ("-b" in option_dict.keys() or "--bedgraph" in option_dict.keys()):
156 158 if "-c" in option_dict.keys():
157 159 column_str = "-c"
... ... @@ -165,6 +167,10 @@ def main():
165 167 screen_usage('bedGraph format requires 4 columns: \
166 168 chrome,start,end,[SCORE]\n\
167 169 where [SCORE] is either cGcC, G4H or G4NN')
  170 + if "-f" in option_dict.keys() and option_dict['-f'] == "STDIN":
  171 + option_dict['-f'] = "/dev/stdin"
  172 + elif option_dict['--fasta'] == "STDIN":
  173 + option_dict['--fasta'] = "/dev/stdin"
168 174 try:
169 175 apply_network(option_dict.get("-a") or option_dict.get("--ann"),
170 176 option_dict.get("-f") or option_dict.get("--fasta"),
... ...
utils.py
... ... @@ -157,7 +157,12 @@ def format_description(fas_description, verbose=None):
157 157 except:
158 158 verbosify(verbose,"Ensembl not recognised for %s"%fas_description)
159 159 if 'description' not in infos.keys() or infos.get('description') == '':
160   - infos['description'] = fas_description
  160 + try:
  161 + infos = regex.search("(?<description>(?:.*)(GRCh\d\d:)?(hg\d\d)?(?\
  162 +<chromosome>(chr)?[^:]*):(?<start>\d*)[:-](?<end>\d*)(?::)(?<strand>[+-1]?)?)",
  163 + fas_description).groupdict()
  164 + except:
  165 + infos['description'] = fas_description
161 166 if 'strand' in infos.keys() and infos.get('strand') == '1':
162 167 infos['strand'] = '+'
163 168 if infos.get('strand') not in ['+','-'] and infos.get('start')\
... ...