Commit 7028e34889cc297551c110fbd68fa3b838f24027

Authored by Jean-Michel Garant
1 parent ce612252
Exists in stable-0.3 and in 1 other branch master

argparse implemented

Showing 1 changed file with 53 additions and 18 deletions   Show diff stats
screen.py
... ... @@ -18,6 +18,7 @@
18 18  
19 19 from g4base import *
20 20 import os
  21 +import sys
21 22 import argparse
22 23  
23 24 def apply_network(ann,
... ... @@ -41,8 +42,8 @@ def apply_network(ann,
41 42 'transcript_stable_id','full_name','HGNC_id','identifier',
42 43 'source','genome_assembly','chromosome','start','end','strand',
43 44 'length','sequence','cGcC','G4H','G4NN']
44   - else:
45   - columns = regex.split(",", columns.strip("[]"))
  45 + #else:
  46 + # columns = regex.split(",", columns.strip("[]"))
46 47 columns_to_drop = []
47 48 for essential in ['length', 'sequence', 'g4']:
48 49 if essential not in columns:
... ... @@ -60,9 +61,9 @@ def apply_network(ann,
60 61 else:
61 62 screen_usage(52, 'fasta input not specified or not supported')
62 63 if 'G4NN' in columns:
63   - network_file = open(ann,'r')
64   - ann = pickle.load(network_file)
65   - network_file.close()
  64 + # network_file = open(ann,'r')
  65 + ann = pickle.load(ann)
  66 + # network_file.close()
66 67 RNome_trans_df = trimer_transfo(RNome_df, 'sequence', verbose=verbose)
67 68 # RNome_trans_df = kmer_transfo(RNome_df, 3, 'length', 'sequence', 'g4',
68 69 # int(wdw_len), jellyfish=False, overlapped=True,
... ... @@ -213,7 +214,7 @@ def main():
213 214 else:
214 215 screen_usage(50, 'An option is missing, incorrect or not authorized')
215 216  
216   -class Formatter(argparse.ArgumentDefaultsHelpFormatter):
  217 +class Formatter(argparse.HelpFormatter):
217 218 # use defined argument order to display usage
218 219 def _format_usage(self, usage, actions, groups, prefix):
219 220 if prefix is None:
... ... @@ -247,29 +248,34 @@ def arguments():
247 248 "software, and you are welcome to redistribute it under certain "\
248 249 "conditions <http://www.gnu.org/licenses/>.")
249 250 parser.add_argument('FASTA',
250   - type=argparse.FileType('r'),
  251 + type=str,
251 252 help='FASTA file (.fa .fas)')
252 253 parser.add_argument("-a", "--ann",
253 254 type=argparse.FileType('r'),
254 255 default=os.path.dirname(__file__)+"/G4RNA_2016-11-07.pkl",
255   - help="Supply a picled ANN (.pkl format)")
  256 + help="Supply a picled ANN (default: G4RNA_2016-11-07.pkl)")
256 257 parser.add_argument("-w", "--window",
257 258 type=int,
258 259 default=60,
259   - help="Window length",
  260 + help="Window length (default: 60)",
260 261 metavar="INT")
261 262 parser.add_argument("-s", "--step",
262 263 type=int,
263 264 default=10,
264   - help="Step length between windows",
  265 + help="Step length between windows (default: 10)",
265 266 metavar="INT")
266 267 parser.add_argument("-b", "--bedgraph",
267 268 action="store_true",
268 269 default=False,
269 270 help="Display output as BedGraph, user must provides columns")
  271 + ## TODO: offer the three scores (cGcC, G4H, G4NN) as choices for the
  272 + ## bedgraph option so that the columns are selected for the user; the
  272 + ## chosen value must be validated
270 273 parser.add_argument("-c", "--columns",
271 274 nargs="+",
272   - choices=["list", "all", "description", "gene_symbol",
  275 + choices=["list",
  276 + "all",
  277 + "description",
  278 + "gene_symbol",
273 279 "mrnaAcc",
274 280 "protAcc",
275 281 "gene_stable_id",
... ... @@ -290,8 +296,9 @@ def arguments():
290 296 "G4H",
291 297 "G4NN",
292 298 ],
293   - default="description",
294   - help="Columns to display. To browse available columns use: -c list",
  299 + default=["description","sequence","start","cGcC","G4H","G4NN"],
  300 + help="Columns to display (default: description). "\
  301 + "To browse available columns use: -c list",
295 302 metavar="")
296 303 parser.add_argument("-v", "--verbose",
297 304 action="store_true",
... ... @@ -308,9 +315,11 @@ def to_replace_main():
308 315 """
309 316 Functions calls
310 317 """
311   - args = arguments().parse_args()
  318 + parser = arguments()
  319 + args = parser.parse_args()
312 320 if args.columns == ["list"]:
313   - splitted_help = arguments().format_help().split(". To browse available columns use:\n\
  321 + splitted_help = parser.format_help().split(
  322 + ". To browse available columns use:\n\
314 323 -c list (default: description)")
315 324 print("\n\t".join([splitted_help[0],
316 325 "Available columns:",
... ... @@ -338,8 +347,34 @@ def to_replace_main():
338 347 "G4NN \t\tG4NN score of similitude",
339 348 " \t\t(must be specified to use ANN)",
340 349 splitted_help[1]]))
341   - print(args)
  350 + if args.bedgraph and len(args.columns) != 4 and args.columns[-1] not in [
  351 + 'cGcC', 'G4H', 'G4NN']:
  352 + parser.print_usage()
  353 + sys.stderr.write(parser.prog+': error: '\
  354 + 'BedGraph format requires 4 columns: '\
  355 + 'chromosome start end [SCORE]\n'\
  356 + 'where [SCORE] is either cGcC, G4H or G4NN\n')
  357 + sys.exit()
  358 + print args
  359 + try:
  360 + apply_network(args.ann,
  361 + args.FASTA,
  362 + args.columns,
  363 + args.window,
  364 + args.step,
  365 + args.bedgraph,
  366 + args.verbose
  367 + ).to_csv(
  368 + path_or_buf=sys.stdout, sep='\t',
  369 + index=(args.bedgraph==False),
  370 + header=(args.bedgraph==False))
  371 + except:
  372 + if args.error:
  373 + raise
  374 + else:
  375 + screen_usage(50, 'An option is missing, incorrect or not authorized')
  376 +
342 377  
343 378 if __name__ == '__main__':
344   - main()
345   -# to_replace_main()
  379 +# main()
  380 + to_replace_main()
... ...