1use crate::rescue::SearchOption;
52use std::path::PathBuf;
53
54use clap::{Parser, Subcommand};
55
56mod assemble;
57mod build;
58mod cluster;
59mod coassemble;
60mod consensus;
61mod correct;
62mod eval_model;
63mod fetch;
64mod filter;
65mod genotype;
66mod impute;
67mod phase;
68mod recruit;
69mod rescue;
70mod train;
71mod trim;
72
73#[derive(Debug, Parser)]
74#[clap(name = "hidive")]
75#[clap(about = "Analysis of high-diversity loci through genome co-assembly of long/short reads.", long_about = None)]
76struct Cli {
78 #[clap(subcommand)]
79 command: Commands,
80}
81
82const DEFAULT_KMER_SIZE: usize = 17;
83const DEFAULT_WINDOW_SIZE: usize = 1000;
84
85#[derive(Debug, Subcommand)]
86enum Commands {
87 #[clap(arg_required_else_help = true)]
89 Train {
90 #[clap(short, long, value_parser, default_value = "/dev/stdout")]
92 output: PathBuf,
93
94 #[clap(short, long, value_parser, default_value_t = DEFAULT_KMER_SIZE)]
96 kmer_size: usize,
97
98 #[clap(short, long, value_parser, default_value_t = 200)]
103 iterations: usize,
104
105 #[clap(short, long, value_parser, required = true)]
107 long_read_seq_paths: Vec<PathBuf>,
108
109 #[clap(short, long, value_parser, required = false)]
111 short_read_seq_paths: Vec<PathBuf>,
112
113 #[clap(long, value_parser, required = true, num_args = 2)]
115 truth_seq_paths: Vec<PathBuf>,
116
117 #[clap(long, value_parser, required = false)]
120 test_long_read_seq_paths: Vec<PathBuf>,
121
122 #[clap(long, value_parser, required = false)]
125 test_short_read_seq_paths: Vec<PathBuf>,
126
127 #[clap(long, value_parser, required = false, num_args = 2)]
130 test_truth_seq_paths: Vec<PathBuf>,
131
132 #[clap(short, long, value_parser)]
134 debug: bool,
135 },
136
137 #[clap(arg_required_else_help = true)]
139 EvalModel {
140 #[clap(short, long, value_parser, default_value = "/dev/stdout")]
142 output: PathBuf,
143
144 #[clap(short, long, value_parser, default_value_t = DEFAULT_KMER_SIZE)]
146 kmer_size: usize,
147
148 #[clap(short, long, value_parser, required = true)]
150 long_read_seq_paths: Vec<PathBuf>,
151
152 #[clap(short, long, value_parser, required = false)]
154 short_read_seq_paths: Vec<PathBuf>,
155
156 #[clap(long, value_parser, required = true, num_args = 2)]
158 truth_seq_paths: Vec<PathBuf>,
159
160 #[clap(short, long, value_parser, required = true)]
162 model_path: PathBuf,
163 },
164
165 #[clap(arg_required_else_help = true)]
167 Fetch {
168 #[clap(short, long, value_parser, default_value = "/dev/stdout")]
170 output: PathBuf,
171
172 #[clap(short, long, value_parser, required = true)]
174 loci: Vec<String>,
175
176 #[clap(short, long, value_parser, default_value_t = 0)]
178 padding: u64,
179
180 #[clap(long, value_parser)]
182 haplotype: Option<u8>,
183
184 #[clap(required = true, value_parser)]
186 seq_paths: Vec<PathBuf>,
187 },
188
189 #[clap(arg_required_else_help = true)]
191 Rescue {
192 #[clap(short, long, value_parser, default_value = "/dev/stdout")]
194 output: PathBuf,
195
196 #[clap(short, long, value_parser, default_value_t = DEFAULT_KMER_SIZE)]
198 kmer_size: usize,
199
200 #[clap(short, long, value_parser, default_value_t = 70)]
202 min_kmers_pct: usize,
203
204 #[clap(short, long, default_value = "contig-and-interval")]
206 search_option: SearchOption,
207
208 #[clap(short, long, value_parser, required = true)]
210 ref_path: Option<PathBuf>,
211
212 #[clap(short, long, value_parser, required = false)]
214 loci: Option<Vec<String>>,
215
216 #[clap(short, long, value_parser, required = true)]
218 fastx_paths: Vec<PathBuf>,
219
220 #[clap(required = true, value_parser)]
222 seq_paths: Vec<PathBuf>,
223 },
224
225 #[clap(arg_required_else_help = true)]
227 Recruit {
228 #[clap(short, long, value_parser, default_value = "/dev/stdout")]
230 output: PathBuf,
231
232 #[clap(short, long, value_parser, default_value_t = DEFAULT_KMER_SIZE)]
234 kmer_size: usize,
235
236 #[clap(short, long, value_parser, default_value_t = 70)]
238 min_kmers_pct: usize,
239
240 #[clap(short, long, value_parser, required = true)]
242 fasta_paths: Vec<PathBuf>,
243
244 #[clap(required = true, value_parser)]
246 seq_paths: Vec<PathBuf>,
247 },
248
249 #[clap(arg_required_else_help = true)]
251 Filter {
252 #[clap(short, long, value_parser, default_value = "/dev/stdout")]
254 output: PathBuf,
255
256 #[clap(short, long, value_parser)]
258 gfa_path: PathBuf,
259
260 #[clap(required = true, value_parser)]
262 short_read_fasta_paths: Vec<PathBuf>,
263 },
264
265 #[clap(arg_required_else_help = true)]
267 Cluster {
268 #[clap(short, long, value_parser, default_value = "/dev/stdout")]
270 output: PathBuf,
271
272 #[clap(short, long, required = true, value_parser)]
274 sample_name: String,
275
276 #[clap(short, long, value_parser, required = true)]
278 from_loci: Vec<String>,
279
280 #[clap(short, long, value_parser, required = true)]
282 to_loci: Vec<String>,
283
284 #[clap(short, long, value_parser, required = true)]
286 ref_path: PathBuf,
287
288 #[clap(required = true, value_parser)]
290 bam_path: PathBuf,
291 },
292
293 #[clap(arg_required_else_help = true)]
295 Consensus {
296 #[clap(short, long, value_parser, default_value = "/dev/stdout")]
298 output: PathBuf,
299
300 #[clap(short, long, value_parser, required = true)]
302 loci: Vec<String>,
303
304 #[clap(required = true, value_parser)]
313 bam_path: PathBuf,
314 },
315
316 #[clap(arg_required_else_help = true)]
318 Trim {
319 #[clap(short, long, value_parser, default_value = "/dev/stdout")]
321 output: PathBuf,
322
323 #[clap(short, long, value_parser)]
325 loci: Vec<String>,
326
327 #[clap(required = true, value_parser)]
329 bam_path: PathBuf,
330 },
331
332 #[clap(arg_required_else_help = true)]
334 Build {
335 #[clap(short, long, value_parser, default_value = "/dev/stdout")]
337 output: PathBuf,
338
339 #[clap(short, long, value_parser, default_value_t = DEFAULT_KMER_SIZE)]
341 kmer_size: usize,
342
343 #[clap(short, long, value_parser, required = true)]
345 reference_name: String,
346
347 #[clap(required = true, value_parser)]
349 fasta_path: PathBuf,
350 },
351
352 #[clap(arg_required_else_help = true)]
354 Impute {
355 #[clap(short, long, value_parser, default_value = "/dev/stdout")]
357 output: PathBuf,
358
359 #[clap(required = true, value_parser)]
361 graph: PathBuf,
362 },
363
364 #[clap(arg_required_else_help = true)]
366 Assemble {
367 #[clap(short, long, value_parser, default_value = "/dev/stdout")]
369 output: PathBuf,
370
371 #[clap(short, long, value_parser, required = false)]
373 gfa_output: Option<PathBuf>,
374
375 #[clap(short, long, value_parser, default_value_t = DEFAULT_KMER_SIZE)]
377 kmer_size: usize,
378
379 #[clap(short, long, required = true, value_parser)]
381 model_path: PathBuf,
382
383 #[clap(required = true, value_parser)]
385 long_read_fasta_path: PathBuf,
386
387 #[clap(required = true, value_parser)]
389 short_read_fasta_path: PathBuf,
390 },
391
392 #[clap(arg_required_else_help = true)]
394 Correct {
395 #[clap(short, long, value_parser, default_value = "/dev/stdout")]
397 output: PathBuf,
398
399 #[clap(short, long, value_parser, required = true)]
401 loci: Vec<String>,
402
403 #[clap(short, long, value_parser, default_value_t = DEFAULT_KMER_SIZE)]
405 kmer_size: usize,
406
407 #[clap(short, long, value_parser, default_value_t = DEFAULT_WINDOW_SIZE)]
409 window_size: usize,
410
411 #[clap(short, long, required = true, value_parser)]
413 model_path: PathBuf,
414
415 #[clap(required = true, value_parser)]
417 long_read_fasta_path: PathBuf,
418
419 #[clap(required = true, value_parser)]
421 short_read_fasta_path: PathBuf,
422 },
423
424 #[clap(arg_required_else_help = true)]
426 Coassemble {
427 #[clap(short, long, value_parser, default_value = "/dev/stdout")]
429 output: PathBuf,
430
431 #[clap(short, long, value_parser, default_value_t = DEFAULT_KMER_SIZE)]
433 kmer_size: usize,
434
435 #[clap(short, long, required = true, value_parser)]
437 model_path: PathBuf,
438
439 #[clap(short, long, required = true, value_parser)]
441 reference_fasta_paths: Vec<PathBuf>,
442
443 #[clap(required = true, value_parser)]
445 long_read_fasta_path: PathBuf,
446
447 #[clap(required = false, value_parser)]
449 short_read_fasta_path: PathBuf,
450 },
451
452 #[clap(arg_required_else_help = true)]
454 Phase {
455 #[clap(short, long, value_parser, default_value = "/dev/stdout")]
457 output: PathBuf,
458
459 #[clap(short, long, required = true, value_parser)]
461 sample_name: String,
462
463 #[clap(short, long, value_parser, required = true)]
465 loci: Vec<String>,
466
467 #[clap(short, long, required = true, value_parser)]
469 reference_fasta_path: PathBuf,
470
471 #[clap(required = true, value_parser)]
473 bam_path: PathBuf,
474 },
475
476 #[clap(arg_required_else_help = true)]
478 Genotype {
479 #[clap(short, long, value_parser, default_value = "/dev/stdout")]
481 output: PathBuf,
482
483 #[clap(short, long, required = true, value_parser)]
485 sample_name: String,
486
487 #[clap(short, long, value_parser, required = true)]
489 loci: Vec<String>,
490
491 #[clap(short, long, required = true, value_parser)]
493 reference_fasta_path: PathBuf,
494
495 #[clap(required = true, value_parser)]
497 vcf_path: PathBuf,
498
499 #[clap(required = true, value_parser)]
501 bam_path: PathBuf,
502 },
503}
504
505fn main() {
506 let args = Cli::parse();
507
508 skydive::elog!("Hidive version {}", env!("CARGO_PKG_VERSION"));
509 skydive::elog!("{:?}", args);
510
511 let start_time = std::time::Instant::now();
512
513 match args.command {
514 Commands::Train {
515 output,
516 kmer_size,
517 iterations,
519 long_read_seq_paths,
520 short_read_seq_paths,
521 truth_seq_paths,
522 test_long_read_seq_paths,
523 test_short_read_seq_paths,
524 test_truth_seq_paths,
525 debug,
526 } => {
527 train::start(
528 &output,
529 kmer_size,
530 iterations,
531 &long_read_seq_paths,
533 &short_read_seq_paths,
534 &truth_seq_paths,
535 &test_long_read_seq_paths,
536 &test_short_read_seq_paths,
537 &test_truth_seq_paths,
538 debug,
539 );
540 }
541 Commands::EvalModel {
542 output,
543 kmer_size,
544 long_read_seq_paths,
545 short_read_seq_paths,
546 truth_seq_paths,
547 model_path,
548 } => {
549 eval_model::start(
550 &output,
551 kmer_size,
552 &long_read_seq_paths,
553 &short_read_seq_paths,
554 &truth_seq_paths,
555 &model_path,
556 );
557 }
558 Commands::Fetch {
559 output,
560 loci,
561 padding,
562 haplotype,
563 seq_paths,
564 } => {
565 fetch::start(&output, &loci, padding, haplotype, &seq_paths);
566 }
567 Commands::Rescue {
568 output,
569 kmer_size,
570 min_kmers_pct,
571 search_option,
572 ref_path,
573 loci,
574 fastx_paths,
575 seq_paths,
576 } => {
577 rescue::start(
578 &output,
579 kmer_size,
580 min_kmers_pct,
581 search_option,
582 ref_path,
583 loci,
584 &fastx_paths,
585 &seq_paths,
586 );
587 }
588 Commands::Recruit {
589 output,
590 kmer_size,
591 min_kmers_pct,
592 fasta_paths,
593 seq_paths,
594 } => {
595 recruit::start(&output, kmer_size, min_kmers_pct, &fasta_paths, &seq_paths);
596 }
597 Commands::Filter {
598 output,
599 gfa_path,
600 short_read_fasta_paths,
601 } => {
602 filter::start(&output, &gfa_path, &short_read_fasta_paths);
603 }
604 Commands::Cluster {
605 output,
606 sample_name,
607 from_loci,
608 to_loci,
609 ref_path,
610 bam_path,
611 } => {
612 cluster::start(
613 &output,
614 &sample_name,
615 &from_loci,
616 &to_loci,
617 &ref_path,
618 &bam_path,
619 );
620 }
621 Commands::Consensus {
622 output,
623 loci,
624 bam_path,
627 } => {
628 consensus::start(&output, &loci, &bam_path);
629 }
630 Commands::Trim {
631 output,
632 loci,
633 bam_path,
634 } => {
635 trim::start(&output, &loci, &bam_path);
636 }
637 Commands::Build {
638 output,
639 kmer_size,
640 fasta_path,
641 reference_name,
642 } => {
643 build::start(&output, kmer_size, &fasta_path, reference_name);
644 }
645 Commands::Impute { output, graph } => {
646 impute::start(&output, &graph);
647 }
648 Commands::Assemble {
649 output,
650 gfa_output,
651 kmer_size,
652 model_path,
653 long_read_fasta_path,
654 short_read_fasta_path,
655 } => {
656 assemble::start(
657 &output,
658 gfa_output,
659 kmer_size,
660 &model_path,
661 &long_read_fasta_path,
662 &short_read_fasta_path,
663 );
664 }
665 Commands::Correct {
666 output,
667 loci,
668 kmer_size,
669 window_size,
670 model_path,
671 long_read_fasta_path,
672 short_read_fasta_path,
673 } => {
674 correct::start(
675 &output,
676 &loci,
677 kmer_size,
678 window_size,
679 &model_path,
680 &long_read_fasta_path,
681 &short_read_fasta_path,
682 );
683 }
684 Commands::Coassemble {
685 output,
686 kmer_size,
687 model_path,
688 reference_fasta_paths,
689 long_read_fasta_path,
690 short_read_fasta_path,
691 } => {
692 coassemble::start(
693 &output,
694 kmer_size,
695 &model_path,
696 &reference_fasta_paths,
697 long_read_fasta_path,
698 short_read_fasta_path,
699 );
700 }
701 Commands::Phase {
702 output,
703 sample_name,
704 loci,
705 reference_fasta_path,
706 bam_path,
707 } => {
708 phase::start(
709 &output,
710 &sample_name,
711 &loci,
712 &reference_fasta_path,
713 &bam_path,
714 );
715 }
716 Commands::Genotype {
717 output,
718 sample_name,
719 loci,
720 reference_fasta_path,
721 vcf_path,
722 bam_path,
723 } => {
724 genotype::start(
725 &output,
726 &sample_name,
727 &loci,
728 &reference_fasta_path,
729 &vcf_path,
730 &bam_path,
731 );
732 }
733 }
734
735 skydive::elog!("Complete. Elapsed time: {}.", elapsed_time(start_time));
736}
737
738fn elapsed_time(start_time: std::time::Instant) -> String {
739 let end_time = std::time::Instant::now();
740 let elapsed_time = end_time.duration_since(start_time);
741
742 let elapsed_secs = elapsed_time.as_secs_f64();
743
744 if elapsed_secs < 60.0 {
745 format!("{:.2} seconds", elapsed_secs)
746 } else if elapsed_secs < 3600.0 {
747 format!("{:.2} minutes", elapsed_secs / 60.0)
748 } else if elapsed_secs < 86400.0 {
749 format!("{:.2} hours", elapsed_secs / 3600.0)
750 } else {
751 format!("{:.2} days", elapsed_secs / 86400.0)
752 }
753}