From ae98925062d5a392a73df4a1456df3a4a3be9b18 Mon Sep 17 00:00:00 2001 From: Jesse Luehrs Date: Fri, 8 Mar 2013 20:14:51 -0600 Subject: factor common behavior out into a library --- .gitignore | 1 + DNA.rs | 7 ++++--- GC.rs | 63 +++++-------------------------------------------------- HAMM.rs | 17 ++++++--------- REVC.rs | 29 ++++++------------------- RNA.rs | 12 +++++++---- rosalind/dna.rs | 30 ++++++++++++++++++++++++++ rosalind/fasta.rs | 45 +++++++++++++++++++++++++++++++++++++++ rosalind/io.rs | 5 +++++ rosalind/mod.rc | 8 +++++++ rosalind/str.rs | 21 +++++++++++++++++++ 11 files changed, 139 insertions(+), 99 deletions(-) create mode 100644 rosalind/dna.rs create mode 100644 rosalind/fasta.rs create mode 100644 rosalind/io.rs create mode 100644 rosalind/mod.rc create mode 100644 rosalind/str.rs diff --git a/.gitignore b/.gitignore index 1082015..3fab093 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ .*.sw* data/ rosalind*.txt +rosalind/librosalind*.so diff --git a/DNA.rs b/DNA.rs index dc572e2..12e60d7 100644 --- a/DNA.rs +++ b/DNA.rs @@ -1,4 +1,5 @@ -use io::{stdin,println,ReaderUtil}; +extern mod rosalind; +use rosalind::io::input_line; fn count_nucleotides(dna: &str) -> (int, int, int, int) { let mut (a, c, g, t) = (0, 0, 0, 0); @@ -28,8 +29,8 @@ fn count_nucleotides_2(dna: &str) -> (int, int, int, int) { } fn main() { - let dna = stdin().read_line(); + let dna = input_line(); let (a, c, g, t) = count_nucleotides(dna); /*let (a, c, g, t) = count_nucleotides_2(dna);*/ - println(fmt!("%d %d %d %d", a, c, g, t)); + io::println(fmt!("%d %d %d %d", a, c, g, t)); } diff --git a/GC.rs b/GC.rs index 62dd706..11f18b3 100644 --- a/GC.rs +++ b/GC.rs @@ -1,59 +1,6 @@ -use io::{println,stdin,Reader,ReaderUtil}; -use str::{push_str,unshift_char,each_char,len}; - -const EOF: char = -1 as char; - -struct FASTAReader { - in: Reader, - priv mut peeked: char, -} - -impl FASTAReader { - static fn new() -> FASTAReader { - FASTAReader { in: stdin(), peeked: EOF } - } - - fn read_line(&self) -> ~str { - let mut line = self.in.read_line(); - if self.peeked != '>' { - unshift_char(&mut line, self.peeked); - } - self.peeked = self.in.read_char(); - line - } - - fn read_sequence(&self) -> (~str, ~str) { - if self.peeked == EOF { - self.peeked = self.in.read_char(); - } - - let name = self.read_line(); - let mut dna = ~""; - while !self.in.eof() && self.peeked != '>' { - let line = self.read_line(); - push_str(&mut dna, line); - } - (name, dna) - } - - fn each_sequence(&self, cb: fn(~str, ~str) -> bool) { - while !self.in.eof() { - let (name, dna) = self.read_sequence(); - cb(name, dna); - } - } -} - -pure fn gc_content(dna: ~str) -> float { - let mut content = 0; - for each_char(dna) |ch| { - match ch { - 'C' | 'G' => content += 1, - _ => (), - } - } - (content as float) / (len(dna) as float) -} +extern mod rosalind; +use rosalind::dna::gc_content; +use rosalind::fasta::FASTAReader; fn main() { let reader = FASTAReader::new(); @@ -65,6 +12,6 @@ fn main() { max_name = name; } } - println(max_name); - println(fmt!("%.6f", max_gc * 100f)); + io::println(max_name); + io::println(fmt!("%.6f", max_gc * 100f)); } diff --git a/HAMM.rs b/HAMM.rs index 5afb812..a1b2afe 100644 --- a/HAMM.rs +++ b/HAMM.rs @@ -1,16 +1,11 @@ -use io::{stdin,println,ReaderUtil}; +extern mod rosalind; +use rosalind::io::input_line; +use rosalind::str::hamming; fn main() { - let dna1 = stdin().read_line(); - let dna2 = stdin().read_line(); + let dna1 = input_line(); + let dna2 = input_line(); assert str::len(dna1) == str::len(dna2); - let mut hamming = 0; - for str::each_chari(dna1) |i, ch| { - if ch != str::char_at(dna2, i) { - hamming += 1; - } - } - - println(fmt!("%d", hamming)); + io::println(fmt!("%d", hamming(dna1, dna2))); } diff --git a/REVC.rs b/REVC.rs index 3c9e9e7..2503c87 100644 --- a/REVC.rs +++ b/REVC.rs @@ -1,26 +1,9 @@ -use io::{stdin,println,ReaderUtil}; - -/* really feels like there should be a more efficient way to do this */ -fn reverse(s: &str) -> ~str { - let mut r = ~""; - str::reserve(&mut r, str::len(s)); - for str::each_char(s) |ch| { - str::unshift_char(&mut r, ch) - } - r -} - -fn complement(ch: char) -> char { - match ch { - 'A' => 'T', - 'C' => 'G', - 'G' => 'C', - 'T' => 'A', - _ => fail ~"Unknown character found", - } -} +extern mod rosalind; +use rosalind::dna::complement; +use rosalind::io::input_line; +use rosalind::str::reverse; fn main() { - let dna = stdin().read_line(); - println(str::map(reverse(dna), complement)); + let dna = input_line(); + io::println(str::map(reverse(dna), complement)); } diff --git a/RNA.rs b/RNA.rs index 30091fe..9c20887 100644 --- a/RNA.rs +++ b/RNA.rs @@ -1,12 +1,16 @@ use io::{stdin,stdout,ReaderUtil,WriterUtil}; +extern mod rosalind; +use rosalind::dna::transcribe; + fn main() { let stdout = stdout(); for stdin().each_char() |ch| { - match ch { - 'T' => { stdout.write_char('U') } - '\n' => { stdout.write_char(ch); return } - _ => { stdout.write_char(ch) } + // each_char returning -1 here is a bug + if (ch == '\n' || ch == (-1 as char)) { + stdout.write_char('\n'); + return; } + stdout.write_char(transcribe(ch)); } } diff --git a/rosalind/dna.rs b/rosalind/dna.rs new file mode 100644 index 0000000..a2ee73d --- /dev/null +++ b/rosalind/dna.rs @@ -0,0 +1,30 @@ +use str = core::str; + +pure fn gc_content(dna: ~str) -> float { + let mut content = 0; + for str::each_char(dna) |ch| { + match ch { + 'C' | 'G' => content += 1, + _ => (), + } + } + (content as float) / (str::len(dna) as float) +} + +pure fn complement(base: char) -> char { + match base { + 'A' => 'T', + 'C' => 'G', + 'G' => 'C', + 'T' => 'A', + _ => fail ~"Unknown character found", + } +} + +pure fn transcribe(base: char) -> char { + match base { + 'T' => 'U', + 'A' | 'C' | 'G' => base, + _ => fail ~"Unknown character found", + } +} diff --git a/rosalind/fasta.rs b/rosalind/fasta.rs new file mode 100644 index 0000000..9cd80a1 --- /dev/null +++ b/rosalind/fasta.rs @@ -0,0 +1,45 @@ +use core::io::{stdin,Reader,ReaderUtil}; +use str = core::str; + +const EOF: char = -1 as char; + +struct FASTAReader { + in: Reader, + priv mut peeked: char, +} + +impl FASTAReader { + static fn new() -> FASTAReader { + FASTAReader { in: stdin(), peeked: EOF } + } + + priv fn read_line(&self) -> ~str { + let mut line = self.in.read_line(); + if self.peeked != '>' { + str::unshift_char(&mut line, self.peeked); + } + self.peeked = self.in.read_char(); + line + } + + priv fn read_sequence(&self) -> (~str, ~str) { + if self.peeked == EOF { + self.peeked = self.in.read_char(); + } + + let name = self.read_line(); + let mut dna = ~""; + while !self.in.eof() && self.peeked != '>' { + let line = self.read_line(); + str::push_str(&mut dna, line); + } + (name, dna) + } + + fn each_sequence(&self, cb: fn(~str, ~str) -> bool) { + while !self.in.eof() { + let (name, dna) = self.read_sequence(); + cb(name, dna); + } + } +} diff --git a/rosalind/io.rs b/rosalind/io.rs new file mode 100644 index 0000000..86abe3f --- /dev/null +++ b/rosalind/io.rs @@ -0,0 +1,5 @@ +use core::io::{stdin,ReaderUtil}; + +fn input_line() -> ~str { + stdin().read_line() +} diff --git a/rosalind/mod.rc b/rosalind/mod.rc new file mode 100644 index 0000000..3908b83 --- /dev/null +++ b/rosalind/mod.rc @@ -0,0 +1,8 @@ +#[link(name = "rosalind", vers = "0.0.1", author = "doy")]; + +#[crate_type = "lib"]; + +mod dna; +mod fasta; +mod io; +mod str; diff --git a/rosalind/str.rs b/rosalind/str.rs new file mode 100644 index 0000000..0fac1cd --- /dev/null +++ b/rosalind/str.rs @@ -0,0 +1,21 @@ +use str = core::str; + +/* really feels like there should be a more efficient way to do this */ +fn reverse(s: &str) -> ~str { + let mut r = ~""; + str::reserve(&mut r, str::len(s)); + for str::each_char(s) |ch| { + str::unshift_char(&mut r, ch) + } + r +} + +pure fn hamming(string1: ~str, string2: ~str) -> int { + let mut hamming = 0; + for str::each_chari(string1) |i, ch| { + if ch != str::char_at(string2, i) { + hamming += 1; + } + } + hamming +} -- cgit v1.2.3