summaryrefslogtreecommitdiffstats
path: root/LING.pl
diff options
context:
space:
mode:
authorJesse Luehrs <doy@tozt.net>2012-11-18 14:55:15 -0600
committerJesse Luehrs <doy@tozt.net>2012-11-18 14:55:15 -0600
commite2e7f64c2647b1047669311dd509ba8c72d7bed6 (patch)
tree1aa2846764db3cc8c2c5b7237a8483aecdc4c29b /LING.pl
parent349bad0b288ee30dd15170bca19be266b0b1df55 (diff)
downloadrosalind-e2e7f64c2647b1047669311dd509ba8c72d7bed6.tar.gz
rosalind-e2e7f64c2647b1047669311dd509ba8c72d7bed6.zip
another solution
Diffstat (limited to 'LING.pl')
-rw-r--r--LING.pl80
1 files changed, 80 insertions, 0 deletions
diff --git a/LING.pl b/LING.pl
new file mode 100644
index 0000000..1245599
--- /dev/null
+++ b/LING.pl
@@ -0,0 +1,80 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use 5.016;
+
+use List::Util 'min';
+
+chomp(my $str = <>);
+my $max = 0;
+for my $n (1..length($str)) {
+ $max += min(4**$n, length($str) - ($n - 1));
+}
+
+my @nodes = (
+ [''],
+);
+
+for my $suffix (reverse map { substr($str, $_ - 1) } 1..length($str)) {
+ insert($suffix, 0);
+}
+
+printf "%.4f\n", observed() / $max;
+
+sub insert {
+ my ($str, $root) = @_;
+
+ return unless length($str);
+
+ for my $child (@{ $nodes[$root][1] || [] }) {
+ my $child_str = $nodes[$child][0];
+ next unless substr($str, 0, 1) eq substr($child_str, 0, 1);
+
+ my $min = 1;
+ my $max = min(length($child_str), length($str));
+
+ while (1) {
+ my $next = int(($max + $min) / 2);
+ last if $next == $min;
+ if (substr($str, $min, $next - $min) eq substr($child_str, $min, $next - $min)) {
+ $min = $next;
+ }
+ else {
+ $max = $next;
+ }
+ }
+ my $prefix = substr($str, 0, $min);
+ $child_str = substr($child_str, $min);
+ $str = substr($str, $min);
+
+ $nodes[$child][0] = $prefix;
+
+ if ($nodes[$child][1] && @{ $nodes[$child][1] } && length($child_str)) {
+ push @nodes, [$child_str, $nodes[$child][1]];
+ $nodes[$child][1] = [$#nodes];
+ }
+ else {
+ insert($child_str, $child);
+ }
+
+ insert($str, $child);
+
+ return;
+ }
+
+ push @nodes, [$str];
+ push @{ $nodes[$root][1] ||= [] }, $#nodes;
+}
+
+sub observed {
+ sub {
+ my ($node, $cur) = @_;
+
+ my $count = length($nodes[$node][0]);
+ for my $child (@{ $nodes[$node][1] || [] }) {
+ $count += __SUB__->($child, $cur);
+ }
+
+ return $count;
+ }->(0, '');
+}