#!/usr/bin/perl
#
# Name:      break_up.pl
# Purpose:   Break up long genomic sequences into chunks of $size bases,
#            each overlapping by $overlap base pairs, in order to use
#            Picky for genome tiling computation.
# Usage:     ./break_up.pl size overlap genome_input > chopped_output
# Created:   6/12/2005
# Copyright: (c) Hui-Hsien Chou
# License:   See Picky LICENSE.pdf (the URL that followed "or visit" is
#            missing from this copy of the file).

use strict;
use warnings;

die "Usage: ./break_up.pl size overlap genome_input > chopped_output\n"
    if @ARGV != 3;

# $size, $overlap, $batch and $width stay file-scoped lexicals under their
# original names: output() is defined in this file and may read them
# (its body is not fully visible here -- verify before renaming).
my ($size, $overlap, $input) = @ARGV;

# Reject non-numeric or negative values up front. Without this check a
# non-numeric argument is silently treated as 0 (with a warning), and a
# negative overlap slips past the $batch <= 0 test below.
for my $arg ($size, $overlap) {
    die "Size and overlap must be non-negative integers, got '$arg'\n"
        if $arg !~ /\A\d+\z/;
}

die "Size $size is longer than the maximum length of 16,384 bp\n"
    if $size > 16384;

# Size minus overlap; must be positive, i.e. the overlap has to be
# strictly shorter than the chunk size.
my $batch = $size - $overlap;
die "Overlap $overlap is not shorter than the size $size\n"
    if $batch <= 0;

# Three-argument open with a lexical handle, so that special characters in
# the file name (">", "|", leading/trailing spaces) cannot change the mode.
open my $in, '<', $input
    or die "Can't open input file $input: $!\n";

my $width = 70;    # presumably the output line width used by output() -- confirm
my $name;          # most recent ">" header line seen
my $seq = "";      # sequence text accumulated for the current header

# Header lines start with ">"; every other line is appended to the current
# sequence. A record is flushed when the next header (or EOF) is reached.
while (my $line = <$in>) {
    chomp $line;
    if ($line =~ /^>/) {
        output($name, $seq) if length $seq;
        $seq  = "";
        $name = $line;
    }
    else {
        $seq .= $line;
    }
}
output($name, $seq) if length $seq;    # flush the final record
close $in;

# NOTE(review): the definition of output() is cut off in the copy under
# review, so it is deliberately not reconstructed here. Restore its full
# body from the original file; the visible fragment is kept verbatim below.
#
# sub output { my ($name,$seq) = @_; my @fields = split/\s/, $name; for (my $i=0; $i