package org.biomoby.service.test;

import org.biomoby.shared.MobyDataType;
import org.biomoby.shared.data.*;
import org.biomoby.service.*;

@mobyService(name="ConvertFASTAToDNASequence",
             type="FormatConversion", 
	     provider="moby.ucalgary.ca", 
	     author="gordonp@ucalgary.ca",
	     in={"inseq:FASTA"},
	     out={"outseq:DNASequence"},
	     description={"Converts FastA formatted records into DNASequences, ", 
			  "primarily to increase inter-service compatibility"})

/**
 * Simple service used for testing and as an example for users of how to use exceptions 
 * and warnings in MOBY services.
 */

public class FromFastaToDNASequence extends MobyServlet{

    /**
     * Parses a FastA string into its header component and sequence, constructing
     * a MOBY DNASequence object (or subclass thereof), which can be used in more services.
     * An exception will be thrown if the data doesn't appear to be FastA, or a warning
     * will be added to the results if the sequence doesn't appear to be DNA.  
     */
    public void processRequest(MobyDataJob request, MobyDataJob result) throws Exception{
         // The input parameter for this method is registered as "inseq"
         MobyDataComposite fastaObject = (MobyDataComposite) request.get("inseq");

         // SequenceString is a member of incoming AminoAcidSequence object
         MobyDataString fastaStringObject = (MobyDataString) fastaObject.get("content");  
         String fastaString = fastaStringObject.toString();

         // Do the reformatting
	 int newLineIndex = fastaString.indexOf('\n');
	 if(newLineIndex == 0){//ignore first char if \n
	     newLineIndex = fastaString.indexOf('\n', 1);
	 }
	 // Die if the format is bad
	 if(newLineIndex == -1){
	     throw new MobyServiceException(MobyServiceException.ERROR, 
					    MobyServiceException.INPUTS_INVALID, 
					    request.getID(), 
					    "content", 
					    "The input sequence was not FASTA, " +
					    "it does not even contain two lines");
	 }
	 if(fastaString.indexOf('>') > 1){
	     throw new MobyServiceException(MobyServiceException.ERROR, 
					    MobyServiceException.INPUTS_INVALID, 
					    request.getID(), 
					    "content",
					    "The input sequence was not FASTA, " +
					    "it does not start with a '>' header line");
	 }

         String dnaString = fastaString.substring(newLineIndex+1);
	 dnaString = dnaString.replaceAll("\\s", "");  //strip whitespace
	 String desc = null;
	 String header = fastaString.substring(0, newLineIndex);
	 if(header.indexOf(' ') != -1){  // anything before the space is an id, anything after is a desc
	     desc = header.substring(header.indexOf(' ')+1);
	 }

	 // Check the alphabet too to ensure it's DNA. Warn, but don't die
	 if(!isNucleicAcid(dnaString)){
	     addException(new MobyServiceException(MobyServiceException.WARNING, 
						   MobyServiceException.INPUT_INCORRECT_SIMPLE, 
						   request.getID(), 
						   "content",
						   "The input sequence does not appear to be DNA or RNA, " +
						   "but is being processed anyway."));
	 }
	 if(dnaString.indexOf('U') != -1 || dnaString.indexOf('u') != -1){
	     dnaString = dnaString.replaceAll("U", "T").replaceAll("u", "t");
	     addException(new MobyServiceException(MobyServiceException.INFO, 
						   MobyServiceException.OK, 
						   request.getID(), 
						   "content",
						   "The input sequence appears to be RNA, " +
						   "and was coerced into DNA"));
	 }

         MobyDataType dnaType = null;
	 if(desc != null && desc.length() != 0){
	     dnaType = MobyDataType.getDataType("CommentedDNASequence");
	 }
	 else{
	     dnaType = MobyDataType.getDataType("DNASequence");
	 }
         MobyDataComposite dnaObject = new MobyDataComposite(dnaType, 
							     fastaObject.getPrimaryNamespace(), 
							     fastaObject.getId(),
							     "SequenceString", dnaString,
							     "Length", dnaString.length());
	 if(desc != null && desc.length() != 0){
	     dnaObject.put("Description", new MobyDataString(desc));
	 }

         // Set the result that will be passed back to the client
         result.put("outseq", dnaObject);
    }
 
    public static boolean isNucleicAcid(String sequence){
	String naChars = sequence.replaceAll("[^acgtunxACGTUNX]", "");
	String nonNAChars = sequence.replaceAll("[acgtumrwsykvhdbnxACGTUMRWSYKVHDBNX \\-\t\r\n]", "");
	double seqLen = (double) sequence.length(); // not int, because we don't want integer division
	return naChars.length()/seqLen > 0.66 && nonNAChars.length()/seqLen < 0.05;
    }

}
