// (c) 2000 Benjamin Fry, MIT Media Laboratory, fry@media.mit.edu
// Aesthetics + Computation Group, Massachusetts Institute of Technology


import java.io.*;
import java.net.*;


/**
 * In-memory copy of a clustered data table (.cdt file, as used by eisen).
 *
 * <p>Layout read by the constructor, all cells tab separated:
 * <ul>
 *   <li>row 1: the id label, then any run of the optional column headers
 *       UID / NAME / GWEIGHT / GORDER, then one label per experiment;</li>
 *   <li>next rows: any run of the optional rows whose first cell is
 *       EWEIGHT / AID / EORDER;</li>
 *   <li>all remaining non-blank rows: one gene per row.</li>
 * </ul>
 *
 * <p>Fields are package-visible and are filled in once by the constructor.
 * Arrays for optional columns/rows stay null when the file does not
 * contain them.
 */
public class MicroArray {
  int gcount; // gene count (# of rows)
  int ecount; // experiment count (# of columns)
  float data[][]; // values indexed [gene][experiment], NaN for missing cells

  String idLabel; // header cell of column 1
  String id[]; // column 1, required (ORF, YORF, etc)
  String label[]; // row 1, experiment/sample labels

  // optional columns
  String uid[];
  String name[];
  float gweight[];
  int gorder[];

  // optional rows
  String aid[];
  float eweight[];
  int eorder[];


  /**
   * Reads a clustered data table (.cdt files used by eisen) from a stream.
   *
   * <p>Empty cells, "#VALUE!" cells and cells that fail to parse as a
   * float are stored as NaN (unparseable cells are also reported on
   * System.out). Rows shorter than the header are padded with NaN and
   * cells beyond the last experiment column are ignored.
   *
   * <p>The stream is read to the end but is not closed here.
   *
   * @param input stream positioned at the start of the table
   * @throws IOException if reading fails or the input has no header row
   * @throws NumberFormatException if an EWEIGHT, EORDER, GWEIGHT or GORDER
   *         cell is not a valid number (these are not caught)
   */
  public MicroArray(InputStream input) throws IOException {
    BufferedReader reader = new BufferedReader(new InputStreamReader(input));

    String line = reader.readLine();
    if (line == null) {
      throw new IOException("empty input: no header row found");
    }
    // NOTE(review): Utilities.split is defined elsewhere; the code below
    // assumes it keeps empty fields between tabs -- confirm.
    String pieces[] = Utilities.split(line, '\t');
    if (pieces.length == 0) {
      throw new IOException("empty header row");
    }
    idLabel = pieces[0];  // i.e. ORF, YORF, etc

    int uidColumn = -1;
    int nameColumn = -1;
    int gweightColumn = -1;
    int gorderColumn = -1;

    // Walk right from column 1 for as long as known optional headers are
    // found; 'left' ends up as the index of the first experiment column.
    // The bounds check covers a header that holds nothing but id/optional
    // columns.
    int left = 1;
    while (left < pieces.length) {
      String header = pieces[left];
      if (header.equals("UID")) {
        uidColumn = left;
      } else if (header.equals("NAME")) {
        nameColumn = left;
      } else if (header.equals("GWEIGHT")) {
        gweightColumn = left;
      } else if (header.equals("GORDER")) {
        gorderColumn = left;
      } else {
        break;
      }
      left++;
    }

    ecount = pieces.length - left;
    // will there ever be bogus data at the end? i assume so
    // (drop trailing blank header cells, never going below zero columns)
    while (ecount > 0 && pieces[left + ecount - 1].trim().equals("")) {
      ecount--;
    }

    // read in the labels for the experiments
    label = new String[ecount];
    for (int i = 0; i < ecount; i++) {
      label[i] = pieces[left + i];
    }

    // Consume the optional EWEIGHT / AID / EORDER rows. The loop stops on
    // the first row that is none of these, leaving it in 'line' (or null
    // at end of input).
    int top = 1;  // number of non-gene rows seen, for debugging output
    while ((line = reader.readLine()) != null) {
      pieces = Utilities.split(line, '\t');

      if (pieces[0].equals("EWEIGHT")) {
        top++;
        eweight = new float[ecount];
        for (int i = 0; i < ecount; i++) {
          eweight[i] = Float.valueOf(pieces[i + left]).floatValue();
        }
      } else if (pieces[0].equals("AID")) {
        top++;
        aid = new String[ecount];
        for (int i = 0; i < ecount; i++) {
          aid[i] = pieces[i + left];
        }
      } else if (pieces[0].equals("EORDER")) {
        top++;
        eorder = new int[ecount];
        for (int i = 0; i < ecount; i++) {
          eorder[i] = Integer.parseInt(pieces[i + left]);
        }
      } else {
        break;
      }
    }

    // Collect the gene rows. 'line' still holds the row that ended the
    // loop above, so it is handled first and counted like every other row
    // (previously it was stored without being counted and then
    // overwritten, which lost the first gene).
    String lines[] = new String[1024]; // guestimate for default size
    while (line != null) {
      line = line.trim();  // remove white space
      if (line.length() != 0) {
        if (gcount == lines.length) {
          String temp[] = new String[gcount * 2];
          System.arraycopy(lines, 0, temp, 0, gcount);
          lines = temp;
        }
        lines[gcount++] = line;
      }
      line = reader.readLine();
    }

    data = new float[gcount][ecount];
    id = new String[gcount];
    if (uidColumn != -1) uid = new String[gcount];
    if (nameColumn != -1) name = new String[gcount];
    if (gweightColumn != -1) gweight = new float[gcount];
    if (gorderColumn != -1) gorder = new int[gcount];

    for (int j = 0; j < gcount; j++) {
      pieces = Utilities.split(lines[j], '\t');
      id[j] = pieces[0];
      if (uid != null) uid[j] = pieces[uidColumn];
      if (name != null) name[j] = pieces[nameColumn];
      if (gorder != null) gorder[j] = Integer.parseInt(pieces[gorderColumn]);
      if (gweight != null) {
        gweight[j] = Float.valueOf(pieces[gweightColumn]).floatValue();
      }

      // Number of data cells actually present in this row, clamped to
      // [0, ecount] so that neither an over-long nor an over-short row
      // can index outside data[j].
      int present = Math.max(0, Math.min(pieces.length - left, ecount));
      for (int i = 0; i < present; i++) {
        String cell = pieces[left + i];
        if (cell.equals("") || cell.equals("#VALUE!")) {
          data[j][i] = Float.NaN;
          continue;
        }
        try {
          data[j][i] = Float.valueOf(cell).floatValue();
        } catch (NumberFormatException e) {
          System.out.println("number format problem: " +
                             "'" + cell + "'" +
                             "   [" + j + "+" + top +
                             ", " + i + "+" + left + "]");
          data[j][i] = Float.NaN;
        }
      }
      // fill in the rest of an incomplete row
      for (int i = present; i < ecount; i++) {
        data[j][i] = Float.NaN;
      }
      // sun's jdk 1.1 sucks for some reason, and will just run
      // out of memory, even though there are plenty of things to gc
      // (microsoft's vm doesn't suffer the same asinine behavior)
      if ((j % 1000) == 0) System.gc();
    }
  }


  /**
   * Loads a .cdt table from a file on disk.
   *
   * @param filename path of the file to open
   * @return the parsed table
   * @throws IOException if the file cannot be opened or read
   */
  static public MicroArray fromCdtFile(String filename) throws IOException {
    FileInputStream fis = new FileInputStream(filename);
    try {
      // the constructor reads the whole stream, so it can be closed
      // as soon as construction finishes (or fails)
      return new MicroArray(fis);
    } finally {
      fis.close();
    }
  }


  /**
   * Placeholder for loading a .cdt table from a url; not implemented.
   *
   * @param url ignored
   * @return always null
   */
  static public MicroArray fromCdtUrl(String url) {
    return null;
  }


  // a little debugging to test how the cdt reader is doing:
  // loads three sample files in turn, printing a marker before each,
  // and dumps the stack trace of the first failure
  static public void main(String args[]) {
    try {
      System.out.println("1");
      MicroArray.fromCdtFile("cdt/yeastdemo.cdt");
      System.out.println("2");
      MicroArray.fromCdtFile("cdt/figure2.cdt");
      System.out.println("3");
      MicroArray.fromCdtFile("cdt/figure3.cdt");

    } catch (Exception e) {
      e.printStackTrace();
    }
  }
}

