// (c) 2000 Benjamin Fry, MIT Media Laboratory, fry@media.mit.edu
// Aesthetics + Computation Group, Massachussetts Institute of Technology


public class SimilarityMatrix {
  static final boolean LOG_TRANSFORM = true;
  static final boolean MEAN_CENTER = false; //true;
  static final boolean MEDIAN_CENTER = false;
  static final boolean NORMALIZE = true;

  static final int EUCLIDIAN = 0;
  static final int CORRELATION_COEFF = 1;
  int metric;
  
  MicroArray array;
  int gcount, ecount;

  float ndata[][];
  boolean nvalid[][];


  public SimilarityMatrix() { }

  public SimilarityMatrix(MicroArray array, int metric) {
    this.array = array;
    this.metric = metric;

    gcount = array.gcount;
    ecount = array.ecount;

    ndata = new float[gcount][ecount];
    nvalid = new boolean[gcount][ecount];
    for (int j = 0; j < gcount; j++) {
      System.arraycopy(array.data[j], 0, ndata[j], 0, ecount);

      // log transform
      if (LOG_TRANSFORM) {
	for (int i = 0; i < ecount; i++) {
	  if (notNaN(ndata[j][i])) {
	    ndata[j][i] = (ndata[j][i] > 0) ? 
	      (float)Math.log(ndata[j][i]) : Float.NaN;
	  }
	}
      }

      // center around the mean
      if (MEAN_CENTER) {
	float sum = 0;
	int valid = 0;
	for (int i = 0; i < ecount; i++) {
	  if (notNaN(ndata[j][i])) {
	    sum += ndata[j][i];
	    valid++;
	  }
	}
	float mean = sum / (float)valid;
	for (int i = 0; i < ecount; i++) {
	  ndata[j][i] -= mean;
	}

	// center on the median
      } else if (MEDIAN_CENTER) {
      }

      // normalize vectors by their magnitude
      if (NORMALIZE) {
	float mag = 0;
	for (int i = 0; i < ecount; i++) {
	  if (notNaN(ndata[j][i])) {
	    mag += ndata[j][i] * ndata[j][i];
	  }
	}
	mag = (float) Math.sqrt(mag);
	float dist = 0;
	if (mag != 0) {  // if mag is zero, don't div by zero
	  for (int i = 0; i < ecount; i++) {
	    if (notNaN(ndata[j][i])) {
	      ndata[j][i] /= mag;
	    }
	  }
	}
      }
    }
  }


  public float get(int x, int y) {
    if (metric == CORRELATION_COEFF) {
      // implement pearson's correlation coeff on the data
      float xmean = 0; int xcount = 0;
      float ymean = 0; int ycount = 0;
      for (int i = 0; i < ecount; i++) {
	if (notNaN(ndata[x][i])) {
	  xmean += ndata[x][i];
	  xcount++;
	}
	if (notNaN(ndata[y][i])) {
	  ymean += ndata[y][i];
	  ycount++;
	}
      }
      xmean /= (float)xcount;
      ymean /= (float)ycount;

      float sum1 = 0;
      float sum2 = 0;
      for (int i = 0; i < ecount; i++) {
	if (notNaN(ndata[x][i]) && notNaN(ndata[y][i])) {
	  float xx = ndata[x][i] - xmean;
	  float yy = ndata[y][i] - ymean;
	  sum1 += xx * yy;
	  sum2 += xx*xx * yy * yy;
	}
      }
      return (sum2 == 0) ? 0 :
	sum1 / (float) Math.sqrt(sum2);

    } else if (metric == EUCLIDIAN) {
      float dist = 0;
      for (int i = 0; i < ecount; i++) {
	float delta = ndata[x][i] - ndata[y][i];
	dist += delta*delta;
      }
      return (float) Math.sqrt(dist);
    }
    return 0;
  }

  private final boolean notNaN(float f) {
    return (f == f);
  }
}

