Examples

String clustering

  // Example: hierarchical clustering of a small set of strings.
  // Pull in the clustering library relative to the executable's path.
  %include __EXE_PATH__ "rx_Clustering/Clustering.rax";

  // Create the dataset: a set of (key, value) pairs built from a plain set
  // of strings.
  // NOTE(review): presumably `.#0` supplies a row number that becomes the
  // numeric key column -- confirm against the Rax language reference.
  {[#:key, $:value]}: Data := project[.#0, ..] {
    "aap", "ape", "affe", "singe",
    "noot", "nut", "nuss", "noix"
  };
  
  // Create clustering instance with Damerau distance, single linkage and no weights.
  // scNoInstance is passed as the first argument because no instance exists yet.
  \scInstance : I := scConstruct(scNoInstance, Data, scSingle, scDamerau, scNoWeights);
  
  // Create the optimal number of clusters and return cluster labels.
  // The labels are combined with Data so the printed output shows each
  // string next to its cluster label.
  {[#:cluster, #:key]} : optClusters := scGetClusters(I, scBestNClusters);
  `print Data |><| optClusters;
  
  // Create 3 clusters and return cluster labels
  {[#:cluster, #:key]} : clusters := scGetClusters(I, 3);
  `print Data |><| clusters;
  
  // Create 3 clusters and return the medoids
  {[#:cluster, #:key]} : medoids := scGetMedoids(I, 3);
  `print Data |><| medoids;
  
  // Drill down into the first cluster.
  // The instance variable I is overwritten with the result of scDrillDown.
  I := scDrillDown(I, 1);
  
  // Split this cluster into optimal number of clusters and return cluster
  // labels. Only rows with a cluster label greater than 0 are printed.
  // NOTE(review): presumably keys outside the drilled-down cluster receive
  // label 0 -- confirm against the library documentation.
  {[#:cluster, #:key]} : subClusters := scGetClusters(I, scBestNClusters);
  `print select [.cluster > 0] Data |><| subClusters;
  
  // Clean up: destroy the instance and store the returned value back in I.
  I := scDestruct(I);
  
	

Path clustering

  // Example: clustering of event paths, first with the event-histogram
  // distance and then with the journey distance.
  // Pull in the clustering library relative to the executable's path.
  %include __EXE_PATH__ "rx_Clustering/Clustering.rax";
  
  // Create some path data: one row per event, carrying the path it belongs
  // to, its event type, duration, intensity and timestamp.
  {[#:PathID, #:EventType, #:Duration, #:Intensity, @:Timestamp]} : Paths;
  Paths := {
    [1, 1, 1, 1, (@)"2016-07-24T12:01:00"],
    [1, 2, 1, 1, (@)"2016-07-24T12:20:00"],
    [1, 1, 1, 1, (@)"2016-07-24T12:30:00"],
    [2, 1, 1, 1, (@)"2016-07-25T11:01:00"],
    [2, 2, 1, 1, (@)"2016-07-25T11:12:00"],
    [2, 1, 1, 1, (@)"2016-07-25T11:30:00"],
    [3, 1, 1, 1, (@)"2016-07-26T17:05:00"],
    [3, 3, 2, 1, (@)"2016-07-26T17:30:00"],
    [3, 1, 1, 1, (@)"2016-07-26T23:30:00"],
    [4, 1, 1, 1, (@)"2016-07-27T12:01:00"],
    [5, 3, 1, 1, (@)"2016-07-27T19:01:00"]
  };
  
  // Convert them to strings using /cat() aggregate.
  // Each event is turned into a string by casting its EventType, Duration
  // and Intensity to strings and concatenating them (EventString); the
  // event strings of one path are then concatenated per PathID.
  // Note that sorting order is important: the paths are ordered by PathID
  // and Timestamp before the fold.
  Paths := Paths![.PathID, .Timestamp];
  {[#:PathID, $:PathString]} : PathStrings :=
    fold [.PathID, /cat(.EventString)]
    project [.PathID, ($).EventType+($).Duration+($).Intensity:EventString, .Timestamp]
    Paths;
  
  // Create event weights: one weight per event key. Event "2" is given a
  // much larger weight than events "1" and "3".
  {[$:Event, &:Weight]} : EventWeights := {
    ["1",0.1],
    ["2",9999.9],
    ["3",0.1]
  };
  
  // Cluster paths with event-histogram distance and average linkage
  `print "Clustering paths with Event-Histogram distance\n";
  \scInstance : I := scConstruct(scNoInstance, PathStrings, scAverage,
                                 scEventHistogram, EventWeights);
  
  // Print the cluster label of every path for the optimal number of clusters.
  `print "Cluster labels:\n";
  {[#:Cluster, #:PathID]} : Clusters := scGetClusters(I, scBestNClusters);
  `print PathStrings |><| Clusters;
  
  // Print the medoids for the optimal number of clusters.
  `print "\nCluster medoids:\n";
  {[#:Cluster, #:PathID]} : Medoids := scGetMedoids(I, scBestNClusters);
  `print PathStrings |><| Medoids;
  
  // Clean up the first instance before building the second one.
  I := scDestruct(I);
  `print "\n";
  
  // Cluster paths with journey distance and average linkage.
  // The variables I, Clusters and Medoids declared above are reused here.
  `print "Clustering paths with Journey distance\n";
  I := scConstruct(scNoInstance, PathStrings, scAverage,
                                 scJourney, EventWeights);
  
  `print "Cluster labels:\n";
  Clusters := scGetClusters(I, scBestNClusters);
  `print PathStrings |><| Clusters;
  
  `print "\nCluster medoids:\n";
  Medoids := scGetMedoids(I, scBestNClusters);
  `print PathStrings |><| Medoids;
  
  // Clean up
  I := scDestruct(I);
	  

Note that event types, durations, and intensities must each be single-digit numbers for the above code to work, because each value is cast to a string and concatenated into the path string. In most applications this is good enough. If you are dealing with more than 10 event types, though, you need to translate the numeric event types to single-character strings using a translation table rather than a simple cast-to-string operator.