算法过程:
public class Vector { public double[] Features { get; set; } public Guid Id { get; private set; } public Guid PreGroupId { get; set; } public Guid CurrentGroupId { get; set; } public Vector(double[] features) { Features = features; PreGroupId = Guid.Empty; CurrentGroupId = Guid.Empty; Id = Guid.NewGuid(); } public void FindMinDistanceGroup(IEnumerable<VectorGroup> groups) { double min = int.MaxValue; VectorGroup minGroup = null; foreach (var vectorGroup in groups) { var distance = DistanceTo(vectorGroup.CenterVector); if (distance < min) { minGroup = vectorGroup; min = distance; } } UpdateGroupId(minGroup.Id); minGroup.AddVector(this); } public bool GroupUpdated() { if (PreGroupId == CurrentGroupId && PreGroupId == Guid.Empty) { return true; } return PreGroupId != CurrentGroupId; } private double DistanceTo(Vector v) { var distance = 0.0; for (int i = 0; i < Features.Length; i++) { distance += Math.Pow(Features[i] - v.Features[i], 2); } return Math.Sqrt(distance); } private void UpdateGroupId(Guid groupId) { PreGroupId = CurrentGroupId; CurrentGroupId = groupId; } public override string ToString() { return "(" + string.Join(",", Features) + ")"; } } public class VectorGroup { public IList<Vector> Vectors { get; set; } public Vector CenterVector { get; set; } public Guid Id { get; private set; } public VectorGroup(Vector initCenter) { CenterVector = initCenter; Id = Guid.NewGuid(); Vectors = new List<Vector>(); } public void AddVector(Vector v) { Vectors.Add(v); } public void Clear() { Vectors.Clear(); } public void SetCenter(Vector center) { CenterVector = center; } public void UpdateCenter() { var vectors = Vectors; if (vectors.Count == 0) { return; } var featureLen = vectors[0].Features.Length; var values = new double[featureLen]; for (int i = 0; i < featureLen; i++) { var sum = 0.0; for (int j = 0; j < vectors.Count; j++) { sum += vectors[j].Features[i]; } values[i] = sum / vectors.Count; } CenterVector = new Vector(values); } public override string ToString() { string s = ""; foreach (var vector in Vectors) { s += vector + " | "; } return string.Format("{0} , {1}", Id, s); } } public static class KMeansDemo { static Vector CenterOne = new Vector(new double[] {10,5}); static Vector CenterTwo = new Vector(new double[] { 20, 1 }); static Vector CenterThree = new Vector(new double[] { 40, 2 }); public static void Execute() { // create testing vectors var vectors = new List<Vector>() { new Vector(new double[] {1,1} ), new Vector(new double[] {20,12} ), new Vector(new double[] {56,67} ), new Vector(new double[] {14,1} ), new Vector(new double[] {20,4} ), new Vector(new double[] {6,8} ), new Vector(new double[] {7,7} ), }; // add init group (and center vector ) var groups = new List<VectorGroup>() { new VectorGroup(CenterOne), new VectorGroup(CenterTwo), new VectorGroup(CenterThree) }; while (true) { // no update, break if (vectors.All(x => !x.GroupUpdated())) { break; } // clear group nodes before dispatching foreach (var vectorGroup in groups) { vectorGroup.Clear(); } // dispatch nodes to min-distance group foreach (var vector in vectors) { vector.FindMinDistanceGroup(groups); } // re-calculate group center point foreach (var vectorGroup in groups) { vectorGroup.UpdateCenter(); } } foreach (var vectorGroup in groups) { Console.WriteLine(vectorGroup.ToString()); } } } class Program { static void Main(string[] args) { KMeansDemo.Execute(); Console.ReadLine(); } }
https://en.wikipedia.org/wiki/K-means_clustering
http://www.saedsayad.com/clustering_kmeans.htm