2.5.8 #
解答 #
官网实现见:https://algs4.cs.princeton.edu/25applications/Frequency.java.html
用到的数据来自(右键另存为):https://introcs.cs.princeton.edu/java/data/tale.txt
先把所有单词读入,然后排序,一样的单词会被放在一起,
接下来遍历一遍记录每个单词出现的次数。
然后按照频率排序,倒序输出即可。
定义了一个嵌套类 Record
来记录单词及出现次数,实现的比较器按照出现次数排序。
class Record : IComparable<Record>
{
public string Key { get; set; } // 单词
public int Value { get; set; } // 频率
public Record(string key, int value)
{
this.Key = key;
this.Value = value;
}
public int CompareTo(Record other)
{
return this.Value.CompareTo(other.Value);
}
}
测试结果(前 1% 的单词):
代码 #
var filename = "tale.txt";
var sr = new StreamReader(File.OpenRead(filename));
var a = sr.ReadToEnd().Split(new[] { ' ', '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);
Array.Sort(a);
var records = new Record[a.Length];
var word = a[0];
var freq = 1;
var m = 0;
for (var i = 0; i < a.Length; i++)
{
if (!a[i].Equals(word))
{
records[m++] = new Record(word, freq);
word = a[i];
freq = 0;
}
freq++;
}
records[m++] = new Record(word, freq);
Array.Sort(records, 0, m);
// 只显示频率为前 1% 的单词
for (var i = m - 1; i >= m * 0.99; i--)
{
Console.WriteLine(records[i].Value + " " + records[i].Key);
}
internal class Record : IComparable<Record>
{
public string Key { get; set; } // 单词
public int Value { get; set; } // 频率
public Record(string key, int value)
{
Key = key;
Value = value;
}
public int CompareTo(Record other)
{
return Value.CompareTo(other.Value);
}
}