2.5.8

2.5.8 #

解答 #

官网实现见:https://algs4.cs.princeton.edu/25applications/Frequency.java.html

用到的数据来自(右键另存为):https://introcs.cs.princeton.edu/java/data/tale.txt

先把所有单词读入,然后排序,一样的单词会被放在一起,

接下来遍历一遍记录每个单词出现的次数。

然后按照频率排序,倒序输出即可。

定义了一个嵌套类 Record 来记录单词及出现次数,实现的比较器按照出现次数排序。

class Record : IComparable<Record>
{
    public string Key { get; set; }     // 单词
    public int Value { get; set; }      // 频率

    public Record(string key, int value)
    {
        this.Key = key;
        this.Value = value;
    }

    public int CompareTo(Record other)
    {
        return this.Value.CompareTo(other.Value);
    }
}

测试结果(前 1% 的单词):

代码 #

var filename = "tale.txt";
var sr = new StreamReader(File.OpenRead(filename));
var a = sr.ReadToEnd().Split(new[] { ' ', '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);
Array.Sort(a);

var records = new Record[a.Length];
var word = a[0];
var freq = 1;
var m = 0;
for (var i = 0; i < a.Length; i++)
{
    if (!a[i].Equals(word))
    {
        records[m++] = new Record(word, freq);
        word = a[i];
        freq = 0;
    }

    freq++;
}

records[m++] = new Record(word, freq);

Array.Sort(records, 0, m);
// 只显示频率为前 1% 的单词
for (var i = m - 1; i >= m * 0.99; i--)
{
    Console.WriteLine(records[i].Value + " " + records[i].Key);
}

internal class Record : IComparable<Record>
{
    public string Key { get; set; } // 单词
    public int Value { get; set; } // 频率

    public Record(string key, int value)
    {
        Key = key;
        Value = value;
    }

    public int CompareTo(Record other)
    {
        return Value.CompareTo(other.Value);
    }
}