这次智慧擂台请大家在一个比较庞大的英文文本中找出M个数量最多的短语(由N个单词组成)。统一处理相同的文本文件,该文本只包含英文单词、空格和换行符,比较谁的程序效率最高。
程序输出:高频短语及其数量清单
擂台规则:提交符合以上要求的可执行程序,语言招式不限,点到为止;
我们将在统一的环境下对每位选手的作品进行公平的测试,
比较出综合用时最少的程序。
import java.util.*;
/**
 * Mutable holder pairing a phrase with the number of times it has been counted.
 * Both fields are public and are read and written directly by the code that uses it.
 */
class tt {
    /** Number of occurrences recorded for {@link #phrase}; 0 until assigned. */
    public int count;

    /** Text of the phrase; {@code null} until assigned. */
    public String phrase;
}
/**
 * Command-line tool that prints the M most frequent N-word phrases of a text file.
 *
 * <p>Usage: {@code java searchphrase <m> <n> <path>}. The file is read line by line and
 * each line is split on space characters; a phrase is a run of {@code n} consecutive
 * words taken from a single line, so phrases never span a line break.
 *
 * <p>All state lives in static fields, so the class is not suitable for concurrent use.
 */
public class searchphrase
{
    /**
     * Occurrence count of every phrase seen so far, keyed by the phrase text itself.
     * Keying by the text (rather than by its hash code) keeps distinct phrases that
     * happen to share a hash code from being counted as one.
     */
    private static final Map<String, Integer> phrase = new HashMap<String, Integer>();

    /**
     * The current top-M phrases, kept sorted by descending count. Slots that have not
     * been claimed yet hold an empty phrase with a count of 0.
     */
    static tt[] max_phrase;

    /**
     * Splits a line into words separated by one or more space characters.
     *
     * @param s the line to split; only {@code ' '} is treated as a separator
     * @return the non-empty words of {@code s}, in order of appearance
     */
    private static List<String> SeparateString(String s)
    {
        List<String> words = new ArrayList<String>();
        int start = -1; // index where the current word began, or -1 between words
        for (int i = 0; i < s.length(); i++)
        {
            if (s.charAt(i) != ' ')
            {
                if (start < 0)
                {
                    start = i;
                }
            }
            else if (start >= 0)
            {
                words.add(s.substring(start, i));
                start = -1;
            }
        }
        if (start >= 0)
        {
            words.add(s.substring(start));
        }
        return words;
    }

    /**
     * Records the new count of a phrase in {@link #max_phrase} and restores the
     * descending order of the list.
     *
     * <p>A phrase that is not yet listed replaces the last (smallest) entry, but only
     * when its count is strictly greater than that entry's count. The updated entry is
     * then moved towards the front past every entry with a strictly smaller count.
     *
     * @param count the phrase's updated occurrence count
     * @param text  the phrase text
     */
    private static void adjust_max(int count, String text)
    {
        int last = max_phrase.length - 1;
        // Cheap rejection for the common case. A phrase that is already listed had a
        // count of at least the last entry's count before this increment, so it can
        // never be rejected here.
        if (count <= max_phrase[last].count)
        {
            return;
        }

        int pos = last; // default: evict the smallest entry
        for (int i = last; i >= 0; i--)
        {
            if (max_phrase[i].phrase.equals(text))
            {
                pos = i;
                break;
            }
        }
        max_phrase[pos].phrase = text;
        max_phrase[pos].count = count;

        // Bubble the entry up so the array stays sorted by descending count.
        while (pos > 0 && max_phrase[pos - 1].count < max_phrase[pos].count)
        {
            tt moved = max_phrase[pos];
            max_phrase[pos] = max_phrase[pos - 1];
            max_phrase[pos - 1] = moved;
            pos--;
        }
    }

    /**
     * Counts every phrase of {@code n} consecutive words in one line and updates the
     * top-M list after each increment.
     *
     * @param v the words of a single line, in order
     * @param n the number of words per phrase
     */
    private static void js(List<String> v, int n)
    {
        StringBuilder buffer = new StringBuilder();
        for (int i = 0; i + n <= v.size(); i++)
        {
            buffer.setLength(0);
            for (int j = i; j < i + n; j++)
            {
                // Every word, including the last, is followed by a single space.
                buffer.append(v.get(j)).append(' ');
            }
            String s = buffer.toString();
            Integer seen = phrase.get(s);
            int count = (seen == null) ? 1 : seen.intValue() + 1;
            phrase.put(s, Integer.valueOf(count));
            adjust_max(count, s);
        }
    }

    /**
     * Program entry point.
     *
     * <p>Prints, for each of the {@code m} top slots, the phrase, its count and an empty
     * line, followed by the elapsed time in milliseconds. Usage and input errors are
     * reported on standard error.
     *
     * @param args {@code args[0]} = m (number of phrases to report, at least 1),
     *             {@code args[1]} = n (words per phrase, at least 1),
     *             {@code args[2]} = path of the text file to analyse
     */
    public static void main(String[] args)
    {
        if (args.length < 3)
        {
            System.err.println("usage: java searchphrase <m> <n> <path>");
            return;
        }
        try
        {
            int m = Integer.parseInt(args[0]);
            int n = Integer.parseInt(args[1]);
            String path = args[2];
            if (m < 1 || n < 1)
            {
                throw new IllegalArgumentException(
                        "m and n must both be at least 1 (m=" + m + ", n=" + n + ")");
            }

            // Start from a clean state so a repeated call does not reuse old counts.
            phrase.clear();
            max_phrase = new tt[m];
            for (int i = 0; i < m; i++)
            {
                max_phrase[i] = new tt();
                max_phrase[i].count = 0;
                max_phrase[i].phrase = "";
            }

            long t = System.currentTimeMillis();
            java.io.BufferedReader br =
                    new java.io.BufferedReader(new java.io.FileReader(path));
            try
            {
                String line;
                while ((line = br.readLine()) != null)
                {
                    js(SeparateString(line), n);
                }
            }
            finally
            {
                br.close();
            }

            for (int i = 0; i < m; i++)
            {
                System.out.println(max_phrase[i].phrase);
                System.out.println(max_phrase[i].count);
                System.out.println();
            }
            t = System.currentTimeMillis() - t;
            System.out.print(t);
            System.out.println(" ms");
        }
        catch (IllegalArgumentException e)
        {
            // Also covers NumberFormatException thrown by Integer.parseInt.
            System.err.println("invalid argument: " + e.getMessage());
        }
        catch (java.io.IOException e)
        {
            System.err.println("cannot read " + args[2] + ": " + e.getMessage());
        }
    }
}
71
70
70
69
59
59
48
39
39
39
39
39
39
39
39
39
39
39
39
35
测试结果2 m = 10 n = 5
295
209
183
176
141
122
120
88
88
87
以上源程序采用的是最简单的方法,谁有更好、效率更高的方法,请跟帖!!