import java.io.*;
import java.util.*;
import java.util.Map.Entry;
/**
 * Finds the ten most frequent lines (IP addresses, one per line) of a large text file
 * by splitting it into chunk files, counting each chunk separately, and merging the
 * per-chunk counts.
 *
 * <p>Note: chunks are cut by line count, so the same IP may appear in several chunks and
 * {@link #mergeResults(List)} keeps one map entry per distinct IP in memory.
 */
public class Top10IPs {
    /** Default maximum number of lines written to a single chunk file. */
    private static final int DEFAULT_CHUNK_SIZE = 1000000;

    /** Number of entries returned by {@link #mergeResults(List)}. */
    private static final int TOP_N = 10;

    /** Final ranking order: highest count first, ties broken by IP in ascending order. */
    private static final Comparator<Entry<String, Integer>> BY_COUNT_DESC_THEN_IP =
            Entry.<String, Integer>comparingByValue().reversed()
                    .thenComparing(Entry.<String, Integer>comparingByKey());

    /**
     * Runs the three steps (split, count, merge) and prints the result.
     *
     * @param args optional: {@code args[0]} is the input file, {@code args[1]} is the temp
     *             directory prefix (must end with a path separator); the built-in
     *             placeholder paths are used for any argument that is missing
     * @throws IOException if any file cannot be read or written
     */
    public static void main(String[] args) throws IOException {
        String inputFilePath = args.length > 0 ? args[0] : "path/to/large/file.txt";
        String tempDirectory = args.length > 1 ? args[1] : "path/to/temp/directory/";

        // Step 1: split the large file into smaller chunks.
        List<String> chunkFiles = splitFile(inputFilePath, tempDirectory);

        // Step 2: count IPs in each chunk and save the intermediate results.
        List<String> resultFiles = new ArrayList<>();
        for (String chunkFile : chunkFiles) {
            resultFiles.add(countIPsInChunk(chunkFile, tempDirectory));
        }

        // Step 3: merge the intermediate results and print the top entries.
        for (Entry<String, Integer> entry : mergeResults(resultFiles)) {
            System.out.println(entry.getKey() + ": " + entry.getValue());
        }
    }

    /**
     * Splits the input file into chunk files of at most 1,000,000 lines each.
     *
     * @param inputFilePath path of the file to split
     * @param tempDirectory prefix for the chunk files; it is concatenated as-is, so it must
     *                      end with a path separator to denote a directory
     * @return paths of the chunk files, in input order
     * @throws IOException if reading the input or writing a chunk fails
     */
    public static List<String> splitFile(String inputFilePath, String tempDirectory) throws IOException {
        return splitFile(inputFilePath, tempDirectory, DEFAULT_CHUNK_SIZE);
    }

    /**
     * Splits the input file into chunk files named {@code chunk_<index>.txt}, each holding
     * at most {@code chunkSize} lines. Existing chunk files are overwritten.
     *
     * @param inputFilePath path of the file to split
     * @param tempDirectory prefix for the chunk files; it is concatenated as-is, so it must
     *                      end with a path separator to denote a directory
     * @param chunkSize     maximum number of lines per chunk; must be positive
     * @return paths of the chunk files, in input order (empty if the input has no lines)
     * @throws IllegalArgumentException if {@code chunkSize} is not positive
     * @throws IOException              if reading the input or writing a chunk fails
     */
    public static List<String> splitFile(String inputFilePath, String tempDirectory, int chunkSize)
            throws IOException {
        if (chunkSize <= 0) {
            throw new IllegalArgumentException("chunkSize must be positive: " + chunkSize);
        }
        List<String> chunkFiles = new ArrayList<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(inputFilePath))) {
            // 'line' always holds the next unwritten record; it is read in exactly one place
            // per iteration so no record is skipped at a chunk boundary.
            String line = reader.readLine();
            while (line != null) {
                String chunkFilePath = tempDirectory + "chunk_" + chunkFiles.size() + ".txt";
                // Overwrite rather than append so stale files from an earlier run cannot
                // inflate the counts.
                try (BufferedWriter writer = new BufferedWriter(new FileWriter(chunkFilePath))) {
                    for (int lineCount = 0; lineCount < chunkSize && line != null; lineCount++) {
                        writer.write(line);
                        writer.newLine();
                        line = reader.readLine();
                    }
                }
                chunkFiles.add(chunkFilePath);
            }
        }
        return chunkFiles;
    }

    /**
     * Counts how often each line occurs in a chunk file and writes the counts, one
     * {@code <line>,<count>} record per line, to a result file.
     *
     * @param chunkFilePath path of the chunk file; the part after its last {@code '_'}
     *                      becomes the suffix of the result file name
     * @param tempDirectory prefix for the result file; concatenated as-is
     * @return path of the written result file ({@code result_<suffix>})
     * @throws IOException if the chunk cannot be read or the result cannot be written
     */
    public static String countIPsInChunk(String chunkFilePath, String tempDirectory) throws IOException {
        Map<String, Integer> ipCountMap = new HashMap<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(chunkFilePath))) {
            String line;
            while ((line = reader.readLine()) != null) {
                ipCountMap.merge(line, 1, Integer::sum);
            }
        }

        String resultFilePath = tempDirectory + "result_"
                + chunkFilePath.substring(chunkFilePath.lastIndexOf('_') + 1);
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(resultFilePath))) {
            for (Entry<String, Integer> entry : ipCountMap.entrySet()) {
                writer.write(entry.getKey() + "," + entry.getValue());
                writer.newLine();
            }
        }
        return resultFilePath;
    }

    /**
     * Sums the per-chunk counts from all result files and returns the ten entries with the
     * highest totals.
     *
     * @param resultFiles paths of files containing {@code <line>,<count>} records
     * @return at most ten entries ordered by count descending, ties by key ascending
     * @throws IOException if a file cannot be read or contains a malformed record
     */
    public static List<Entry<String, Integer>> mergeResults(List<String> resultFiles) throws IOException {
        Map<String, Integer> ipCountMap = new HashMap<>();
        for (String resultFile : resultFiles) {
            try (BufferedReader reader = new BufferedReader(new FileReader(resultFile))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    // Split on the LAST comma: the count never contains one, the key might.
                    int separator = line.lastIndexOf(',');
                    if (separator < 0) {
                        throw new IOException("Malformed record in " + resultFile + ": " + line);
                    }
                    int count;
                    try {
                        count = Integer.parseInt(line.substring(separator + 1));
                    } catch (NumberFormatException e) {
                        throw new IOException("Malformed count in " + resultFile + ": " + line, e);
                    }
                    ipCountMap.merge(line.substring(0, separator), count, Integer::sum);
                }
            }
        }

        // Bounded heap whose head is the worst-ranked entry kept so far, so evicting the
        // head whenever the heap grows past TOP_N leaves exactly the best TOP_N entries.
        PriorityQueue<Entry<String, Integer>> worstFirst =
                new PriorityQueue<>(BY_COUNT_DESC_THEN_IP.reversed());
        for (Entry<String, Integer> entry : ipCountMap.entrySet()) {
            worstFirst.offer(entry);
            if (worstFirst.size() > TOP_N) {
                worstFirst.poll();
            }
        }

        List<Entry<String, Integer>> top10IPs = new ArrayList<>(worstFirst);
        top10IPs.sort(BY_COUNT_DESC_THEN_IP);
        return top10IPs;
    }
}
代码说明:
- splitFile 方法:将大文件分割成多个较小的文件,每个文件包含一定数量的IP地址。
- countIPsInChunk 方法:统计每个小文件中的IP访问次数,并将结果保存到一个中间结果文件中。
- mergeResults 方法:合并所有中间结果文件,并找出访问次数排名前十的IP地址。

这段代码假设已经根据具体需求调整了块大小和文件路径。此方法有效地处理了大文件,并找出了访问次数最多的前十个IP地址。
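通过上述方法,我们可以在不一次性将整个大文件读入内存的情况下,有效地处理大文件并找出访问次数排名前十的IP地址。需要注意的是,由于文件是按行数而不是按IP的哈希值切分的,同一个IP可能出现在多个分块中,因此合并阶段仍需在内存中保存所有不同IP的计数;如果不同IP的数量非常大,应改为按 hash(IP) 取模来分块,使同一个IP只落在一个分块里,然后对每个分块分别求前十名再汇总。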
要在长度为N的有序数组中快速查找所有值为M的元素下标,可以使用二分查找来找到值为M的第一个和最后一个位置,然后再遍历这些位置之间的元素获取所有的下标。这种方法的时间复杂度是O(log N) + O(k),其中k是值为M的元素的数量。
下面是一个Java实现:
import java.util.ArrayList;
import java.util.List;
/**
 * Locates every index at which a given value occurs in a sorted int array, using two
 * binary searches to find the first and last occurrence.
 */
public class FindIndices {
    public static void main(String[] args) {
        int[] sample = {1, 2, 2, 2, 3, 4, 5};
        List<Integer> found = findAllIndices(sample, 2);
        System.out.println(found); // prints: [1, 2, 3]
    }

    /**
     * Returns the indices of all elements equal to {@code target}.
     *
     * @param nums   array to search; expected to be sorted in ascending order
     * @param target value to look for
     * @return the matching indices in ascending order, or an empty list if there is none
     */
    public static List<Integer> findAllIndices(int[] nums, int target) {
        List<Integer> indices = new ArrayList<>();

        int firstIndex = findFirst(nums, target);
        if (firstIndex == -1) {
            // No occurrence at all, so there is nothing to collect.
            return indices;
        }

        // Every position between the first and last occurrence holds the target.
        int lastIndex = findLast(nums, target);
        int i = firstIndex;
        while (i <= lastIndex) {
            indices.add(i);
            i++;
        }
        return indices;
    }

    /** Returns the index of the first occurrence of {@code target}, or -1 if absent. */
    private static int findFirst(int[] nums, int target) {
        return searchBoundary(nums, target, true);
    }

    /** Returns the index of the last occurrence of {@code target}, or -1 if absent. */
    private static int findLast(int[] nums, int target) {
        return searchBoundary(nums, target, false);
    }

    /**
     * Binary search that keeps going after a hit: toward lower indices when
     * {@code leftmost} is true, toward higher indices otherwise.
     *
     * @return the last matching index visited, or -1 if no element matched
     */
    private static int searchBoundary(int[] nums, int target, boolean leftmost) {
        int lo = 0;
        int hi = nums.length - 1;
        int hit = -1;
        while (lo <= hi) {
            int mid = lo + (hi - lo) / 2;
            int value = nums[mid];
            if (value < target) {
                lo = mid + 1;
            } else if (value > target) {
                hi = mid - 1;
            } else {
                hit = mid;
                if (leftmost) {
                    hi = mid - 1; // keep looking on the left side
                } else {
                    lo = mid + 1; // keep looking on the right side
                }
            }
        }
        return hit;
    }
}
代码说明:
- findAllIndices 方法用于找到所有值为 target 的元素的下标。
- findFirst 方法用于找到 target 在数组中的第一个位置。
- findLast 方法用于找到 target 在数组中的最后一个位置。
- 如果找到的第一个位置是 -1,说明数组中没有 target,直接返回空列表。
- 否则遍历从 firstIndex 到 lastIndex 的范围,将所有的下标添加到结果列表中。

面经原帖由三毛六站神发布,答案由程序员Hasity整理。
#软件开发笔面经#