【百度面经】提前批Java一面|0721

优质

小牛编辑

75浏览

2024-07-22

import java.io.*;
import java.util.*;
import java.util.Map.Entry;

/**
 * Finds the ten most frequent lines (one IP address per line) of a large text file.
 *
 * <p>The work is done in three steps: the input is split into chunk files of a bounded
 * number of lines, each chunk is reduced to a "key,count" result file, and the result
 * files are merged to select the ten keys with the highest total count.
 *
 * <p>Note: chunks are cut by line count, so the same key can appear in several chunks and
 * {@link #mergeResults(List)} keeps one map entry per distinct key across all result files.
 */
public class Top10IPs {

    /** Number of lines written to each chunk file when no explicit size is given. */
    private static final int DEFAULT_CHUNK_SIZE = 1000000;

    /** Number of entries returned by {@link #mergeResults(List)}. */
    private static final int TOP_N = 10;

    /** Orders entries by count descending, breaking ties by key ascending for stable output. */
    private static final Comparator<Entry<String, Integer>> MOST_FREQUENT_FIRST = (left, right) -> {
        int byCount = Integer.compare(right.getValue(), left.getValue());
        return byCount != 0 ? byCount : left.getKey().compareTo(right.getKey());
    };

    public static void main(String[] args) throws IOException {
        String inputFilePath = "path/to/large/file.txt";
        String tempDirectory = "path/to/temp/directory/";

        // Step 1: Split the large file into smaller chunks
        List<String> chunkFiles = splitFile(inputFilePath, tempDirectory);

        // Step 2: Count IPs in each chunk and save intermediate results
        List<String> resultFiles = new ArrayList<>();
        for (String chunkFile : chunkFiles) {
            resultFiles.add(countIPsInChunk(chunkFile, tempDirectory));
        }

        // Step 3: Merge intermediate results and find top 10 IPs
        List<Entry<String, Integer>> top10IPs = mergeResults(resultFiles);

        // Print the top 10 IPs
        for (Entry<String, Integer> entry : top10IPs) {
            System.out.println(entry.getKey() + ": " + entry.getValue());
        }
    }

    /**
     * Splits the input file into chunk files of at most 1,000,000 lines each.
     *
     * @param inputFilePath path of the file to split
     * @param tempDirectory directory prefix for the chunk files; it is concatenated with the
     *     file name as-is, so it must end with a path separator
     * @return the chunk file paths, in input order
     * @throws IOException if the input cannot be read or a chunk cannot be written
     */
    public static List<String> splitFile(String inputFilePath, String tempDirectory) throws IOException {
        return splitFile(inputFilePath, tempDirectory, DEFAULT_CHUNK_SIZE);
    }

    /**
     * Splits the input file into chunk files named {@code chunk_<index>.txt}, each holding at
     * most {@code chunkSize} lines. Existing chunk files with the same name are overwritten.
     *
     * @param inputFilePath path of the file to split
     * @param tempDirectory directory prefix for the chunk files; must end with a path separator
     * @param chunkSize maximum number of lines per chunk; must be positive
     * @return the chunk file paths, in input order (empty if the input has no lines)
     * @throws IllegalArgumentException if {@code chunkSize} is not positive
     * @throws IOException if the input cannot be read or a chunk cannot be written
     */
    public static List<String> splitFile(String inputFilePath, String tempDirectory, int chunkSize)
            throws IOException {
        if (chunkSize <= 0) {
            throw new IllegalArgumentException("chunkSize must be positive: " + chunkSize);
        }
        List<String> chunkFiles = new ArrayList<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(inputFilePath))) {
            // The look-ahead line is read exactly once and carried across chunk boundaries,
            // so the first line of every chunk is never skipped.
            String line = reader.readLine();
            while (line != null) {
                String chunkFilePath = tempDirectory + "chunk_" + chunkFiles.size() + ".txt";
                // Truncate instead of append: leftovers from an earlier run must not be counted.
                try (BufferedWriter writer = new BufferedWriter(new FileWriter(chunkFilePath))) {
                    int lineCount = 0;
                    while (lineCount < chunkSize && line != null) {
                        writer.write(line);
                        writer.newLine();
                        line = reader.readLine();
                        lineCount++;
                    }
                }
                chunkFiles.add(chunkFilePath);
            }
        }
        return chunkFiles;
    }

    /**
     * Counts how often each line occurs in one chunk and writes the counts as
     * {@code key,count} lines to {@code result_<suffix>}, where the suffix is the part of the
     * chunk's file name after its last underscore. Surrounding whitespace is trimmed and
     * blank lines are ignored.
     *
     * @param chunkFilePath path of the chunk file to count
     * @param tempDirectory directory prefix for the result file; must end with a path separator
     * @return the path of the result file
     * @throws IOException if the chunk cannot be read or the result cannot be written
     */
    public static String countIPsInChunk(String chunkFilePath, String tempDirectory) throws IOException {
        Map<String, Integer> ipCountMap = new HashMap<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(chunkFilePath))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String ip = line.trim();
                if (!ip.isEmpty()) {
                    ipCountMap.merge(ip, 1, Integer::sum);
                }
            }
        }

        // Derive the suffix from the file name only, so underscores in directory names
        // cannot leak path fragments into the result file name.
        String chunkName = new File(chunkFilePath).getName();
        String resultFilePath = tempDirectory + "result_" + chunkName.substring(chunkName.lastIndexOf('_') + 1);
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(resultFilePath))) {
            for (Entry<String, Integer> entry : ipCountMap.entrySet()) {
                writer.write(entry.getKey() + "," + entry.getValue());
                writer.newLine();
            }
        }
        return resultFilePath;
    }

    /**
     * Sums the per-chunk counts from all result files and returns the ten keys with the
     * highest totals.
     *
     * @param resultFiles paths of files containing {@code key,count} lines
     * @return at most ten entries, ordered by count descending and then by key ascending
     * @throws IOException if a file cannot be read or contains a malformed line
     */
    public static List<Entry<String, Integer>> mergeResults(List<String> resultFiles) throws IOException {
        Map<String, Integer> ipCountMap = new HashMap<>();

        for (String resultFile : resultFiles) {
            try (BufferedReader reader = new BufferedReader(new FileReader(resultFile))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    if (line.isEmpty()) {
                        continue;
                    }
                    // The count follows the LAST comma, so keys that contain commas still parse.
                    int separator = line.lastIndexOf(',');
                    if (separator < 0) {
                        throw new IOException("Malformed line in " + resultFile + ": " + line);
                    }
                    int count;
                    try {
                        count = Integer.parseInt(line.substring(separator + 1).trim());
                    } catch (NumberFormatException e) {
                        throw new IOException("Malformed count in " + resultFile + ": " + line, e);
                    }
                    ipCountMap.merge(line.substring(0, separator), count, Integer::sum);
                }
            }
        }

        // Bounded heap whose head is the weakest candidate, evicted once more than TOP_N are held.
        PriorityQueue<Entry<String, Integer>> weakestFirst =
                new PriorityQueue<>(MOST_FREQUENT_FIRST.reversed());
        for (Entry<String, Integer> entry : ipCountMap.entrySet()) {
            weakestFirst.offer(entry);
            if (weakestFirst.size() > TOP_N) {
                weakestFirst.poll();
            }
        }

        List<Entry<String, Integer>> top10IPs = new ArrayList<>(weakestFirst);
        top10IPs.sort(MOST_FREQUENT_FIRST);
        return top10IPs;
    }
}

代码说明

splitFile方法：将大文件分割成多个较小的文件，每个文件包含一定数量的IP地址。
countIPsInChunk方法：统计每个小文件中的IP访问次数，并将结果保存到一个中间结果文件中。
mergeResults方法：合并所有中间结果文件，并找出访问次数排名前十的IP地址。

这段代码假设已经根据具体需求调整了块大小和文件路径。此方法有效地处理了大文件，并找出了访问次数最多的前十个IP地址。

注意事项

内存管理：在处理过程中要密切注意内存的使用情况，以避免内存溢出。
磁盘I/O：优化磁盘I/O操作可以显著提高处理效率。
数据一致性：在处理大文件时，要确保数据的完整性和一致性。
错误处理：添加适当的错误处理逻辑以应对文件读取、写入或排序过程中可能出现的异常情况。

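通过上述方法，我们可以分块处理大文件，并找出访问次数排名前十的IP地址。需要注意：这里是按行数切分的，同一个IP可能分散在多个块中，因此合并阶段仍要把所有不同IP的计数汇总到一个内存Map里；如果不同IP的数量本身大到内存放不下，应改为按hash(IP)取模分桶，使同一个IP只落入一个小文件，然后对每个桶分别求Top 10，最后再归并各桶的结果，这样才能真正做到不耗尽机器内存。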

15. 算法题：在长度为N的有序数组中快速查找所有值为M的元素下标（M可能重复出现）

要在长度为N的有序数组中快速查找所有值为M的元素下标，可以使用二分查找来找到值为M的第一个和最后一个位置，然后再遍历这些位置之间的元素获取所有的下标。这种方法的时间复杂度是O(log N) + O(k)，其中k是值为M的元素的数量。

下面是一个Java实现：

import java.util.ArrayList;
import java.util.List;

/**
 * Collects every index at which a target value occurs in an ascending sorted array,
 * using two binary searches to find the first and last occurrence.
 */
public class FindIndices {

    public static void main(String[] args) {
        int[] nums = {1, 2, 2, 2, 3, 4, 5};
        int target = 2;
        System.out.println(findAllIndices(nums, target));  // prints: [1, 2, 3]
    }

    /**
     * Returns all indices whose element equals {@code target}.
     *
     * @param nums array to search, expected to be sorted in ascending order
     * @param target value to look for
     * @return the matching indices in increasing order, or an empty list if there are none
     */
    public static List<Integer> findAllIndices(int[] nums, int target) {
        int start = findFirst(nums, target);
        int end = findLast(nums, target);

        List<Integer> indices = new ArrayList<>();
        // A start of -1 means the target is absent, so the list stays empty.
        if (start != -1) {
            int position = start;
            while (position <= end) {
                indices.add(position);
                position++;
            }
        }
        return indices;
    }

    /** Binary search for the leftmost index holding {@code target}; -1 if it is absent. */
    private static int findFirst(int[] nums, int target) {
        int found = -1;
        int lo = 0;
        int hi = nums.length - 1;

        while (lo <= hi) {
            int mid = lo + (hi - lo) / 2;
            if (nums[mid] < target) {
                lo = mid + 1;
                continue;
            }
            // Element is >= target: remember a match, then keep narrowing to the left.
            if (nums[mid] == target) {
                found = mid;
            }
            hi = mid - 1;
        }
        return found;
    }

    /** Binary search for the rightmost index holding {@code target}; -1 if it is absent. */
    private static int findLast(int[] nums, int target) {
        int found = -1;
        int lo = 0;
        int hi = nums.length - 1;

        while (lo <= hi) {
            int mid = lo + (hi - lo) / 2;
            if (nums[mid] > target) {
                hi = mid - 1;
                continue;
            }
            // Element is <= target: remember a match, then keep narrowing to the right.
            if (nums[mid] == target) {
                found = mid;
            }
            lo = mid + 1;
        }
        return found;
    }
}

代码说明

findAllIndices方法用于找到所有值为target的元素的下标。
findFirst方法用于找到target在数组中的第一个位置。
findLast方法用于找到target在数组中的最后一个位置。
如果找到的第一个位置是-1，说明数组中没有target，直接返回空列表。
否则，遍历从firstIndex到lastIndex的范围，将所有的下标添加到结果列表中。

面经原帖由三毛六站神发布，答案由程序员Hasity整理。

#软件开发笔面经#

【百度面经】提前批Java一面|0721

代码说明

注意事项

15. 算法题：在长度为N的有序数组中快速查找所有值为M的元素下标（M可能重复出现）

代码说明

热门公司

相关阅读

推荐文章

推荐题库

推荐问答