问题：

使用回溯拆分字符串

羊舌涵涤

2023-03-14

我试图编写一段代码，把一个不含空格的字符串拆分成有意义的单词。但当我输入“arealways”这样的字符串时，它返回 ['a', 'real', 'ways']，而我想要的是 ['are', 'always']；我的字典包含了所有这些单词。我怎样才能让代码持续回溯，直到找到最佳匹配？

返回“a”、“real”、“ways”的代码：

splitter.java：

/**
 * Splits a string that has no spaces into dictionary words by recursive
 * prefix matching with memoization.
 *
 * <p>The search stops at the first segmentation it finds, trying shorter
 * prefixes before longer ones; alternative segmentations are not compared.
 */
public class splitter {

    /** Memo of inputs already examined; a {@code null} value records that no split exists. */
    HashMap<String, String> map = new HashMap<>();
    /** Dictionary used for whole-word lookups. */
    Trie dict;

    /**
     * Creates a splitter backed by the given dictionary.
     *
     * @param t trie holding the known words
     */
    public splitter(Trie t) {
        this.dict = t;
    }

    /**
     * Segments {@code test} into dictionary words separated by single spaces.
     *
     * @param test the text to segment
     * @return {@code test} itself when it is a dictionary word, otherwise the
     *         first segmentation found, or {@code null} when none was found
     */
    public String split(String test) {
        // A complete dictionary word needs no further splitting.
        if (dict.contains(test)) {
            return test;
        }
        // Reuse an earlier answer (possibly null) for this exact input.
        if (map.containsKey(test)) {
            return map.get(test);
        }
        for (int cut = 0; cut < test.length(); cut++) {
            String head = test.substring(0, cut);
            if (!dict.contains(head)) {
                continue;
            }
            // The head is a word; the split succeeds only if the tail can be segmented too.
            String tail = split(test.substring(cut));
            if (tail == null) {
                continue;
            }
            String joined = head + " " + tail;
            map.put(test, joined);
            return joined;
        }
        // Every cut point failed: remember the dead end so it is not explored again.
        map.put(test, null);
        return null;
    }
}

Trie.java：

/**
 * Prefix tree storing a set of words, one character per edge.
 */
public class Trie {
    /** A single node of the tree; children are keyed by the next character. */
    public static class TrieNode {
        private final HashMap<Character, TrieNode> charMap = new HashMap<>();
        /** Character on the edge leading to this node (unset for the root). */
        public char c;
        /** True when the path from the root to this node spells a stored word. */
        public boolean endOWord;

        /** Placeholder that does nothing; words are added through {@link Trie#insert(String)}. */
        public void insert(String s){
        }

        /** Placeholder that always returns {@code true}; lookups go through {@link Trie#contains(String)}. */
        public boolean contains(String s){
            return true;
        }
    }

    public TrieNode root;

    /** Creates an empty trie. */
    public Trie() {
        root = new TrieNode();
    }

    /**
     * Adds a word, creating any missing nodes along its path.
     *
     * @param s the word to store
     */
    public void insert(String s){
        TrieNode p = root;
        for (char c : s.toCharArray()) {
            if (!p.charMap.containsKey(c)) {
                TrieNode node = new TrieNode();
                node.c = c;
                p.charMap.put(c, node);
            }
            p = p.charMap.get(c);
        }
        p.endOWord = true;
    }

    /**
     * Tests whether a word was previously inserted.
     *
     * @param s the word to look up
     * @return {@code true} only for a complete stored word, not for a mere prefix
     */
    public boolean contains(String s){
        TrieNode p = root;
        for (char c : s.toCharArray()) {
            if (!p.charMap.containsKey(c)) {
                return false;
            }
            p = p.charMap.get(c);
        }
        return p.endOWord;
    }

    /**
     * Inserts every line of the named file as a word.
     *
     * @param filename path of the dictionary file
     * @throws FileNotFoundException if the file cannot be opened
     */
    public void insertDictionary(String filename) throws FileNotFoundException{
        insertDictionary(new File(filename));
    }

    /**
     * Inserts every line of the file as a word. Lines are stored verbatim,
     * without trimming or filtering.
     *
     * @param file the dictionary file
     * @throws FileNotFoundException if the file cannot be opened
     */
    public void insertDictionary(File file) throws FileNotFoundException{
        // try-with-resources closes the Scanner (and the file handle) even if insert throws.
        try (Scanner sc = new Scanner(file)) {
            while (sc.hasNextLine()) {
                insert(sc.nextLine());
            }
        }
    }
}

WordSplitter类：

/**
 * Command-line driver: loads a dictionary file into a trie, segments a fixed
 * sample string and prints the outcome.
 */
public class WordSplitter {

    /**
     * Runs the sample segmentation.
     *
     * @param args unused
     * @throws FileNotFoundException if the dictionary file cannot be opened
     */
    public static void main(String[] args) throws FileNotFoundException {
        String input = "arealways";
        String dictionaryPath = "/Users/abc/Desktop/dictionary.txt";

        Trie dict = new Trie();
        dict.insertDictionary(dictionaryPath);

        // split returns null when it finds no segmentation.
        String segmented = new splitter(dict).split(input);
        System.out.println(segmented != null ? segmented : "No Splitting Found.");
    }
}

端木宏才

2023-03-14

使用提问者（OP）的split方法，以及Baeldung文章《Java中的Trie数据结构》里的Trie实现，我得到了以下结果：

realways=real ways
arealways=a real ways

如果我从单词列表（字典）中删除“real”一词，我会得到以下结果：

realways=null
arealways=are always

下面是我用来获得这些结果的全部代码：

/**
 * Splits a string that has no spaces into dictionary words by recursive
 * prefix matching with memoization.
 *
 * <p>The search stops at the first segmentation it finds, trying shorter
 * prefixes before longer ones; alternative segmentations are not compared.
 */
public class Splitter {

    /**
     * Memo of inputs already examined; a {@code null} value records that no
     * split exists. Kept per instance because cached answers are only valid
     * for this instance's dictionary.
     */
    private final Map<String, String> map = new HashMap<>();
    private final Trie dict;

    /**
     * Creates a splitter backed by the given dictionary.
     *
     * @param t trie holding the known words
     */
    public Splitter(Trie t) {
        dict = t;
    }

    /**
     * Demo: segments "arealways" with a small word list and prints every
     * memoized entry.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        List<String> words = List.of("a", "always", "are", "area", "real", "ways");
        String test = "arealways";

        Trie t = new Trie();
        for (String word : words) {
            t.insert(word);
        }
        Splitter splitter = new Splitter(t);
        splitter.split(test);
        splitter.map.entrySet().forEach(System.out::println);
    }

    /**
     * Segments {@code test} into dictionary words separated by single spaces.
     *
     * @param test the text to segment
     * @return {@code test} itself when it is a dictionary word, otherwise the
     *         first segmentation found, or {@code null} when none was found
     */
    public String split(String test) {
        // A complete dictionary word needs no further splitting.
        if (dict.find(test)) {
            return test;
        }
        // Reuse an earlier answer (possibly null) for this exact input.
        if (map.containsKey(test)) {
            return map.get(test);
        }
        // Start at 1: an empty prefix would recurse on the same input forever
        // if the dictionary ever contained the empty string.
        for (int i = 1; i < test.length(); i++) {
            String pre = test.substring(0, i);
            if (!dict.find(pre)) {
                continue;
            }
            String fixedEnd = split(test.substring(i));
            if (fixedEnd != null) {
                String result = pre + " " + fixedEnd;
                map.put(test, result);
                return result;
            }
        }
        // Every cut point failed: remember the dead end so it is not explored again.
        map.put(test, null);
        return null;
    }

    /** Prefix tree storing a set of words, one character per edge. */
    public static class Trie {
        private final TrieNode root = new TrieNode();

        /**
         * Tests whether a word was previously inserted.
         *
         * @param word the word to look up
         * @return {@code true} only for a complete stored word, not for a mere prefix
         */
        public boolean find(String word) {
            TrieNode current = root;
            for (int i = 0; i < word.length(); i++) {
                char ch = word.charAt(i);
                TrieNode node = current.getChildren().get(ch);
                if (node == null) {
                    return false;
                }
                current = node;
            }
            return current.isEndOfWord();
        }

        /**
         * Adds a word, creating any missing nodes along its path.
         *
         * @param word the word to store
         */
        public void insert(String word) {
            TrieNode current = root;
            for (char l : word.toCharArray()) {
                current = current.getChildren().computeIfAbsent(l, c -> new TrieNode());
            }
            current.setEndOfWord(true);
        }

        /** A single node of the tree; children are keyed by the next character. */
        public static class TrieNode {
            private final Map<Character, TrieNode> children = new HashMap<>();
            private boolean endOfWord;

            public Map<Character, TrieNode> getChildren() {
                return children;
            }

            public void setEndOfWord(boolean endOfWord) {
                this.endOfWord = endOfWord;
            }

            public boolean isEndOfWord() {
                return endOfWord;
            }
        }

        /**
         * Removes a word if it is stored, pruning nodes that no longer lead to any word.
         *
         * @param word the word to remove
         */
        public void delete(String word) {
            delete(root, word, 0);
        }

        /**
         * Recursive step of {@link #delete(String)}.
         *
         * @return {@code true} when {@code current} has no children left after
         *         the removal, so the caller may consider unlinking it
         */
        private boolean delete(TrieNode current, String word, int index) {
            if (index == word.length()) {
                if (!current.isEndOfWord()) {
                    return false;
                }
                current.setEndOfWord(false);
                return current.getChildren().isEmpty();
            }
            char ch = word.charAt(index);
            TrieNode node = current.getChildren().get(ch);
            if (node == null) {
                return false;
            }

            // Only unlink the child if it is childless and does not itself end another word.
            boolean shouldDeleteCurrentNode = delete(node, word, index + 1) && !node.isEndOfWord();

            if (shouldDeleteCurrentNode) {
                current.getChildren().remove(ch);
                return current.getChildren().isEmpty();
            }
            return false;
        }
    }
}

使用回溯拆分字符串

共有1个答案

相关问答

相关文章

相关阅读

相关工具

相关文档