当前位置: 首页 > 工具软件 > pangu.js > 使用案例 >

PanGu分词器

沃学
2023-12-01

https://top.chinaz.com/


https://sourceforge.net/projects/ktdictseg/
https://blog.csdn.net/lijun7788/article/details/7719439
http://www.cftea.com/c/2017/06/7991.asp
https://blog.csdn.net/wudiyong22/article/details/48289965
https://github.com/stanzhai/IKAnalyzer.NET
Lucene.net(4.8.0)+PanGu分词器
Install-Package jieba.NET -Version 0.42.2

https://github.com/anderscui/jieba.NET
// One segmenter instance is shared by every demo call below.
var jieba = new JiebaSegmenter();

// Full mode (cutAll: true).
var fullModeWords = jieba.Cut("我来到北京清华大学", cutAll: true);
Console.WriteLine("【全模式】:{0}", string.Join("/ ", fullModeWords));

// Accurate mode is the default when cutAll is not specified.
var accurateModeWords = jieba.Cut("我来到北京清华大学");
Console.WriteLine("【精确模式】:{0}", string.Join("/ ", accurateModeWords));

// Accurate mode by default; the HMM model is also used (new-word recognition).
var newWordModeWords = jieba.Cut("他来到了网易杭研大厦");
Console.WriteLine("【新词识别】:{0}", string.Join("/ ", newWordModeWords));

// Search-engine mode.
var searchModeWords = jieba.CutForSearch("小明硕士毕业于中国科学院计算所,后在日本京都大学深造");
Console.WriteLine("【搜索引擎模式】:{0}", string.Join("/ ", searchModeWords));

// Ambiguity resolution example.
var disambiguatedWords = jieba.Cut("结过婚的和尚未结过婚的");
Console.WriteLine("【歧义消除】:{0}", string.Join("/ ", disambiguatedWords));

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;

using System.IO;
using Lucene.Net.Index;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Lucene.Net.Util;
using Lucene.Net.Documents;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using JiebaNet.Segmenter;

namespace WindowsFormsApp1
{
    //http://www.zhuzhusoft.com/article.php?id=151
    //Install-Package Lucene.Net -Pre
    //Install-Package Lucene.Net.Analysis.Common -Version 4.8.0-beta00014

    /// <summary>
    /// Demo form: <c>button1</c> segments every <c>*.txt</c> file in a chosen folder with
    /// PanGu and stores the result in a Lucene.NET index; <c>button2</c> queries that index
    /// and shows the hits in <c>dataGridView1</c>.
    /// </summary>
    public partial class Form1 : Form
    {
        // Ensures index backward compatibility.
        private const LuceneVersion AppLuceneVersion = LuceneVersion.LUCENE_48;

        // Field names shared by the indexing and searching code paths.
        private const string NameField = "name";
        private const string FavoritePhraseField = "favoritePhrase";

        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Builds the on-disk location of the index: an "index" folder under the
        /// machine-wide application-data directory.
        /// </summary>
        /// <returns>Full path of the index directory.</returns>
        private static string GetIndexPath()
        {
            var basePath = Environment.GetFolderPath(
                Environment.SpecialFolder.CommonApplicationData);
            return Path.Combine(basePath, "index");
        }

        /// <summary>
        /// Lets the user pick a folder, then segments each <c>*.txt</c> file in it and adds
        /// one document per file (file path + space-separated PanGu words) to the index.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            if (this.folderBrowserDialog1.ShowDialog() != DialogResult.OK)
            {
                return;
            }

            this.textBox1.Text = this.folderBrowserDialog1.SelectedPath;

            // Do everything that can fail without the index *before* opening the writer,
            // so a failure here never leaves the index write lock held.
            string[] files = System.IO.Directory.GetFiles(this.textBox1.Text.Trim(), "*.txt");

            PanGu.Segment.Init();
            var segment = new PanGu.Segment();

            // One jieba segmenter is enough for all files.
            var segmenter = new JiebaSegmenter();

            // using-blocks guarantee the writer, analyzer and directory are released
            // even when reading or segmenting a file throws.
            using (var dir = FSDirectory.Open(GetIndexPath()))
            using (var analyzer = new StandardAnalyzer(AppLuceneVersion))
            using (var writer = new IndexWriter(dir, new IndexWriterConfig(AppLuceneVersion, analyzer)))
            {
                // NOTE(review): AddDocument appends, so indexing the same folder twice
                // produces duplicate documents — confirm whether that is intended.
                foreach (string file in files)
                {
                    // Read the file once and feed the same text to both segmenters.
                    string text = File.ReadAllText(file);

                    // DoSegment yields PanGu word entries; the generic collection type is
                    // required for word.Word to resolve.
                    ICollection<PanGu.WordInfo> words = segment.DoSegment(text);
                    var favoritePhrase = new StringBuilder();
                    foreach (PanGu.WordInfo word in words)
                    {
                        favoritePhrase.Append(' ').Append(word.Word);
                        Console.WriteLine(word.Word);
                    }

                    // jieba output is only printed for comparison; it is not indexed.
                    var segments = segmenter.Cut(text, cutAll: true);
                    Console.WriteLine("【全模式】:{0}", string.Join(" ", segments));

                    var doc = new Document
                    {
                        // StringField indexes but doesn't tokenize
                        new StringField(NameField, file, Field.Store.YES),
                        new TextField(FavoritePhraseField, favoritePhrase.ToString(), Field.Store.YES)
                    };

                    writer.AddDocument(doc);
                }

                // Flush once for the whole batch instead of once per document.
                writer.Flush(triggerMerge: false, applyAllDeletes: false);
            }
        }

        /// <summary>
        /// Searches the index for the text in <c>textBox2</c> and binds the top 20 hits
        /// (name and stored phrase) to <c>dataGridView1</c>, also echoing them to the console.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button2_Click(object sender, EventArgs e)
        {
            var dataTable = new DataTable();
            dataTable.Columns.Add(NameField);
            dataTable.Columns.Add(FavoritePhraseField);

            string queryText = this.textBox2.Text.Trim();

            // The index is opened read-only from disk here. (An alternative for real-time
            // results would be to reuse an IndexWriter and call
            // writer.GetReader(applyAllDeletes: true).)
            using (var dir = FSDirectory.Open(GetIndexPath()))
            {
                // Nothing to search for, or nothing indexed yet: show an empty grid
                // instead of throwing from DirectoryReader.Open.
                if (queryText.Length == 0 || !DirectoryReader.IndexExists(dir))
                {
                    this.dataGridView1.DataSource = dataTable;
                    return;
                }

                // Search with a phrase.
                // NOTE(review): the term text is used verbatim (it is not run through an
                // analyzer), so it must equal a token exactly as produced at index time —
                // verify this yields the intended matches for Chinese input.
                var phrase = new MultiPhraseQuery
                {
                    new Term(FavoritePhraseField, queryText),
                };

                using (var reader = DirectoryReader.Open(dir))
                {
                    var searcher = new IndexSearcher(reader);
                    var hits = searcher.Search(phrase, 20 /* top 20 */).ScoreDocs;

                    // Display the output in a table
                    Console.WriteLine($"{"Score",10}" +
                        $" {"Name",-15}" +
                        $" {"Favorite Phrase",-40}");
                    foreach (var hit in hits)
                    {
                        var foundDoc = searcher.Doc(hit.Doc);
                        string name = foundDoc.Get(NameField);
                        string favoritePhrase = foundDoc.Get(FavoritePhraseField);

                        DataRow dr = dataTable.NewRow();
                        dr[NameField] = name;
                        dr[FavoritePhraseField] = favoritePhrase;
                        dataTable.Rows.Add(dr);

                        Console.WriteLine($"{hit.Score:f8}" +
                            $" {name,-15}" +
                            $" {favoritePhrase,-40}");
                    }
                }
            }

            this.dataGridView1.DataSource = dataTable;
        }
    }
}

it自媒体

1、做长远计划；2、精准人群定位；3、差异化竞争；4、有营销点；5、吸粉更容易；6、打造知名度；7、时间定位运营；8、低成本高收益；9、增加客户粘性；10、内容服务为王。

https://www.svgrepo.com/
https://kalendar.altinselimi.com/
https://www.yuque.com/explore/headlines
Mybatis.net
https://xiaoluoboding.github.io/monthly/2021/2021-03.html#%E5%B7%A5%E5%85%B7
https://xiaoluoboding.github.io/monthly/2019/#%F0%9F%8D%AD-%E8%AE%BE%E8%AE%A1%E5%88%9B%E6%84%8F
http://www.chinavalue.net/Wiki/%E8%87%AA%E5%AA%92%E4%BD%93.aspx
https://www.163.com/dy/article/G3MQE1MQ0511GV8V.html
https://www.infoq.cn/
https://www.infoq.cn/article/W4leI4XZ32eSTqFJ8qPl
https://xiaoluoboding.github.io/monthly/2019/2019-01.html#%E6%95%99%E7%A8%8B
http://yixiaoer.coozf.com/
SpringBoot+SpringMVC+Mybatis+Redis+ELK+Quartz+Websocket+vue.js
https://activity.feishu.cn/


https://www.yuque.com/woniu666/tech_doc/pueka0

网站的排行榜
https://top.chinaz.com/

 类似资料: