隐藏

C#分词SQL算法

发布:2021/11/19 16:08:33作者:管理员 来源:本站 浏览次数:640

1、定义保存分词的list<string>

    public static List<Participle> _participles = new List<Participle>();
    public static List<Participle> _participles_1 = new List<Participle>();
    public static List<string> _sql = new List<string>();

2、调用分词接口拆分查询文本

 /// 利用SCWS进行中文分词
    /// 1370676020@qq.com
    /// </summary>
    /// <param name="str">需要分词的字符串</param>
    /// <returns>用空格分开的分词结果</returns>
    public async Task<string> Segment(string str)
    {
        Task<string> items = Task.Run(()=> {
            System.Text.StringBuilder sb = new System.Text.StringBuilder();
            try
            {
                string s = string.Empty;
                System.Net.CookieContainer cookieContainer = new System.Net.CookieContainer();
                // 将提交的字符串数据转换成字节数组           
                byte[] postData = System.Text.Encoding.ASCII.GetBytes("data=" + System.Web.HttpUtility.UrlEncode(str) + "&respond=json&charset=utf8&ignore=yes&duality=no&traditional=no&multi=0");

                // 设置提交的相关参数
                System.Net.HttpWebRequest request = System.Net.WebRequest.Create("http://www.ftphp.com/scws/api.php") as System.Net.HttpWebRequest;
                request.Method = "POST";
                request.KeepAlive = false;
                request.ContentType = "application/x-www-form-urlencoded";
                request.CookieContainer = cookieContainer;
                request.ContentLength = postData.Length;

                // 提交请求数据
                System.IO.Stream outputStream = request.GetRequestStream();
                outputStream.Write(postData, 0, postData.Length);
                outputStream.Close();

                // 接收返回的页面
                System.Net.HttpWebResponse response = request.GetResponse() as System.Net.HttpWebResponse;
                System.IO.Stream responseStream = response.GetResponseStream();
                System.IO.StreamReader reader = new System.IO.StreamReader(responseStream, System.Text.Encoding.GetEncoding("utf-8"));
                string val = reader.ReadToEnd();

                Newtonsoft.Json.Linq.JObject results = Newtonsoft.Json.Linq.JObject.Parse(val);
                foreach (var item in results["words"].Children())
                {


                    Newtonsoft.Json.Linq.JObject word = Newtonsoft.Json.Linq.JObject.Parse(item.ToString());
                    var _w = word["word"].ToString();
                    if (_w.Length > 1)
                    {
                        _participles.Add(new Participle { Title = _w });
                    }


                    sb.Append("  or title like '%" + word["word"].ToString() + "%'");
                    // sb.Append(word["word"].ToString() + " ");
                    //Response.Write(word["word"].ToString() + "<br/>");
                }
            }
            catch
            {
                return "";
            }

            return sb.ToString();

        });
        return await items;
    }

3、根据分词列表进行SQL语句组装

public async Task<List<string>> GetSearchWordList(int top=5)
    {
        int topcount = _participles.Count;
        if (topcount < 2)
        {
            return null;
        }
        Task<List<string>> items = Task.Run(() =>
        {
            List<string> items1 = new List<string>();

            _sql.Add(string.Format("select top {2} *,{1} as wcount from tbarticle where title like '%{0}%' UNION ALL ", KeyWord,100,top));

            #region 1 职业病,诊断,鉴定,管理办法,中华人民共和国,国家,卫生健康,委员会,
            /*
职业病,诊断,鉴定,管理办法,中华人民共和国,国家,卫生健康,委员会,
职业病,诊断,鉴定,管理办法,中华人民共和国,国家,卫生健康,
职业病,诊断,鉴定,管理办法,中华人民共和国,国家,
职业病,诊断,鉴定,管理办法,中华人民共和国,
职业病,诊断,鉴定,管理办法,
职业病,诊断,鉴定,
职业病,诊断,
             *
             * */
            for (int i = 0; i < _participles.Count - 1; i++)
            {
                var str = string.Empty;
                for (int ii = 0; ii < topcount - i; ii++)
                {
                    str = str + " title like '%" + _participles[ii].Title + "%' and ";
                }
                if (topcount - i == 0) break;
                items1.Add(str);
                if (!string.IsNullOrEmpty(str))
                {
                    str = str.Substring(0, str.Length - 4);
                }

                var sql = string.Format("select top {2} *,{1} as wcount  from tb where {0} UNION ALL ", str, topcount - i,top);
                //Response.Write(str + "<br/>");
                _sql.Add(sql);
            }
            #endregion

            #region 2 诊断,鉴定,管理办法,中华人民共和国,国家,卫生健康,委员会,
            /*
诊断,鉴定,管理办法,中华人民共和国,国家,卫生健康,委员会,
鉴定,管理办法,中华人民共和国,国家,卫生健康,委员会,
管理办法,中华人民共和国,国家,卫生健康,委员会,
中华人民共和国,国家,卫生健康,委员会,
国家,卫生健康,委员会,
卫生健康,委员会,
            *
             *
             * */

            for (int i = 0; i < _participles.Count - 2; i++)
            {
                var str = string.Empty;

                for (int ii = i; ii < topcount - 1; ii++)
                {

                    str = str + " title like '%" + _participles[ii + 1].Title + "%' and ";
                }
                if (topcount - 1 - i == 0) break;
                items1.Add(str);
                if (!string.IsNullOrEmpty(str))
                {
                    str = str.Substring(0, str.Length - 4);
                }
                var sql = string.Format("select top {2} *,{1} as wcount  from tb where {0} UNION ALL ", str, topcount - 1 - i,top);
                //Response.Write(str + "<br/>");
                _sql.Add(sql);
            }
            #endregion

            #region 3
            for (int i = 1; i < _participles.Count - 1; i++)
            {
                var str = string.Empty;
                for (int ii = i; ii < topcount - i; ii++)
                {
                    str = str + " title like '%" + _participles[ii].Title + "%' and ";
                }
                if (topcount - i * 2 == 1 || topcount - i * 2 == 0) break;
                items1.Add(str);
                if (!string.IsNullOrEmpty(str))
                {
                    str = str.Substring(0, str.Length - 4);
                }
                var sql = string.Format("select top {2} *,{1} as wcount  from tb where {0} UNION ALL ", str, topcount - i*2,top);
                //Response.Write(str + "<br/>" );
                _sql.Add(sql);
            }
            #endregion


            return items1;
        });
        return await items;

    }


4、使用

protected void Page_Load(object sender, EventArgs e)
    {
        if (!IsPostBack)
        {
            _participles.Clear();
            _participles_1.Clear();
            _sql.Clear();

             _word= await Segment(KeyWord);
            await GetSearchWordList();

            var str = "";
            var _sql_1 = "";
            foreach (var sql in _sql)
            {
                    str = str + sql;
            }

            str = str.Substring(0, str.Length - 10);
            _sql_1 = "select * ,wcount from tb1 tce right join (select top 100 * from (select id,MAX(wcount) wcount from (" + str + ") a  group by id) b order by wcount desc) s on tce.id=s.id ";

            

        }



    }