<noscript id="eassg"><table id="eassg"></table></noscript>
  • <strike id="eassg"><s id="eassg"></s></strike>
  • <ul id="eassg"></ul>
    注冊|登錄

    聯系電話:024-31891684  13390130939
    沈陽軟件公司--沈陽軟件定制

    沈陽軟件開發_沈陽軟件公司_沈陽軟件定制/軟件/最新技術

    Latest technology 最新技術

    辦公OA--正文提取中用到的正則表達式

    瀏覽量:2589

    CRM定制 辦公OA

    #region 相關(guān)正則表達(dá)式

     
    /// <summary>
    /// Matches markup and noise in an HTML document. Per the original comment this is
    /// the pattern used to remove all HTML tags. The alternatives, in order, are:
    /// a bracketed block that opens with "[name]" or "[name=value]" and ends with a "[/...]" closer;
    /// an anchor element with at least two characters of text (group "lj");
    /// style, select and script elements (groups "Style", "select", "Script");
    /// HTML comments (group "Explein"); list items (group "li");
    /// any other tag, with or without attributes (group "Html");
    /// named entities such as &amp;amp; (group "Other");
    /// a '#' followed by six characters from a-z / 0-9 (group "Other2");
    /// runs of whitespace (group "Space"); and numeric entities such as &amp;#160;.
    /// </summary>
    /// <remarks>
    /// The "lj" alternative only matches when the character right after the closing anchor tag
    /// is outside U+4E00-U+9FA5 and U+FE30-U+FFA0 and is not one of , . " ) ;
    /// NOTE(review): RegexOptions.ExplicitCapture turns the unnamed parentheses into
    /// non-capturing groups, so the backreference \1 in the first alternative presumably
    /// resolves to the first named group ("lj") rather than to the bracket tag name.
    /// If so, the first alternative can never match - confirm the intent before changing it.
    /// NOTE(review): because of RegexOptions.IgnoreCase, "Other2" also accepts upper-case letters,
    /// and its class is a-z rather than a-f, so it matches more than hex colour codes - confirm.
    /// </remarks>
    private static readonly Regex FilterAll = new Regex(
    @"(\[([^=]*)(=[^\]]*)?\][\s\S]*?\[/\1\])|(?<lj>(?=[^\u4E00-\u9FA5\uFE30-\uFFA0,."");])<a\s+[^>]*>[^<]{2,}</a>(?=[^\u4E00-\u9FA5\uFE30-\uFFA0,."");]))|(?<Style><style[\s\S]+?/style>)|(?<select><select[\s\S]+?/select>)|(?<Script><script[\s\S]*?/script>)|(?<Explein><\!\-\-[\s\S]*?\-\->)|(?<li><li(\s+[^>]+)?>[\s\S]*?/li>)|(?<Html></?\s*[^> ]+(\s*[^=>]+?=['""]?[^""']+?['""]?)*?[^\[<]*>)|(?<Other>&[a-zA-Z]+;)|(?<Other2>\#[a-z0-9]{6})|(?<Space>\s+)|(\&\#\d+\;)",
    RegexOptions.ExplicitCapture
    | RegexOptions.Multiline
    | RegexOptions.IgnoreCase); //(?<Link><a[\s\S]*?</a>)|
    //(?<Style><style[\s\S]+?/style>)|(?<select><select[\s\S]+?/select>)|(?<Script><script[\s\S]*?/script>)|(?<Explein><\!\-\-[\s\S]*?\-\->)|(?<li><li(\s+[^>]+)?>[\s\S]*?/li>)|(?<Html></?\s*[^> ]+(\s*[^=>]+?=['""]?[^""']+?['""]?)*?[^\[<]*>)|(?<Other>&[a-zA-Z]+;)|(?<Other2>\#[a-z0-9]{6})|(?<Space>\s+)
     
    /// <summary>
    /// Matches a single opening or closing title tag that carries no attributes.
    /// Whitespace is tolerated after the opening angle bracket and before the closing one;
    /// matching is case-insensitive.
    /// </summary>
    private static readonly Regex FindTitle =
        new Regex(
            @"<\s*/?title\s*>",
            RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.ExplicitCapture);
     
    /// <summary>
    /// Matches a pair of attribute-less title tags and exposes the text between them
    /// through the named group "Content". The content is matched lazily and may span lines.
    /// </summary>
    private static readonly Regex FindTitleContent =
        new Regex(
            @"<\s*/?title\s*>(?<Content>[\s\S]*?)<\s*/?title\s*>",
            RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.ExplicitCapture);
     
    /// <summary>
    /// Matches an opening or closing heading tag (h1 to h6) or strong tag that carries no attributes.
    /// Matching is case-insensitive and tolerates whitespace just inside the angle brackets.
    /// </summary>
    /// <remarks>
    /// The previous pattern only accepted a bare "h" tag, which is not an HTML element, so real
    /// headings were never found. The heading level digit is optional so that the bare "h" form
    /// accepted before still matches.
    /// </remarks>
    private static readonly Regex FindHStrong = new Regex(
    @"<\s*/?h[1-6]?\s*>|<\s*/?strong\s*>",
    RegexOptions.ExplicitCapture
    | RegexOptions.Multiline
    | RegexOptions.IgnoreCase);
     
    /// <summary>
    /// Matches attribute-less paragraph tags (opening or closing), line-break tags
    /// (with or without a trailing slash) and table-row tags (opening or closing).
    /// Matching is case-insensitive.
    /// </summary>
    private static readonly Regex FindPB =
        new Regex(
            @"<\s*/?p\s*>|<\s*br\s*/?>|<\s*/?tr\s*>",
            RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.ExplicitCapture);
     
    /// <summary>
    /// Matches the non-breaking-space entity, with or without its terminating semicolon,
    /// case-insensitively.
    /// </summary>
    /// <remarks>
    /// The semicolon is part of the match when present, so replacing a match no longer
    /// leaves a stray ';' behind. The semicolon-less form accepted before still matches.
    /// </remarks>
    private static readonly Regex FindNbsp = new Regex(
    @"&nbsp;?",
    RegexOptions.ExplicitCapture
    | RegexOptions.Multiline
    | RegexOptions.IgnoreCase);
     
    /// <summary>
    /// Matches the shortest run of text (line breaks included) that is followed by a literal
    /// dollar sign, and exposes that run through the named group "Content".
    /// The original comment describes the dollar sign as the end marker.
    /// </summary>
    private static readonly Regex FindS =
        new Regex(
            @"(?<Content>[\s\S]*?)\$",
            RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.ExplicitCapture);
     
    /// <summary>
    /// Matches any single punctuation mark from a fixed set (comma, full stop, exclamation mark,
    /// semicolon, colon, ellipsis, question mark, Chinese book-title marks and quotation marks).
    /// Per the original comment it is used to decide whether a piece of text is a normal sentence.
    /// </summary>
    /// <remarks>
    /// NOTE(review): several ASCII marks appear twice in the character class, which is redundant;
    /// presumably one of each pair was meant to be the full-width form - confirm against the
    /// original source before relying on this for Chinese text.
    /// </remarks>
    private static readonly Regex IsSen =
        new Regex(
            @"[,.,。!!;;::……??《》“”""]",
            RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.ExplicitCapture);
     
    /// <summary>
    /// Matches the literal marker "[(h)]", case-insensitively. Per the original comment it is
    /// used to spot junk sentences that contain too many heading/strong markers.
    /// </summary>
    private static readonly Regex IsWs =
        new Regex(
            @"\[\(h\)\]",
            RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.ExplicitCapture);
     
    /// <summary>
    /// Matches a single middle dot, hyphen or colon. Per the original comment it is used to
    /// spot junk sentences that contain too many of these separators.
    /// </summary>
    /// <remarks>
    /// The first alternative used to start with an escaped bracket, which made it match the
    /// three-character text "[·]" instead of the middle dot itself; the stray backslash is removed.
    /// NOTE(review): the colon is listed twice in the last class; presumably one of them was
    /// meant to be the full-width colon - confirm against the original source.
    /// </remarks>
    private static readonly Regex IsWsM = new Regex(
    @"[·]|[-]|[::]",
    RegexOptions.ExplicitCapture
    | RegexOptions.Multiline
    | RegexOptions.IgnoreCase);
     
    /// <summary>
    /// Matches typical forum (BBS) page features: a floor label of the form
    /// "第...樓" with 1 to 50 characters in between, or a "Powered by" notice
    /// followed anywhere later by "Dvbbs" or "Discuz". Matching is case-insensitive.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the floor label uses the traditional character only; pages written in
    /// simplified Chinese would not match it - confirm which script the crawled pages use.
    /// </remarks>
    private static readonly Regex IsBbsInfo =
        new Regex(
            @"第[^樓]{1,50}樓|Powered\s*/?by[\s\S]*?Dvbbs|Powered\s*/?by[\s\S]*?Discuz",
            RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.ExplicitCapture);
     
    /// <summary>
    /// Extracts the value of the "keywords" meta tag into the named group "KeyWords".
    /// Both attribute orders are supported: name before content, and content before name.
    /// Attribute values may be single-quoted, double-quoted or unquoted; matching is case-insensitive.
    /// </summary>
    /// <remarks>
    /// The pattern literal used to end with a line break and indentation before its closing quote.
    /// Inside a verbatim string those characters are part of the pattern, so the content-before-name
    /// alternative only matched when the tag was followed by exactly that whitespace.
    /// The literal now ends right after the closing angle bracket.
    /// </remarks>
    private static readonly Regex mKeyWord = new Regex(
    @"<meta\s*name\s*=\s*['""]?keywords['""]?\s*content\s*=\s*['""]?(?<KeyWords>[^'"">]*)['""]?[^>]*>|<meta\s*content\s*=\s*['""]?(?<KeyWords>[^'"">]*)['""]?\s*name\s*=\s*['""]?keywords['""]?\s*[^>]*>",
    RegexOptions.ExplicitCapture | RegexOptions.Multiline | RegexOptions.IgnoreCase);
     
    /// <summary>
    /// Extracts the value of the "description" meta tag into the named group "description".
    /// Both attribute orders are supported: name before content, and content before name.
    /// Attribute values may be single-quoted, double-quoted or unquoted; matching is case-insensitive.
    /// </summary>
    /// <remarks>
    /// The pattern literal used to end with a line break and indentation before its closing quote.
    /// Inside a verbatim string those characters are part of the pattern, so the content-before-name
    /// alternative only matched when the tag was followed by exactly that whitespace.
    /// The literal now ends right after the closing angle bracket.
    /// </remarks>
    private static readonly Regex mDescription = new Regex(
    @"<meta\s*name\s*=\s*['""]?description['""]?\s*content\s*=\s*['""]?(?<description>[^'"">]*)['""]?[^>]*>|<meta\s*content\s*=\s*['""]?(?<description>[^'"">]*)['""]?\s*name\s*=\s*['""]?description['""]?\s*[^>]*>",
    RegexOptions.ExplicitCapture | RegexOptions.Multiline | RegexOptions.IgnoreCase);
     
    /// <summary>
    /// Extracts the value of the "tagwords" meta tag into the named group "tagwords".
    /// Both attribute orders are supported: name before content, and content before name.
    /// Attribute values may be single-quoted, double-quoted or unquoted; matching is case-insensitive.
    /// </summary>
    /// <remarks>
    /// The pattern literal used to end with a line break and indentation before its closing quote.
    /// Inside a verbatim string those characters are part of the pattern, so the content-before-name
    /// alternative only matched when the tag was followed by exactly that whitespace.
    /// The literal now ends right after the closing angle bracket.
    /// </remarks>
    private static readonly Regex mTag = new Regex(
    @"<meta\s*name\s*=\s*['""]?tagwords['""]?\s*content\s*=\s*['""]?(?<tagwords>[^'"">]*)['""]?[^>]*>|<meta\s*content\s*=\s*['""]?(?<tagwords>[^'"">]*)['""]?\s*name\s*=\s*['""]?tagwords['""]?\s*[^>]*>",
    RegexOptions.ExplicitCapture | RegexOptions.Multiline | RegexOptions.IgnoreCase);
     
    /// <summary>
    /// Matches a whole line made of at most 8 leading characters (none of them whitespace or
    /// the character "說"), a colon, and at most 10 further characters.
    /// Per the original comment such short "label: value" lines are treated as junk sentences.
    /// </summary>
    /// <remarks>
    /// RegexOptions.Multiline makes the anchors apply to every line of the input.
    /// NOTE(review): the colon is listed twice in the character class; presumably one of them was
    /// meant to be the full-width colon - confirm against the original source.
    /// NOTE(review): the original comment also mentions a check on the text after the colon,
    /// which this pattern does not implement - confirm whether that is handled elsewhere.
    /// </remarks>
    private static readonly Regex IsWsMM =
        new Regex(
            @"^[^說\s]{0,8}?[::].{0,10}$",
            RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.ExplicitCapture);
     
    /// <summary>
    /// Finds the URL marker line written by the spider and exposes the part after "http://"
    /// (up to the end of the line) through the named group "URL".
    /// </summary>
    /// <remarks>
    /// The previous literal contained a romanisation fragment "(dāng)" in the middle of the marker
    /// text, so it could only match input that contained those Latin letters.
    /// The fragment is removed. Because the spider's exact marker text is not visible here, the
    /// pattern accepts the marker in traditional or simplified characters, with an ASCII or
    /// full-width colon.
    /// NOTE(review): confirm the exact marker the spider writes and tighten the pattern if desired.
    /// </remarks>
    private static readonly Regex txtUrl = new Regex(
    @"[當当]前URL[為为][::]http://(?<URL>.*)",
    RegexOptions.ExplicitCapture
    | RegexOptions.Multiline
    | RegexOptions.IgnoreCase);
     
    /// <summary>
    /// Finds the anchor-description marker line written by the spider and exposes the text after
    /// the marker (up to the end of the line) through the named group "Describe".
    /// </summary>
    /// <remarks>
    /// The previous literal contained a romanisation fragment "(dāng)" in the middle of the marker
    /// text, so it could only match input that contained those Latin letters.
    /// The fragment is removed. Because the spider's exact marker text is not visible here, the
    /// pattern accepts the marker in traditional or simplified characters, with an ASCII or
    /// full-width colon.
    /// NOTE(review): confirm the exact marker the spider writes and tighten the pattern if desired.
    /// </remarks>
    private static readonly Regex txtDescription = new Regex(
    @"[當当]前[鏈链]接描述[為为][::](?<Describe>.*)",
    RegexOptions.ExplicitCapture
    | RegexOptions.Multiline
    | RegexOptions.IgnoreCase);
     
    ///// <summary>
    ///// 取需要a標簽
    ///// </summary>
    //private static readonly Regex cleanFirst = new Regex(
    // @"([\u4E00-\u9FA5]|[\uFE30-\uFFA0]|[,."");])(?<Robbish1><a\s+[^>]*>)[^<]{1,6}(?<Robbish2></a>)([\u4E00-\u9FA5]|[\uFE30-\uFFA0]|[,."");])", RegexOptions.ExplicitCapture | RegexOptions.Multiline | RegexOptions.IgnoreCase);
     
    #endregion

    沈陽團購網|營口網站制作|沈陽軟件公司|軟件定制|網站建設|加盟易勢|提交問題

    日韩精品无码熟人妻视频| 在线观看精品视频一区二区三区| 国农村精品国产自线拍| 欧洲精品码一区二区三区免费看| 国产精品久久久久三级| 亚洲A∨精品一区二区三区下载| 99久久免费精品视频| 国产精品拍天天在线| 真实国产乱子伦精品视频| 波霸在线精品视频免费观看| 国产高清在线精品一本大道国产| 日韩美女一级毛片| 日韩av无码中文无码电影| 国产精品久久久久国产精品| 精品国产这么小也不放过| 欧美日韩精品一区二区在线观看| 国产精品高清视亚洲一区二区 | 国产成人精品免费视| 99re6在线精品视频免费播放| 无码日韩人妻精品久久蜜桃| 久久99精品久久久| 亚洲国产精品一区| 日韩精品无码一本二本三本| 无码日韩精品一区二区三区免费| 亚洲国产精品婷婷久久| 99re热这里只有精品18| 久久发布国产伦子伦精品| 久久99精品久久久久久久不卡| 在线电影国产精品| 精品久久无码中文字幕| 97精品一区二区视频在线观看| 国产精品久久久久久福利漫画| 亚洲精品熟女国产| 国产精品视频久久久| 九九精品久久久久久噜噜| 亚洲精品无码久久久久秋霞| 人妻少妇看A偷人无码精品视频 | 在线观看亚洲精品福利片 | 香蕉久久国产精品免| 热99re久久精品2久久久| 国产成人高清精品免费鸭子|