<noscript id="eassg"><table id="eassg"></table></noscript>
  • <strike id="eassg"><s id="eassg"></s></strike>
  • <ul id="eassg"></ul>
    注冊|登錄

    聯系電話:024-31891684  13390130939
    沈陽軟件公司--沈陽軟件定制

    沈陽軟件開發_沈陽軟件公司_沈陽軟件定制/軟件/最新技術

    Latest technology最新技術

    正文提取中用到的正則表達式

    瀏覽量:2870

    #region 相關正則表達式

     
    /// <summary>
    /// Strips all HTML tags (and several kinds of non-content markup) from a page.
    /// </summary>
    /// <remarks>
    /// The pattern is one large alternation. Its named branches are, in order:
    /// <c>lj</c> (an anchor element whose text is at least two characters long),
    /// <c>Style</c>, <c>select</c>, <c>Script</c> (whole elements including their content),
    /// <c>Explein</c> (HTML comments), <c>li</c> (whole list items),
    /// <c>Html</c> (any remaining single tag), <c>Other</c> (named character entities),
    /// <c>Other2</c> (a '#' followed by six letters/digits), and <c>Space</c> (whitespace runs).
    /// Two unnamed branches target bracketed "[tag]...[/tag]" blocks and numeric
    /// character entities of the form "&amp;#123;".
    ///
    /// NOTE(review): RegexOptions.ExplicitCapture turns the unnamed groups into
    /// non-capturing groups, so the "\1" backreference in the first branch appears to
    /// resolve to the first named group (<c>lj</c>) rather than to the bracketed tag
    /// name; if so, that branch can never match. Confirm before relying on it.
    ///
    /// NOTE(review): the check in front of "&lt;a" is a lookahead, not a lookbehind, so it
    /// tests the '&lt;' character itself rather than the character preceding the link.
    /// Presumably a lookbehind was intended; verify against the callers.
    /// </remarks>
    private static readonly Regex FilterAll = new Regex(
    @"(\[([^=]*)(=[^\]]*)?\][\s\S]*?\[/\1\])|(?<lj>(?=[^\u4E00-\u9FA5\uFE30-\uFFA0,."");])<a\s+[^>]*>[^<]{2,}</a>(?=[^\u4E00-\u9FA5\uFE30-\uFFA0,."");]))|(?<Style><style[\s\S]+?/style>)|(?<select><select[\s\S]+?/select>)|(?<Script><script[\s\S]*?/script>)|(?<Explein><\!\-\-[\s\S]*?\-\->)|(?<li><li(\s+[^>]+)?>[\s\S]*?/li>)|(?<Html></?\s*[^> ]+(\s*[^=>]+?=['""]?[^""']+?['""]?)*?[^\[<]*>)|(?<Other>&[a-zA-Z]+;)|(?<Other2>\#[a-z0-9]{6})|(?<Space>\s+)|(\&\#\d+\;)",
    RegexOptions.ExplicitCapture
    | RegexOptions.Multiline
    | RegexOptions.IgnoreCase); //(?<Link><a[\s\S]*?</a>)|
    // Earlier variant of the pattern, kept for reference (no bracketed-tag, anchor or numeric-entity branches):
    //(?<Style><style[\s\S]+?/style>)|(?<select><select[\s\S]+?/select>)|(?<Script><script[\s\S]*?/script>)|(?<Explein><\!\-\-[\s\S]*?\-\->)|(?<li><li(\s+[^>]+)?>[\s\S]*?/li>)|(?<Html></?\s*[^> ]+(\s*[^=>]+?=['""]?[^""']+?['""]?)*?[^\[<]*>)|(?<Other>&[a-zA-Z]+;)|(?<Other2>\#[a-z0-9]{6})|(?<Space>\s+)
     
    /// <summary>
    /// Matches a bare opening or closing <c>&lt;title&gt;</c> tag (no attributes),
    /// ignoring case and tolerating whitespace inside the angle brackets.
    /// </summary>
    private static readonly Regex FindTitle =
        new Regex(
            @"<\s*/?title\s*>",
            RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.ExplicitCapture);
     
    /// <summary>
    /// Captures the text between two <c>&lt;title&gt;</c> tags into the
    /// <c>Content</c> group. The capture is lazy, so it stops at the first
    /// following title tag; either tag may be written with or without a slash.
    /// </summary>
    private static readonly Regex FindTitleContent =
        new Regex(
            @"<\s*/?title\s*>(?<Content>[\s\S]*?)<\s*/?title\s*>",
            RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.ExplicitCapture);
     
    /// <summary>
    /// Matches a bare opening or closing heading tag (<c>&lt;h1&gt;</c> through
    /// <c>&lt;h6&gt;</c>, plus the legacy bare <c>&lt;h&gt;</c> form) or a bare
    /// <c>&lt;strong&gt;</c> tag, ignoring case.
    /// </summary>
    /// <remarks>
    /// The heading level digit is optional so that input previously matched as
    /// <c>&lt;h&gt;</c> still matches. Real HTML headings always carry a level
    /// (h1-h6), which the earlier pattern could not match. Tags that carry
    /// attributes are still not matched.
    /// </remarks>
    private static readonly Regex FindHStrong = new Regex(
        @"<\s*/?h[1-6]?\s*>|<\s*/?strong\s*>",
        RegexOptions.ExplicitCapture
        | RegexOptions.Multiline
        | RegexOptions.IgnoreCase);
     
    /// <summary>
    /// Matches the bare block-separating tags: opening/closing <c>&lt;p&gt;</c>,
    /// <c>&lt;br&gt;</c> (optionally self-closed), and opening/closing
    /// <c>&lt;tr&gt;</c>. Matching ignores case; tags with attributes do not match.
    /// </summary>
    private static readonly Regex FindPB =
        new Regex(
            @"<\s*/?p\s*>|<\s*br\s*/?>|<\s*/?tr\s*>",
            RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.ExplicitCapture);
     
    /// <summary>
    /// Matches the text <c>&amp;nbsp</c> in any letter case. The trailing
    /// semicolon of the entity is not part of the match.
    /// </summary>
    private static readonly Regex FindNbsp =
        new Regex(
            @"&nbsp",
            RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.ExplicitCapture);
     
    /// <summary>
    /// Captures, into the <c>Content</c> group, the shortest run of text that is
    /// terminated by a literal dollar sign (the end marker).
    /// </summary>
    private static readonly Regex FindS =
        new Regex(
            @"(?<Content>[\s\S]*?)\$",
            RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.ExplicitCapture);
     
    /// <summary>
    /// Tests whether text looks like a regular sentence: it matches any single
    /// punctuation character from the listed set (commas, full stops, exclamation
    /// and question marks, semicolons, colons, ellipsis, book-title marks, quotes).
    /// </summary>
    private static readonly Regex IsSen =
        new Regex(
            @"[,.,。!!;;::……??《》“”""]",
            RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.ExplicitCapture);
     
    /// <summary>
    /// Junk-sentence probe for heading/strong markers: matches the literal
    /// marker text <c>[(h)]</c>, ignoring case.
    /// </summary>
    private static readonly Regex IsWs =
        new Regex(
            @"\[\(h\)\]",
            RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.ExplicitCapture);
     
    /// <summary>
    /// Junk-sentence probe: matches each occurrence of a middle dot (·), a hyphen
    /// or a colon, so callers can judge whether a sentence contains too many of them.
    /// </summary>
    /// <remarks>
    /// The earlier pattern began with an escaped bracket, so its first branch
    /// matched only the literal three-character text "[·]" and a lone middle dot
    /// was never found. All three characters now live in one character class.
    /// </remarks>
    private static readonly Regex IsWsM = new Regex(
        @"[·\-:]",
        RegexOptions.ExplicitCapture
        | RegexOptions.Multiline
        | RegexOptions.IgnoreCase);
     
    /// <summary>
    /// Detects forum (BBS) fingerprints: a floor marker of the form
    /// "第…樓" with 1 to 50 characters in between, or a
    /// "Powered by … Dvbbs" / "Powered by … Discuz" footer. Case is ignored.
    /// </summary>
    private static readonly Regex IsBbsInfo =
        new Regex(
            @"第[^樓]{1,50}樓|Powered\s*/?by[\s\S]*?Dvbbs|Powered\s*/?by[\s\S]*?Discuz",
            RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.ExplicitCapture);
     
    /// <summary>
    /// Extracts the value of the page's <c>keywords</c> meta tag into the
    /// <c>KeyWords</c> group. Both attribute orders are accepted
    /// (name before content, or content before name); quotes are optional.
    /// </summary>
    /// <remarks>
    /// The verbatim string is closed on the same line as the pattern. Closing it
    /// on the following line embedded a line break plus indentation into the
    /// second alternative, which then only matched when the tag happened to be
    /// followed by exactly that whitespace.
    /// </remarks>
    private static readonly Regex mKeyWord = new Regex(
        @"<meta\s*name\s*=\s*['""]?keywords['""]?\s*content\s*=\s*['""]?(?<KeyWords>[^'"">]*)['""]?[^>]*>|<meta\s*content\s*=\s*['""]?(?<KeyWords>[^'"">]*)['""]?\s*name\s*=\s*['""]?keywords['""]?\s*[^>]*>",
        RegexOptions.ExplicitCapture | RegexOptions.Multiline | RegexOptions.IgnoreCase);
     
    /// <summary>
    /// Extracts the value of the page's <c>description</c> meta tag into the
    /// <c>description</c> group. Both attribute orders are accepted
    /// (name before content, or content before name); quotes are optional.
    /// </summary>
    /// <remarks>
    /// The verbatim string is closed on the same line as the pattern. Closing it
    /// on the following line embedded a line break plus indentation into the
    /// second alternative, which then only matched when the tag happened to be
    /// followed by exactly that whitespace.
    /// </remarks>
    private static readonly Regex mDescription = new Regex(
        @"<meta\s*name\s*=\s*['""]?description['""]?\s*content\s*=\s*['""]?(?<description>[^'"">]*)['""]?[^>]*>|<meta\s*content\s*=\s*['""]?(?<description>[^'"">]*)['""]?\s*name\s*=\s*['""]?description['""]?\s*[^>]*>",
        RegexOptions.ExplicitCapture | RegexOptions.Multiline | RegexOptions.IgnoreCase);
     
    /// <summary>
    /// Extracts the value of the page's <c>tagwords</c> meta tag into the
    /// <c>tagwords</c> group. Both attribute orders are accepted
    /// (name before content, or content before name); quotes are optional.
    /// </summary>
    /// <remarks>
    /// The verbatim string is closed on the same line as the pattern. Closing it
    /// on the following line embedded a line break plus indentation into the
    /// second alternative, which then only matched when the tag happened to be
    /// followed by exactly that whitespace.
    /// </remarks>
    private static readonly Regex mTag = new Regex(
        @"<meta\s*name\s*=\s*['""]?tagwords['""]?\s*content\s*=\s*['""]?(?<tagwords>[^'"">]*)['""]?[^>]*>|<meta\s*content\s*=\s*['""]?(?<tagwords>[^'"">]*)['""]?\s*name\s*=\s*['""]?tagwords['""]?\s*[^>]*>",
        RegexOptions.ExplicitCapture | RegexOptions.Multiline | RegexOptions.IgnoreCase);
     
    /// <summary>
    /// Junk-line probe for short "label: value" lines: a line that starts with at
    /// most 8 characters (none of them whitespace or the character "說"),
    /// followed by a colon and then at most 10 further characters up to the line end.
    /// </summary>
    private static readonly Regex IsWsMM =
        new Regex(
            @"^[^說\s]{0,8}?[::].{0,10}$",
            RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.ExplicitCapture);
     
    /// <summary>
    /// Finds the URL marker line written by the spider and captures everything
    /// after the <c>http://</c> prefix, up to the end of the line, into the
    /// <c>URL</c> group.
    /// </summary>
    private static readonly Regex txtUrl =
        new Regex(
            @"當前URL為:http://(?<URL>.*)",
            RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.ExplicitCapture);
     
    /// <summary>
    /// Finds the anchor-description marker line written by the spider and captures
    /// the rest of that line into the <c>Describe</c> group.
    /// </summary>
    private static readonly Regex txtDescription =
        new Regex(
            @"當前鏈接描述為:(?<Describe>.*)",
            RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.ExplicitCapture);
     
    ///// <summary>
    ///// 取需要a標簽
    ///// </summary>
    //private static readonly Regex cleanFirst = new Regex(
    // @"([\u4E00-\u9FA5]|[\uFE30-\uFFA0]|[,."");])(?<Robbish1><a\s+[^>]*>)[^<]{1,6}(?<Robbish2></a>)([\u4E00-\u9FA5]|[\uFE30-\uFFA0]|[,."");])", RegexOptions.ExplicitCapture | RegexOptions.Multiline | RegexOptions.IgnoreCase);
     
    #endregion
     

    CRM定制 辦公OA找沈陽易勢科技有限公司

    沈陽團購網|營口網站制作|沈陽軟件公司|軟件定制|網站建設|加盟易勢|提交問題

    日韩人妻无码精品久久免费一| 亚洲日韩精品无码专区加勒比 | 国产一区二区三区国产精品| 亚洲日韩国产精品乱-久| 97麻豆精品国产自产在线观看 | 无码国产精品一区二区免费式影视 | 国产A∨国片精品一区二区 | 日本一卡精品视频免费| 国产成人久久精品一区二区三区| 97视频在线观看这里只有精品 | 亚洲国产美女精品久久| 九九精品99久久久香蕉| 亚洲综合国产精品| 久9久9精品免费观看| 久久精品福利视频| 久久久国产精品福利免费 | 国产精品一区不卡| 国产精品自产拍2021在线观看| 无码人妻精品一区二区三区不卡 | 国产午夜亚洲精品国产成人小说| CAOPORM国产精品视频免费| 精品91一区二区三区| 老司机精品福利在线| 日韩中文字幕精品免费一区| 日韩超碰人人爽人人做人人添| 日韩av无码一区二区三区| 日韩美无码五月天| 日韩成人在线视频| 日韩去日本高清在线| 日韩免费福利视频| 久草这里只有精品| 国产69精品久久久久99| 黄床大片免费30分钟国产精品 | 91精品久久久久| 日韩成人av在线| 米奇777四色精品人人爽| 99精品众筹模特自拍视频| 97国产精品视频观看一| 538精品视频在线观看mp4| 91精品国产麻豆国产自产在线| 精品国产一区二区三区四区|