Regular Expressions in C# – Part 4 – Wild Character

We use the dot character '.' to match any character in a regular expression pattern. It is called a wild character. It matches spaces as well, but not the newline character. If we want a match to start and end at word boundaries, we use the \b anchor. If the characters between these boundaries must be alphanumeric (or underscore), i.e. [a-zA-Z0-9_], we can use the shorthand \w instead of the dot. Here are a few examples.

using System.Text.RegularExpressions;
using Microsoft.VisualStudio.TestTools.UnitTesting;
using RegularExpressions.Tests.Helpers;
 
namespace RegularExpressions.Tests.Part04
{
    /// <summary>
    /// Demonstrates the wild character '.' and the word-character shorthand '\w',
    /// on their own and combined with the '\b' word-boundary anchor.
    /// </summary>
    [TestClass]
    public class WildCharacters
    {
        /// <summary>
        /// '.' matches every single character except the newline, so the '\n'
        /// at index 3 of the subject is skipped and only the seven letters match.
        /// </summary>
        [TestMethod]
        public void Each_Character_Produces_Match_Except_NewLine()
        {
            const string pattern = ".";
            const string subject = "boy\ngirl";

            MatchCollection matches = FindAndTraceMatches(pattern, subject);

            Assert.AreEqual(7, matches.Count);

            // Expected trace (index: length: value); index 3 is the newline.
            AssertMatch(matches[0], 0, "b");
            AssertMatch(matches[1], 1, "o");
            AssertMatch(matches[2], 2, "y");
            AssertMatch(matches[3], 4, "g");
            AssertMatch(matches[4], 5, "i");
            AssertMatch(matches[5], 6, "r");
            AssertMatch(matches[6], 7, "l");
        }

        /// <summary>
        /// Any three characters enclosed by word boundaries. Because '.' also
        /// matches a space, " xx" (space-x-x) qualifies: there is a boundary
        /// between "pig" and the space, and another at the end of the subject.
        /// </summary>
        [TestMethod]
        public void Matches_Each_Boundery_Of_Three_Characters()
        {
            const string pattern = @"\b.{3}\b";
            const string subject = "man bear pig xx";

            MatchCollection matches = FindAndTraceMatches(pattern, subject);

            Assert.AreEqual(3, matches.Count);

            // Expected trace (index: length: value).
            AssertMatch(matches[0], 0, "man");
            AssertMatch(matches[1], 9, "pig");
            AssertMatch(matches[2], 12, " xx"); // Note: space-x-x is also a match
        }

        /// <summary>
        /// Exactly three word characters enclosed by word boundaries. Unlike
        /// '.', '\w' does not match a space, so " xx" is no longer a match.
        /// </summary>
        [TestMethod]
        public void Matches_Each_Word_Of_Three_Characters()
        {
            const string pattern = @"\b\w{3}\b";
            const string subject = "man bear pig xx";

            MatchCollection matches = FindAndTraceMatches(pattern, subject);

            Assert.AreEqual(2, matches.Count);

            // Expected trace (index: length: value).
            AssertMatch(matches[0], 0, "man");
            AssertMatch(matches[1], 9, "pig");
        }

        /// <summary>
        /// Words that start with 'p' followed by one or more word characters.
        /// </summary>
        [TestMethod]
        public void Matches_Each_Word_Of_Any_Length_Of_Characters_Starting_With_P()
        {
            // The leading \b anchors the 'p' to the start of a word. Without it
            // the pattern would also match from a 'p' in the middle of a word
            // (e.g. "pple" in "apple"), which is not a word starting with 'p'.
            const string pattern = @"\bp\w+";
            const string subject = "man bear pig\n pothole";

            MatchCollection matches = FindAndTraceMatches(pattern, subject);

            Assert.AreEqual(2, matches.Count);

            // Expected trace (index: length: value).
            AssertMatch(matches[0], 9, "pig");
            AssertMatch(matches[1], 14, "pothole");
        }

        /// <summary>
        /// Runs <paramref name="pattern"/> against <paramref name="subject"/>,
        /// passes every match to DebugWriter.WriteMatch and returns the matches.
        /// </summary>
        /// <param name="pattern">The regular expression pattern to apply.</param>
        /// <param name="subject">The text to search.</param>
        /// <returns>All matches of the pattern in the subject.</returns>
        private static MatchCollection FindAndTraceMatches(string pattern, string subject)
        {
            var regEx = new Regex(pattern);
            MatchCollection matches = regEx.Matches(subject);

            foreach (Match match in matches)
            {
                DebugWriter.WriteMatch(match, subject);
            }

            return matches;
        }

        /// <summary>
        /// Asserts that a match starts at the expected index and has the expected text.
        /// </summary>
        /// <param name="match">The match to verify.</param>
        /// <param name="expectedIndex">Zero-based position in the subject where the match must start.</param>
        /// <param name="expectedValue">The exact text the match must have captured.</param>
        private static void AssertMatch(Match match, int expectedIndex, string expectedValue)
        {
            Assert.AreEqual(expectedIndex, match.Index);
            Assert.AreEqual(expectedValue, match.Value);
        }
    }
}

Leave a Reply

Your email address will not be published. Required fields are marked *

You may use these HTML tags and attributes: <a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code> <del datetime=""> <em> <i> <q cite=""> <strike> <strong>