December 24, 2011

Parsing tweet with Regex in C#

This is a simple snippet that takes raw tweet text and converts it into a formatted tweet. The formatted tweet contains links for user handles, URLs, and hashtags.

// Single pattern with three alternatives, tried in this order at each position:
//   link - an http/https URL (same character sets as the original pattern)
//   user - "@name" not preceded by a word character, so "me@example.com" is left alone
//   tag  - "#name" not preceded by a word character or '&' (avoids "&#39;"-style entities)
// Putting the URL alternative first means '@' and '#' inside a URL are consumed
// as part of the URL and are never linked on their own.
private static readonly Regex TweetEntityPattern = new Regex(
    @"(?<link>https?://[\w+?.]+[a-zA-Z0-9~!@#$%^&*()_\-=+\\/?.:;',]*)" +
    @"|(?<!\w)@(?<user>\w+)" +
    @"|(?<![\w&])#(?<tag>\w+)",
    RegexOptions.Compiled);

/// <summary>
/// Converts raw tweet text into HTML in which URLs, @mentions and #hashtags
/// are wrapped in anchor tags.
/// </summary>
/// <param name="rawTweet">The tweet text to format.</param>
/// <returns>
/// The text with each URL linked to itself, each @mention linked to
/// http://twitter.com/{name}, and each #hashtag linked to a Twitter search for
/// that tag. Returns an empty string when <paramref name="rawTweet"/> is null or empty.
/// </returns>
/// <remarks>
/// NOTE(review): text outside the generated anchors is passed through unchanged and is
/// not HTML-encoded here; confirm that callers supply text that is safe to render.
/// </remarks>
private string ParseTweet(string rawTweet)
{
    if (string.IsNullOrEmpty(rawTweet))
    {
        return string.Empty;
    }

    // One pass over the input: every entity is matched against the original text,
    // so markup produced for one entity is never re-scanned by another rule.
    return TweetEntityPattern.Replace(rawTweet, delegate(Match m)
    {
        if (m.Groups["link"].Success)
        {
            string url = m.Value;
            // The URL pattern admits apostrophes, which would terminate the
            // single-quoted href attribute; percent-encode them in the attribute only.
            string href = url.Replace("'", "%27");
            return "<a href='" + href + "'>" + url + "</a>";
        }

        if (m.Groups["user"].Success)
        {
            string userName = m.Groups["user"].Value;
            return string.Format("@<a href='http://twitter.com/{0}'>{1}</a>", userName, userName);
        }

        // The captured tag excludes the leading '#': the query string already
        // carries it as %23, and the visible link text re-adds it explicitly.
        string tag = m.Groups["tag"].Value;
        return string.Format("<a href='http://twitter.com/#search?q=%23{0}'>#{1}</a>", tag, tag);
    });
}
view raw FormatTweet.cs hosted with ❤ by GitHub

4 comments:

  1. Any idea what the regex would be to ignore email addresses whilst hyperlinking Twitter usernames?

    ReplyDelete
  2. This comment has been removed by the author.

    ReplyDelete
  3. Perfect! Works like a charm. Thanks!

    ReplyDelete