#region Copyright (c) 2004, Ryan Whitaker
' Copyright (c) 2004 Ryan Whitaker
' This software is provided 'as-is', without any express or implied warranty. In no
' event will the authors be held liable for any damages arising from the use of this
' software.
' Permission is granted to anyone to use this software for any purpose, including
' commercial applications, and to alter it and redistribute it freely, subject to the
' following restrictions:
' 1. The origin of this software must not be misrepresented; you must not claim that
' you wrote the original software. If you use this software in a product, an
' acknowledgment (see the following) in the product documentation is required.
' This product uses software written by the developers of NClassifier
' (http://nclassifier.sourceforge.net). NClassifier is a .NET port of the Nick
' Lothian's Java text classification engine, Classifier4J
' (http://classifier4j.sourceforge.net).
' 2. Altered source versions must be plainly marked as such, and must not be
' misrepresented as being the original software.
' 3. This notice may not be removed or altered from any source distribution.
using System;
using System.Collections;
using System.Text.RegularExpressions;
namespace NClassifier
{
    /// <summary>
    /// Text helper routines used by the classifier.
    /// </summary>
    public class Utilities
    {
        // Sentence boundary: one or more of ".", "!" or "?" followed by
        // whitespace or the end of the input.
        // The original Java regex was (\.|!|\?)+(\s|\z); the groups are
        // non-capturing here because Regex.Split would otherwise include the
        // captured delimiters in its result.
        private static readonly Regex SentenceBoundary =
            new Regex(@"(?:\.|!|\?)+(?:\s+|\z)");

        /// <summary>
        /// Gets an array of sentences.
        /// </summary>
        /// <param name="input">A string that contains sentences. May be null.</param>
        /// <returns>
        /// An array of strings, each element containing a sentence without its
        /// terminating punctuation. Empty fragments are dropped. Returns an
        /// empty array when <paramref name="input"/> is null.
        /// </returns>
        public static string[] GetSentences(string input)
        {
            if (input == null)
            {
                return new string[0];
            }

            string[] fragments = SentenceBoundary.Split(input);

            // Regex.Split yields an empty trailing element when the input ends
            // with a terminator (and a single empty element for empty input);
            // keep only the fragments that actually contain text.
            ArrayList sentences = new ArrayList();
            foreach (string fragment in fragments)
            {
                if (fragment.Length > 0)
                {
                    sentences.Add(fragment);
                }
            }

            return (string[])sentences.ToArray(typeof(string));
        }
    }
}