Hatena::Grouptopcoder

naoya_t@topcoder RSSフィード

2008-12-26

split()

| 08:51 | split() - naoya_t@topcoder を含むブックマーク はてなブックマーク - split() - naoya_t@topcoder split() - naoya_t@topcoder のブックマークコメント

私家版split()関数

まずはデリミタがintのもの。

省略時には空白をデリミタとして認識する。この場合に限り、連続する空白は1つの区切りとして扱われ、先頭・末尾の空白は無視される(空文字列の要素は返らない)。

#include <string>
#include <vector>
using namespace std;

// Splits `str` on the single character `delim` and returns the fields in order.
//
// The delimiter selects one of two modes:
//  * delim == ' ' (the default): a run of spaces counts as one separator and
//    leading/trailing spaces are ignored, so no empty field is ever produced
//    ("  a  b " => ["a","b"]).
//  * any other delim: every occurrence separates two fields, so empty fields
//    are kept (",a,," => ["","a","",""]).
//
// An empty `str` yields an empty vector in both modes.
//
// The scan runs over the full size() of `str` rather than over a C string, so
// embedded '\0' bytes are ordinary data and '\0' itself can be the delimiter.
// `delim` is compared with each char after the usual integral promotion.
std::vector<std::string> split(std::string str, int delim=' ')
{
  typedef std::string::size_type size_type;

  std::vector<std::string> result;
  const size_type len = str.size();
  // Space mode drops empty fields; every other delimiter keeps them.
  const bool skip_empty = (delim == ' ');

  size_type start = 0;  // index of the first character of the current field
  for (size_type i = 0; i < len; ++i) {
    if (str[i] != delim) continue;
    if (!skip_empty || start < i)
      result.push_back(str.substr(start, i - start));
    start = i + 1;
  }

  // Trailing field: in space mode only when non-empty; otherwise always,
  // except for an empty input, which produces no fields at all.
  if (skip_empty ? (start < len) : (len > 0))
    result.push_back(str.substr(start));

  return result;
}

次は文字列(string)をデリミタに取るもの。

#include <string>
#include <vector>
using namespace std;

// Splits `str` on every occurrence of the string `delim`.
//
//  * Non-empty delim: n occurrences of the delimiter always produce n+1
//    fields, so empty fields (leading, trailing, or between adjacent
//    delimiters) are kept. In particular an empty `str` yields one empty
//    field: split("", "<>") => [""].
//  * Empty delim: `str` is broken into one-character strings
//    ("abc" => ["a","b","c"]); an empty `str` then yields an empty vector.
std::vector<std::string> split(std::string str, std::string delim)
{
  typedef std::string::size_type size_type;

  std::vector<std::string> result;

  if (delim.empty()) {
    const size_type len = str.length();
    result.reserve(len);
    for (size_type i = 0; i < len; ++i) result.push_back(str.substr(i, 1));
    return result;
  }

  // Positions are kept as size_type so the comparison with npos is exact and
  // does not depend on narrowing to int.
  size_type since = 0;
  for (;;) {
    const size_type at = str.find(delim, since);
    if (at == std::string::npos) break;
    result.push_back(str.substr(since, at - since));
    since = at + delim.length();
  }
  // Whatever follows the last delimiter (possibly nothing) is the final field.
  result.push_back(str.substr(since));

  return result;
}

自由に使っていいけど無保証。ご利用は計画的に

おまけ

googletestもつけとくよ。

#include <gtest/gtest.h>

// Test case for split(str, string delim), implemented as a single test function.
//
// Expected contract: every occurrence of the delimiter separates two fields
// (empty fields are kept), and an empty delimiter splits the input into
// one-character strings.
//
// Size checks that are followed by element access use ASSERT_EQ, so a wrong
// size stops the test instead of indexing past the end of the vector. Size
// literals are unsigned to match the type of size().
TEST(SplitTest, Split1)
{
  std::vector<std::string> result;

  // "","<>" => exactly one field expected
  result = split("", "<>");
  EXPECT_EQ( 1u, result.size() );

  // "a","<>" => ["a"]
  result = split("a", "<>");
  ASSERT_EQ( 1u, result.size() );
  EXPECT_EQ( "a", result[0] );

  // "a<>b<>c","<>" => ["a","b","c"]
  result = split("a<>b<>c", "<>");
  ASSERT_EQ( 3u, result.size() );
  EXPECT_EQ( "a", result[0] );
  EXPECT_EQ( "b", result[1] );
  EXPECT_EQ( "c", result[2] );

  // "a<>b<>c<>","<>" => ["a","b","c",""]
  result = split("a<>b<>c<>", "<>");
  ASSERT_EQ( 4u, result.size() );
  EXPECT_EQ( "a", result[0] );
  EXPECT_EQ( "b", result[1] );
  EXPECT_EQ( "c", result[2] );
  EXPECT_EQ( "" , result[3] );

  // "<>a<>b<>c","<>" => ["","a","b","c"]
  result = split("<>a<>b<>c", "<>");
  ASSERT_EQ( 4u, result.size() );
  EXPECT_EQ( "" , result[0] );
  EXPECT_EQ( "a", result[1] );
  EXPECT_EQ( "b", result[2] );
  EXPECT_EQ( "c", result[3] );

  // "<>a<>","<>" => ["","a",""]
  result = split("<>a<>", "<>");
  ASSERT_EQ( 3u, result.size() );
  EXPECT_EQ( "" , result[0] );
  EXPECT_EQ( "a", result[1] );
  EXPECT_EQ( "" , result[2] );

  // "a<><>b","<>" => ["a","","b"]
  result = split("a<><>b", "<>");
  ASSERT_EQ( 3u, result.size() );
  EXPECT_EQ( "a", result[0] );
  EXPECT_EQ( "" , result[1] );
  EXPECT_EQ( "b", result[2] );

  // "<>","<>" => ["",""]
  result = split("<>", "<>");
  ASSERT_EQ( 2u, result.size() );
  EXPECT_EQ( "", result[0] );
  EXPECT_EQ( "", result[1] );

  // "","" => []
  result = split("", "");
  EXPECT_EQ( 0u, result.size() );

  // Special usage, like "abc".split(''): an empty delimiter yields the characters.
  result = split("abc", "");
  ASSERT_EQ( 3u, result.size() );
  EXPECT_EQ( "a", result[0] );
  EXPECT_EQ( "b", result[1] );
  EXPECT_EQ( "c", result[2] );
}

// Test case for split(str, int delim = ' ').
//
// Expected contract: with the default space delimiter, runs of spaces and
// leading/trailing spaces produce no empty fields; with any other delimiter,
// every occurrence separates two fields and empty fields are kept.
//
// Size checks that are followed by element access use ASSERT_EQ, so a wrong
// size stops the test instead of indexing past the end of the vector. Size
// literals are unsigned to match the type of size().
TEST(SplitTest, Split2)
{
  std::vector<std::string> result;

  // --- default delimiter (space) ---

  // "" => []
  result = split("");
  EXPECT_EQ( 0u, result.size() );
  // "a" => ["a"]
  result = split("a");
  ASSERT_EQ( 1u, result.size() );
  EXPECT_EQ( "a", result[0] );
  // "a b c" => ["a","b","c"]
  result = split("a b c");
  ASSERT_EQ( 3u, result.size() );
  EXPECT_EQ( "a", result[0] );
  EXPECT_EQ( "b", result[1] );
  EXPECT_EQ( "c", result[2] );
  // "a " => ["a"]
  result = split("a ");
  ASSERT_EQ( 1u, result.size() );
  EXPECT_EQ( "a", result[0] );
  // " a" => ["a"]
  result = split(" a");
  ASSERT_EQ( 1u, result.size() );
  EXPECT_EQ( "a", result[0] );
  // " a " => ["a"]
  result = split(" a ");
  ASSERT_EQ( 1u, result.size() );
  EXPECT_EQ( "a", result[0] );
  // "a b" => ["a","b"]
  result = split("a b");
  ASSERT_EQ( 2u, result.size() );
  EXPECT_EQ( "a", result[0] );
  EXPECT_EQ( "b", result[1] );
  // "a  b" (two spaces) => ["a","b"]
  result = split("a  b");
  ASSERT_EQ( 2u, result.size() );
  EXPECT_EQ( "a", result[0] );
  EXPECT_EQ( "b", result[1] );

  // --- explicit delimiter ---

  // "a b c",'b' => ["a "," c"] (spaces are ordinary characters here)
  result = split("a b c",'b');
  ASSERT_EQ( 2u, result.size() );
  EXPECT_EQ( "a ", result[0] );
  EXPECT_EQ( " c", result[1] );

  // "a,b",',' => ["a","b"]
  result = split("a,b", ',');
  ASSERT_EQ( 2u, result.size() );
  EXPECT_EQ( "a", result[0] );
  EXPECT_EQ( "b", result[1] );
  // "a,,b",',' => ["a","","b"]
  result = split("a,,b", ',');
  ASSERT_EQ( 3u, result.size() );
  EXPECT_EQ( "a", result[0] );
  EXPECT_EQ( "" , result[1] );
  EXPECT_EQ( "b", result[2] );
  // ",a",',' => ["","a"]
  result = split(",a", ',');
  ASSERT_EQ( 2u, result.size() );
  EXPECT_EQ( "" , result[0] );
  EXPECT_EQ( "a", result[1] );
  // "a,",',' => ["a",""]
  result = split("a,", ',');
  ASSERT_EQ( 2u, result.size() );
  EXPECT_EQ( "a", result[0] );
  EXPECT_EQ( "" , result[1] );
  // ",a,",',' => ["","a",""]
  result = split(",a,", ',');
  ASSERT_EQ( 3u, result.size() );
  EXPECT_EQ( "" , result[0] );
  EXPECT_EQ( "a", result[1] );
  EXPECT_EQ( "" , result[2] );
}

int main(int argc, char** argv)
{
  testing::InitGoogleTest(&argc, argv);

  return RUN_ALL_TESTS();
}