Functional Programming w/ C# LINQ

ここ 8 ヵ月ほど C# の LINQ でちょっとばかりデータサイエンティスト的なことをやっている。

大量データ処理は C# LINQ が群を抜いて優秀である。

LINQ はクエリ式より複雑な取り回しをできるメソッド式で記述する方が便利であることが多いが、ここにきてクエリ式の方が有利になるケースを発見した。非決定計算の組合せ総当たりはメソッド式よりクエリ式がかなり効果的だ。

C# ver 8 の LINQ 両方式で、有名な覆面算 "SEND + MORE = MONEY" となる数字 (S, E, N, D, M, O, R, Y) の組合せを総当たりで求めてみる。

#nullable enable

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;

/// 非決定計算
namespace Test {
  public static class TestX {
    public static void Main(string[] argv) {
      var sw = new Stopwatch();

      sw.Start();

      foreach (var item in argv[0] switch { "2" => Money2(), _ => Money1() })
        Console.WriteLine(item);

      sw.Stop();
      Console.WriteLine(string.Format("{0:##,##0.0} ms", sw.ElapsedMilliseconds).PadLeft(8));
    }

    public static readonly IEnumerable<int> Digits = Enumerable.Range(0, 10);

    /// クエリ式方式
    public static IEnumerable<dynamic> Money1() =>
      from s in Digits                                        where s != 0                  // クエリ式は、各 from 行で文字の値候補を生成
      from e in Digits.Except(new [] { s                   })
      from n in Digits.Except(new [] { s, e                })
      from d in Digits.Except(new [] { s, e, n             })
        let send  = new [] { s, e, n, d    }.Aggregate( (x, y) => x * 10 + y )              // 必要候補が確定したら計算を行い、無駄に重複させない

      from m in Digits.Except(new [] { s, e, n, d          }) where m != 0
      from o in Digits.Except(new [] { s, e, n, d, m       })
      from r in Digits.Except(new [] { s, e, n, d, m, o    })
        let more  = new [] { m, o, r, e    }.Aggregate( (x, y) => x * 10 + y ) 

      from y in Digits.Except(new [] { s, e, n, d, m, o, r })
        let money = new [] { m, o, n, e, y }.Aggregate( (x, y) => x * 10 + y ) 

          where send + more == money
            select new { send, more, money };

    /// メソッド式方式
    public static IEnumerable<dynamic> Money2() =>
      Digits
        .SelectMany( s => Digits.Except(new [] { s                      })
        .SelectMany( e => Digits.Except(new [] { s, e                   })
        .SelectMany( n => Digits.Except(new [] { s, e, n                })
        .SelectMany( d => Digits.Except(new [] { s, e, n, d             })
        .SelectMany( m => Digits.Except(new [] { s, e, n, d, m          })
        .SelectMany( o => Digits.Except(new [] { s, e, n, d, m, o       })
        .SelectMany( r => Digits.Except(new [] { s, e, n, d, m, o, r    })
        .Select    ( y =>               new    { s, e, n, d, m, o, r, y }))))))))           // メソッド式は、ここで組合せ生成
          .Where( _ => _.s != 0 && _.m != 0 )                                               // 全候補が確定してから non zero の絞り込み
            .Select( _ => new {                                                             // 各文字の直接参照はできず _ でオブジェクト参照
              send  = new [] { _.s, _.e, _.n, _.d      }.Aggregate( (x, y) => x * 10 + y ), // 全候補が確定してから計算
              more  = new [] { _.m, _.o, _.r, _.e      }.Aggregate( (x, y) => x * 10 + y ),
              money = new [] { _.m, _.o, _.n, _.e, _.y }.Aggregate( (x, y) => x * 10 + y ),
            } )
              .Where( _ => _.send + _.more == _.money );
  }
}

クエリ式は各 from 行で文字の値候補を生成し、必要な個所で条件抽出・計算を逐次行うことが可能なため、重複計算や組合せの広がりを最小限に抑えている。

メソッド式はネストを深く SelectMany() を連ねてから Select() することで一次元シーケンスを作成するしかなく、結果として組合せ生成をしてから条件抽出し、計算せざるを得ない。当然、性能も悪くなる。また、オブジェクト参照の _. がうっとおしい。

クエリ式も場面によっては使える。