1
1

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?

More than 3 years have passed since last update.

C#のSIMDでカスケード積分コムフィルタ

Posted at

前回、C#/SIMDでソフトウェア無線向けのヘテロダイン変換を行ったので、CICフィルタもSIMDで実装してみた。

シンプルforで32million毎秒、SIMDで40million/sec(約25%高速化)、並列SIMDで94.5million毎秒(約3倍高速化)、という結果(前回と同じ環境で、R8, N3)。あまり早くならなかった。

SIMD実装は128bit幅のSIMDを使用し、longで計算している。そのためCICの段数を増やしても飽和する危険性が少ない。
並列SIMD実装は256bit幅のSIMDを使用し、intで計算している。CIC飽和の余裕が少ないが、4chを同時に処理できる。あくまでも4本のデータを同じように処理する程度の並列化なので、メモリアクセスがランダムになる。おそらくここがボトルネックになって遅いんだと思う。シーケンシャルにアクセスできるようにすればある程度早くなりそう。

class CIC_for
{
    private readonly int cicR_;

    public int CicR { get => cicR_; }

    private readonly (int Re, int Im)[] cicBuff1;
    private readonly (int Re, int Im)[] cicBuff2;

    private int cicIndex;

    public CIC_for(int CicR, int CicN)
    {
        cicR_ = CicR;

        cicBuff1 = new (int Re, int Im)[CicN];
        cicBuff2 = new (int Re, int Im)[CicN];

        cicIndex = CicR;
    }

    public void Process((short Re, short Im)[] src, (int Re, int Im)[] dst)
    {
        if (src.Length != dst.Length * cicR_)
        { throw new Exception(); }

        for (int i = 0, j = 0; i < src.Length; i++)
        {
            var cicAcc = (Re: (int)src[i].Re, Im: (int)src[i].Im);

            for (int k = 0; k < cicBuff1.Length; k++)
            {
                cicAcc.Re += cicBuff1[k].Re;
                cicAcc.Im += cicBuff1[k].Im;
                cicBuff1[k] = cicAcc;
            }

            if (--cicIndex == 0)
            {
                cicIndex = cicR_;

                for (int k = 0; k < cicBuff2.Length; k++)
                {
                    var tmp = cicAcc;
                    cicAcc.Re -= cicBuff2[k].Re;
                    cicAcc.Im -= cicBuff2[k].Im;
                    cicBuff2[k] = tmp;
                }

                dst[j++] = cicAcc;
            }
        }
    }
}

class CIC_SIMD
{
    private readonly int cicR_;

    public int CicR { get => cicR_; }

    private readonly Vector128<long>[] cicBuff1;
    private readonly Vector128<long>[] cicBuff2;

    private int cicIndex;

    public CIC_SIMD(int CicR, int CicN)
    {
        cicR_ = CicR;

        cicBuff1 = new Vector128<long>[CicN];
        cicBuff2 = new Vector128<long>[CicN];

        cicIndex = CicR;
    }

    public void Process((short Re, short Im)[] src, (long Re, long Im)[] dst)
    {
        if (src.Length != dst.Length * cicR_)
        { throw new Exception(); }

        unsafe
        {
            fixed ((short, short)* ptr1 = src)
            fixed ((long, long)* ptr2 = dst)
            {
                var p1 = (short*)ptr1;
                var p2 = (long*)ptr2;

                for (int i = 0, j = 0; i < src.Length; i++)
                {
                    var a = Avx2.ConvertToVector128Int64(p1 + i * 2);

                    for (int k = 0; k < cicBuff1.Length; k++)
                    {
                        a = cicBuff1[k] = Avx2.Add(a, cicBuff1[k]);
                    }

                    if (--cicIndex == 0)
                    {
                        cicIndex = cicR_;

                        for (int k = 0; k < cicBuff2.Length; k++)
                        {
                            var b = a;
                            a = Avx2.Subtract(a, cicBuff2[k]);
                            cicBuff2[k] = b;
                        }

                        Avx2.Store(p2 + j++ * 2, a);
                    }
                }
            }
        }
    }
}

class CIC_SIMD_parallel
{
    private readonly int cicR_;

    public int CicR { get => cicR_; }

    private readonly Vector256<int>[] cicBuff1;
    private readonly Vector256<int>[] cicBuff2;

    private int cicIndex;

    public CIC_SIMD_parallel(int CicR, int CicN)
    {
        cicR_ = CicR;

        cicBuff1 = new Vector256<int>[CicN];
        cicBuff2 = new Vector256<int>[CicN];

        cicIndex = CicR;
    }

    public void Process(
        (short Re, short Im)[] src1,
        (short Re, short Im)[] src2,
        (short Re, short Im)[] src3,
        (short Re, short Im)[] src4,
        (int Re, int Im)[] dst1,
        (int Re, int Im)[] dst2,
        (int Re, int Im)[] dst3,
        (int Re, int Im)[] dst4)
    {
        if (src1.Length != src2.Length ||
            src1.Length != src3.Length ||
            src1.Length != src4.Length ||
            src1.Length != dst1.Length * cicR_ ||
            src1.Length != dst2.Length * cicR_ ||
            src1.Length != dst3.Length * cicR_ ||
            src1.Length != dst4.Length * cicR_)
        { throw new Exception(); }

        unsafe
        {
            var foo = new (short, short)[4];
            var bar = new (int, int)[4];

            fixed ((short, short)* p1 = foo)
            fixed ((int, int)* p2 = bar)
            {
                for (int i = 0, j = 0; i < src1.Length; i++)
                {
                    foo[0] = src1[i];
                    foo[1] = src2[i];
                    foo[2] = src3[i];
                    foo[3] = src4[i];

                    var a = Avx2.ConvertToVector256Int32((short*)p1);

                    for (int k = 0; k < cicBuff1.Length; k++)
                    {
                        a = cicBuff1[k] = Avx2.Add(a, cicBuff1[k]);
                    }

                    if (--cicIndex == 0)
                    {
                        cicIndex = cicR_;

                        for (int k = 0; k < cicBuff2.Length; k++)
                        {
                            var b = a;
                            a = Avx2.Subtract(a, cicBuff2[k]);
                            cicBuff2[k] = b;
                        }

                        Avx2.Store((int*)p2, a);

                        dst1[j] = bar[0];
                        dst2[j] = bar[1];
                        dst3[j] = bar[2];
                        dst4[j] = bar[3];
                        j++;
                    }
                }
            }
        }
    }
}
1
1
1

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
1
1

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?