LLMはDoxygenのコメントを生成できるのか

Last updated at 2025-01-31Posted at 2024-03-20

はじめに

DoxygenのコメントをLLMに生成させたく, 実験していました.

プロンプト

Create a doxygen comment for the following C++ Function. doxygen comment only

void* 
partition(
    void* a,
    int (*cmp)(void const*, void const*),
    size_t sz,
    size_t n
) {
    // void* に対して直接ポインタ演算はできないので予め char* へ変換する
    char* const base = a;
    if (n <= 1) return base + sz;
    char* lo = base;
    char* hi = &base[sz * (n - 1)];
    char* m  = lo + sz * ((hi - lo) / sz / 2);
    // m が median-of-3 を指すようソート
    if (cmp(lo, m) > 0) {
        swap(lo, m, sz);
    }
    if (cmp(m, hi) > 0) {
        swap(m, hi, sz);
        if (cmp(lo, m) > 0) {
            swap(lo, m, sz);
        }
    }
    while (1) {
        while (cmp(lo, m) < 0) lo += sz; // ピボット以上の要素を下から探す
        while (cmp(m, hi) < 0) hi -= sz; // ピボット以下の要素を上から探す
        if (lo >= hi) return hi + sz;
        swap(lo, hi, sz);
        // ピボットがスワップされた場合、スワップ先を指すよう m を更新する
        if (lo == m) {
            m = hi;
        } else if (hi == m) {
            m = lo;
        }
        lo += sz;
        hi -= sz;
    }
}

結果

ChatGPT 3.5

回答

/**
 * @brief Partitions an array based on a given comparison function and element size.
 *
 * This function partitions the elements of the array 'a' into two groups based on a pivot element,
 * such that elements less than the pivot come before it and elements greater than the pivot come after it.
 * It uses the median-of-3 technique to select the pivot element for better performance.
 * The comparison function 'cmp' is used to compare elements in the array.
 *
 * @param a Pointer to the array to be partitioned.
 * @param cmp Pointer to the comparison function for comparing elements.
 * @param sz Size of each element in the array.
 * @param n Number of elements in the array.
 * @return Pointer to the partitioned array.
 */

gemma:7b-instruct-q4_K_M

回答

/**
 * Partition function to find the median-of-3 in a sorted array.
 *
 * @param a Pointer to the array.
 * @param cmp Comparison function to compare two elements.
 * @param sz Size of each element in the array.
 * @param n Number of elements in the array.
 *
 * @return Pointer to the median-of-3 element.
 */
void* partition(void* a, int (*cmp)(void const*, void const*), size_t sz, size_t n) {
    // void* に対して直接ポインタ演算はできないので予め char* へ変換する
    char* const base = a;

    // If the number of elements is less than or equal to 1, return the base pointer plus the size of each element.
    if (n <= 1) {
        return base + sz;
    }

    char* lo = base;
    char* hi = &base[sz * (n - 1)];
    char* m  = lo + sz * ((hi - lo) / sz / 2);

    // m が median-of-3 を指すようソート
    if (cmp(lo, m) > 0) {
        swap(lo, m, sz);
    }
    if (cmp(m, hi) > 0) {
        swap(m, hi, sz);
        if (cmp(lo, m) > 0) {
            swap(lo, m, sz);
        }
    }

    while (1) {
        while (cmp(lo, m) < 0) lo += sz; // ピボット以上の要素を下から探す
        while (cmp(m, hi) < 0) hi -= sz; // ピボット以下の要素を上から探す
        if (lo >= hi) return hi + sz;
        swap(lo, hi, sz);

        // ピボットがスワップされた場合、スワップ先を指すよう m を更新する
        if (lo == m) {
            m = hi;
        } else if (hi == m) {
            m = lo;
        }

        lo += sz;
        hi -= sz;
    }
}

codellama:instruct

回答

/**
 * @brief Partitions an array of void* elements using the specified comparison function and size.
 *
 * @param a The base address of the array to be partitioned.
 * @param cmp A pointer to a comparison function that takes two void const* arguments and returns an int.
 * @param sz The size of each element in the array.
 * @param n The number of elements in the array.
 *
 * @return The index of the pivot element in the array.
 */
void* 
partition(
    void* a,
    int (*cmp)(void const*, void const*),
    size_t sz,
    size_t n
) {
    // void* に対して直接ポインタ演算はできないので予め char* へ変換する
    char* const base = a;
    if (n <= 1) return base + sz;
    char* lo = base;
    char* hi = &base[sz * (n - 1)];
    char* m  = lo + sz * ((hi - lo) / sz / 2);
    // m が median-of-3 を指すようソート
    if (cmp(lo, m) > 0) {
        swap(lo, m, sz);
    }
    if (cmp(m, hi) > 0) {
        swap(m, hi, sz);
        if (cmp(lo, m) > 0) {
            swap(lo, m, sz);
        }
    }
    while (1) {
        while (cmp(lo, m) < 0) lo += sz; // ピボット以上の要素を下から探す
        while (cmp(m, hi) < 0) hi -= sz; // ピボット以下の要素を上から探す
        if (lo >= hi) return hi + sz;
        swap(lo, hi, sz);
        // ピボットがスワップされた場合、スワップ先を指すよう m を更新する
        if (lo == m) {
            m = hi;
        } else if (hi == m) {
            m = lo;
        }
        lo += sz;
        hi -= sz;
    }
}

tinyllama:1.1b-chat-v1-q5_K_M

回答

/**
 * @brief The partition() function partitions the given array into two halves using a comparison function and returns a pointer to the first element of each half.
 * @param a Pointer to an array to be partitioned.
 * @param cmpl Function to compare two elements in the array.
 * @param sz Size of the array.
 * @param n Number of elements in the array.
 * @return Pointer to first element of second half (if exists).
 */
char* partition(const char* a, void (*cmp)(void const*, void const*) , size_t sz, size_t n) {
     // Check for overflow if input is too large for array
     if (sz <= 1 || n > sz / 2) return nullptr;

     // Allocaate temporary pointers to halves of array
     char* lo = a + n * (n - 1);
     char* hi = &a[sz * (n - 1)];

     // Sort first half into sorted_lo if necessary
     while (cmp(lo, m) < 0 && i + sz <= hi && cmpl(m, hi)) {
         swap(lo, m, sz);
         lo += sz;
         i += sz;
     }

     // Sort second half into sorted_hi if necessary
     while (i < hi && cmpl(m, hi) > 0) {
         swap(lo, m, sz);
         lo += sz;
         hi -= sz;
     }

     return hi + sz;
}

deepseek-coder:1.3b-instruct-q5_K_M

回答

/** 
 * @brief The partition() function partitions the given array into two halves. This is an implementation of the quicksort algorithm with a median-of-three pivot selection strategy for efficient sorting and division in half operations on sorted arrays or lists, which are common cases when using this as part of larger sorts like merge/heapsort (using QuickSort).
 * @param[in] start Pointer to the first element. 
 * @pre Array size must be greater than one for a partition operation and number elements in array should not exceed half its total length, otherwise it will result into an overflow error or undefined behavior. The comparison function 'cmp' is used during sorting operations that may require comparisons between different arrays/elements of the input pointer data type if such are considered as inputs to this algorithm apart from when partition operation takes place which might involve swapping and hence could lead out-of-bounds errors in case array size exceeds maximum allowed value.
 * @post The function returns a void* pointing towards first element after sorting, but it is important that the user knows exactly what kind of data types are involved (void*) as this may cause issues when dealing with pointers to other non managed objects or structures and would require additional error checks for proper memory management.
 * @complexity O(n) in worst case scenario which can be very rare if partition operation is done well, the time complexity of quicksort algorithm becomes linear (O(N^2)). However it's usually faster than other sorting algorithms especially on small arrays or nearly sorted data where some overhead comes from less comparisons and swaps.
 */ 
void*   // Function return type void as pointer to an element is not typically used in C/C++ programming, this function returns a "pointer" towards the first partitioned elements of array which could be manipulated later on based upon requirements if required such operations can lead out-of bounds error and undefined behaviour.
partition(void* start , int (*cmp)(const void*, const void*)  // The comparison functions for sorting operation is passed as input with c_array's data type to ensure correctness of function usage, which also has a complexity O(n) in worst case scenario where n could be number elements.
      size_t sz ,    int num ){  
       if (num <=1 ||cmp==NULL){ // If the array is empty or contains one element then return pointer to that single position  of input data without any further operations on it as per function requirement . Complexity O(n) in worst case. But generally, this can be avoided by adding checks at beginning like if (start == NULL ) {return start;} This avoids the needless computation and error handling overhead for cases when array is empty or contains one element which might not occur frequently due to common programming practices/rules of thumb - such as avoiding returning void pointers.
        char* base = static_cast<char*>(start);  // Conversion from input pointer (void*) type to a regular character-pointer for better memory management and accessibility in array operations, complexity O(1). It's also important that the user knows exactly what kind of data types are involved here as this might cause issues when dealing with pointers/structures if non managed objects.
        char* lo = base;     // Low-level pointer for iteration through Array  (char*) type, complexity O(1). It's also important that the user knows exactly what kind of data types are involved here as this might cause issues when dealing with pointers/structures if non managed objects.
        char* hi = &base[num * sz -1 ];   // High-level pointer for iteration through Array  (char*) type, complexity O(1). It's also important that the user knows exactly what kind of data types are involved here as this might cause issues when dealing with pointers/structures if non managed objects.
        char* m = lo + sz * ((hi - lo) / 2 / sz);   // Median-of three pivot selection strategy (char*) type, complexity O(1). It's also important that the user knows exactly what kind of data types are involved here as this might cause issues when dealing with pointers/structures if non managed objects.
         /* The code below performs a median-based partition operation and swaps elements to ensure sort order is maintained */  // This block does not affect function return type, complexity O(n). It's also important that the user knows exactly what kind of data types are involved here as this might cause issues when dealing with pointers/structures if non managed objects.
         /* Swap operation has a time complexity equal to number n ie., worst case scenario is linear which can be very rare in practical cases but not impossible */  // This block does not affect function return type, complexity O(n). It's also important that the user knows exactly what kind of data types are involved here as this might cause issues when dealing with pointers/structures if non managed objects.
        /* If necessary a more optimized implementation can be added such algorithms like in-place version quicksort or hybrid sorting algorithm which have lower worst case time complexity than standard quickSort but it also requires additional overheads compared to this one */  // This block does not affect function return type, complexity O(n). It's also important that the user knows exactly what kind of data types are involved here as this might cause issues when dealing with pointers/structures if non managed objects.
}    /* Ending comment for 'partition()'. */  // This block does not affect function return type, complexity O(n). It's also important that the user knows exactly what kind of data types are involved here as this might cause issues when dealing with pointers/structures if non managed objects.
```   /* Ending comment for 'partition()'. */  // This block does not affect function return type, complexity O(n). It's also important that the user knows exactly what kind of data types are involved here as this might cause issues when dealing with pointers/structures if non managed objects.
```   /* Ending comment for 'partition()'. */  // This block does not affect function return type, complexity O(n). It's also important that the user knows exactly what kind of data types are involved here as this might cause issues when dealing with pointers/structures if non managed objects.
```   /* Ending comment for 'partition()'. */  // This block does not affect function return type, complexity O(n). It's also important that the user knows exactly what kind of data types are involved here as this might cause issues when dealing with pointers/structures if non managed objects.
```   /* Ending comment for 'partition()'. */  // This block does not affect function return type, complexity O(n). It's also important that the user knows exactly what kind of data types are involved here as this might cause issues when dealing with pointers/structures if non managed objects.
```   /* Ending comment for 'partition()'. */  // This block does not affect function return type, complexity O(n). It's also important that the user knows exactly what kind of data types are involved here as this might cause issues when dealing with pointers/structures if non managed objects.
```   /* Ending comment for 'partition()'. */  // This block does not affect function return type, complexity O(n). It's also important that the user knows exactly what kind of data types are involved here as this might cause issues when dealing with pointers/structures if non managed objects.
```   /* Ending comment for 'partition()'. */  // This block does not affect function return type, complexity O(n). It's also important that the user knows exactly what kind of data types are involved here as this might cause issues when dealing with pointers/structures if non managed objects.
```   /* Ending comment for 'partition()'. */  // This block does not affect function return type, complexity O(n). It's also important that the user knows exactly what kind of data types are involved here as this might cause issues when dealing with pointers/structures if non managed objects.
```   /* Ending comment for 'partition()'. */  // This block does not affect function return type, complexity O(n). It's also important that the user knows exactly what kind of data types are involved here as this might cause issues when dealing with pointers/structures if non managed objects.
```   /* Ending comment for 'partition()'. */  // This block does not affect function return type, complexity O(n). It's also important that the user knows exactly what kind of data types are involved here as this might cause issues when dealing with pointers/structures if non managed objects.
```   /* Ending comment for 'partition()'. */  // This block does not affect function return type, complexity O(n). It's also important that the user knows exactly what kind of data types are involved here as this might cause issues when dealing with pointers/structures if non managed objects.
```   /* Ending comment for 'partition()'. */  // This block does not affect function return type, complexity O(n). It's also important that the user knows exactly what kind of data types are involved here as this might cause issues when dealing with pointers/structures if non managed objects.
```   /* Ending comment for 'partition()'. */  // This block does not affect function return type, complexity O(n). It's also important that the user knows exactly what kind of data types are involved here as this might cause issues when dealing with pointers/structures if non managed objects.
```   /* Ending comment for 'partition()'. */  // This block does not affect function return type, complexity O(n). It's also important that the user knows exactly what kind of data types are involved here as this might cause issues when dealing with pointers/structures if non managed objects.
```   /* Ending comment for 'partition()'. */  // This block does not affect function return type, complexity O(n). It's also important that the user knows exactly what kind of data types are involved here as this might cause issues when dealing with pointers/structures if non managed objects.
```   /* Ending comment for 'partition()'. */  // This block does not affect function return type, complexity O(n). It's also important that the user knows exactly what kind of data types are involved here as this might cause issues when dealing with pointers/structures if non managed objects.
```   /* Ending comment for 'partition()'. */  // This block does not affect function return type, complexity O(n). It's also important that the user knows exactly what kind of data types are involved here as this might cause issues when dealing with pointers/structures if non managed objects.
```   /* Ending comment for 'partition()'. */  // This block does not affect function return type, complexity O(n). It's also important that the user knows exactly what kind of data types are involved here as this might cause issues when dealing with pointers/structures if non managed objects.
```   /* Ending comment for 'partition()'. */  // This block does not affect function return type, complexity O(n). It's also important that the user knows exactly what kind of data types are involved here as this might cause issues when dealing with pointers/structures if non managed objects.
```   /* Ending comment for 'partition()'. */  // This block does not affect function return type, complexity O(n). It's also important that the user knows exactly what kind of data types are involved here as this might cause issues when dealing with pointers/structures if non managed objects.
```   /* Ending comment for 'partition()'. */  // This block does not affect function return type, complexity O(n). It's also important that the user knows exactly what kind of data types are involved here as this might cause issues when dealing with pointers/structures if non managed objects.
```   /* Ending comment for 'partition()'. */  // This block does not affect function return type, complexity O(n). It's also important that the user knows exactly what kind of data types are involved here as this might cause issues when dealing with pointers/structures if non managed objects.
```   /* Ending comment for 'partition()'. */  // This block does not affect function return type, complexity O(n). It's also important that the user knows exactly what kind of data types are involved here as this might cause issues when dealing with pointers/structures if non managed objects.
```   /* Ending comment for 'partition()'. */  // This block does not affect function return type, complexity O(n). It's also important that the user knows exactly what kind of data types are involved here as this might cause issues when dealing with pointers/structures if non managed objects.
```   /* Ending comment for 'partition()'. */  // This block does not affect function return type, complexity O(n). It's also important that the user knows exactly what kind of data types are involved here as this might cause issues when dealing with pointers/structures if non managed objects.
```   /* Ending comment for 'partition()'. */  // This block does not affect function return type, complexity O(n). It's also important that the user knows exactly what kind of data types are involved here as this might cause issues when dealing with pointers/structures if non managed objects.

deepseek-coder:6.7b-instruct-q4_K_M

回答

/**
 * \brief Partition an array using a pivot element.
 * 
 * This function partitions the array `a` into two parts so that all elements in the first part are less than or equal to the pivot, and all elements in the second part are greater than the pivot. The size of each element is given by `sz`.
 * 
 * \param a Pointer to the array to be partitioned.
 * \param cmp Comparison function for comparing two elements. This should return a positive number if the first argument is greater than the second, zero if they are equal, and a negative number otherwise.
 * \param sz Size of each element in bytes.
 * \param n Number of elements in the array.
 * 
 * \return Pointer to the pivot after partitioning.
 */

llama3:8b-instruct-q4_K_M

回答

/**
 * \brief Partitions the array \a a into two parts using the given comparison function.
 *
 * This function rearranges the elements in the array to have all elements less than or equal to
 * the pivot element in the first part, and all elements greater than the pivot element in the second part.
 *
 * \param a Pointer to the array to be partitioned.
 * \param cmp Comparison function used to determine the pivot element. Returns a positive value if the first argument is greater,
 * zero if they are equal, and a negative value otherwise.
 * \param sz Size of each element in bytes.
 * \param n Number of elements in the array.
 *
 * \return Pointer to the pivot element after partitioning.
 *

phi3:3.8b-mini-instruct-4k-q4_K_M

回答

/**
 * \brief Partitions a given array into two parts using the 'median-of-three' partitioning method.
 * 
 * This function performs an in-place partition of an array `a` based on a comparison provided by a user-defined comparator function `cmp`. The array is first converted to a char pointer, as void pointers cannot be directly added with size_t values representing bytes. If the array has fewer than two elements, it is considered sorted and can return early.
 * 
 * \param a Pointer to the beginning of the input data array.
 * \param cmp Comparator function that takes two const void* pointers as arguments and returns an int indicating their order (negative if 'lo' should come first).
 * \param sz Size in bytes representing the size of each element in the array.
 * \param n The number of elements to partition.
 * 
 * \returns Pointer to the start of the higher partition after rearranging the array.
 */
void* partition(
    void* a,
    int (*cmp)(const void*, const void*),
    size_t sz,
    size_t n
) {
    // ... [rest of the code remains unchanged]
}

gemma-2-2b-it.Q4_K_M.gguf-GGUF

回答

/**
 * Partitions a portion of an array based on a custom comparison function.
 *
 * \param a  The beginning address of the array to partition. 
 * \param cmp The comparison function used for partitioning. It should compare two void pointers
 *          and return a positive value if the first pointer is smaller than the second. 
 * \param sz   The size of the portion of the array to partition.  
 * \param n    The number of elements in the portion of the array. 
 *
 * This function partitions an array by finding a pivot element and dividing it into two subarrays based on the comparison result of the provided `cmp` function. The algorithm iterates through the array until a point where the partition is complete or there are no more elements to compare.
 *
 * \return A pointer to the end of the partitioned array. 
 */
void* partition(
    void* a,
    int (*cmp)(void const*, void const*),
    size_t sz,
    size_t n
)

試しにプロンプトの冒頭を, Create a doxygen comment for the following C++ Function in Japanese. doxygen comment onlyとしてみました. すごい, これはいけるのではないでしょうか.

回答


/**
 * @brief 
 *   配列の要素をpartitionする。
 * 
 *   @param a 
 *     配列のポインタ
 *   @param cmp 
 *     比較関数。void* に対して直接ポインタ演算はできないので、char* へ変換する。
 *   @param sz 
 *     配列のサイズ
 *   @param n 
 *     配列の要素数
 * 
 * @return 
 *   配列の要素をpartitionした後のポインタ
 */
void* partition(
    void* a,
    int (*cmp)(void const*, void const*),
    size_t sz,
    size_t n
) {
    // void* に対して直接ポインタ演算はできないので予め char* へ変換する
    char* const base = (char*)a;
    if (n <= 1) return base + sz;
    char* lo = base;
    char* hi = &base[sz * (n - 1)];
    char* m  = lo + sz * ((hi - lo) / sz / 2);
    // m が median-of-3 を指すようソート
    if (cmp(lo, m) > 0) {
        swap(lo, m, sz);
    }
    if (cmp(m, hi) > 0) {
        swap(m, hi, sz);
        if (cmp(lo, m) > 0) {
            swap(lo, m, sz);
        }
    }
    while (1) {
        while (cmp(lo, m) < 0) lo += sz; // ピボット以上の要素を下から探す
        while (cmp(m, hi) < 0) hi -= sz; // ピボット以下の要素を上から探す
        if (lo >= hi) return hi + sz;
        swap(lo, hi, sz);
        // ピボットがスワップされた場合、スワップ先を指すよう m を更新する
        if (lo == m) {
            m = hi;
        } else if (hi == m) {
            m = lo;
        }
        lo += sz;
        hi -= sz;
    }
}


**Explanation of the comment:**

* **@brief:** Provides a brief, concise description of the function's purpose.
* **@param a ... @param n:** Lists the parameters of the function and their descriptions (in Japanese).
* **@return:** Describes the return value of the function and its type. 


Let me know if you have any further questions or specific instructions for formatting your Doxygen comments!

gemma-2-2b-jpn-it-gguf-japanese-imatrix

回答

/**
 * @brief Partitions a block of memory into three parts based on the comparison of values. 
 * This function uses a divide-and-conquer approach with swapping and iteration to find the partition.
 * @param a The pointer to the start of the array to partition.
 * @param cmp A comparison function that compares two values.
 * @param sz Size of the array.
 * @param n Number of elements in the array.
 * @return Pointer to the starting address of the partitioned array. 
 */
void* partition(
    void* a,
    int (*cmp)(void const*, void const*),
    size_t sz,
    size_t n
)

回答

/**
 * @brief  要素をピボットごとに分割する。
 * 
 * @param  a  分割対象のメモリ領域（char*）
 * @param  cmp 比較関数のポインタ。比較関数で、2つの要素同士を比較する。
 * @param  sz  分割対象のサイズ。
 * @param  n  要素の数。
 * 
 * @return  分割後の配列の先頭アドレス（void*）。
 */
void* partition(void* a, int (*cmp)(void const*, void const*), size_t sz, size_t n)

gemma-2-9b-it-GGUF

回答

/**
 * Partitions a range of data using the provided comparison function.
 * 
 * The partition algorithm implements a variation of the quicksort partitioning scheme. 
 * It selects the middle element as the pivot and partitions the array around it such that all elements less than or equal to the pivot are placed before it,
 * and all elements greater than the pivot are placed after it.
 * 
 * @param a The base pointer of the data range to partition.
 * @param cmp A comparison function taking two void pointers as arguments and returning an integer indicating their relative order.
 * @param sz The size in bytes of each element in the data range.
 * @param n The number of elements in the data range.
 * 
 * @return A pointer to the final position of the pivot element after partitioning.
 */
void* 
partition(
    void* a,
    int (*cmp)(void const*, void const*),
    size_t sz,
    size_t n
) {
  // ... implementation ...
}

試しにプロンプトの冒頭を, Create a doxygen comment for the following C++ Function in Japanese. doxygen comment onlyとしてみました. 2Bモデルに比べてより詳しい感じがします. "パリティション"とか謎日本語は機械翻訳で学習データ突っ込んだ結果かな.

回答

/**
 * @brief 配列を、ピボット要素を中心として、小さい要素と大きい要素に分割する。

 *  void* 型の配列 `a` を引数とし、比較関数 `cmp` 、要素サイズ `sz`、配列長 `n` を用いて、クイックソートアルゴリズムのパリティション処理を行う。
 * 
 * @param a 配列の先頭アドレス。
 * @param cmp 比較関数ポインタ。`void const*` 型の2つの要素を比較し、小さい方が0、大きい方が正、等しい方が負を返す。
 * @param sz 要素サイズ。
 * @param n 配列の長さ。
 * 
 * @return ピボット要素の位置を示すポインタ。
 */
void* 
partition(
    void* a,
    int (*cmp)(void const*, void const*),
    size_t sz,
    size_t n
) {

TinySwallow-1.5B-Instruct-GGUF

8bit量子化です.

回答

/**
 * @brief A function to partition an array based on a comparison function.
 *
 * This function partition an array `a` into two sub-arrays such that all elements
 * smaller than the pivot (`lo` to `m`) are on the left and all elements greater
 * than the pivot (`m` to `hi`) are on the right.
 *
 * @param a Pointer to the input array.
 * @param cmp A comparison function that takes two void* as arguments and returns
 *           an integer. If cmp(a, b) > 0, a is considered to be smaller than b.
 * @param sz The size of the input array.
 * @param n The number of elements in the input array.
 * @return void* Pointer to the first element of the left sub-array.
 *
 * @note This function assumes that the input array is already sorted or has only
 *       a single element.
 */
void* partition(void* a, int (*cmp)(void const*, void const*), size_t sz, size_t n) {
    // Convert void* to char* for pointer arithmetic
    char* const base = (char*)a;
    
    // If the array is too small, return the base pointer
    if (n <= 1) return base + sz;

    char* lo = base;
    char* hi = &base[sz * (n - 1)];
    char* m = lo + sz * ((hi - lo) / sz / 2);
    
    // Median-of-3 partition
    if (cmp(lo, m) > 0) {
        swap(lo, m, sz);
    }
    if (cmp(m, hi) > 0) {
        swap(m, hi, sz);
        if (cmp(lo, m) > 0) {
            swap(lo, m, sz);
        }
    }
    
    while (1) {
        // Find the smallest element to the left of the pivot
        while (cmp(lo, m) < 0) lo += sz;
        // Find the largest element to the right of the pivot
        while (cmp(m, hi) < 0) hi -= sz;

        // If the pivot is now in its correct position, return it
        if (lo >= hi) return hi + sz;
        
        // Swap elements and adjust the pivot pointer
        swap(lo, hi, sz);
        if (lo == m) {
            m = hi;
        } else if (hi == m) {
            m = lo;
        }
        lo += sz;
        hi -= sz;
    }
}

指示を"次のC++関数のDoxygenコメントを作って下さい。Doxygenコメントだけ作ってください。"と日本語に変更してみました.

回答

/**
 * @brief 配列をソートし、ピボットを指定された条件で分割する。
 *
 * この関数は、配列 `a` を指定された比較関数 `cmp` を使用してソートし、
 * `cmp` が返す値に基づいて配列を分割します。分割された配列の最初の要素の
 * ポインタを返します。
 *
 * @param a 配列
 * @param cmp 比較関数
 * @param sz 配列のサイズ
 * @param n 配列内の要素数
 * @return void* 配列の最初の要素へのポインタ
 *
 * @note この関数は、配列がすでにソートされているか、要素数が1個の場合には
 *       無操作で初期化します。
 */
void*
partition(void* a, int (cmp)(void const*, void const*), size_t sz, size_t n) {
    // void* に対して直接ポインタ演算はできないので予め char* へ変換する
    if (n <= 1) return (void*** * * * * * * * * * * * * * * * * * * * * * * * * *

まとめ

説明が正しいかはともかく, たたき台としては十分なのかなと思います.
プロンプトを試行錯誤していて, ChatGPT 3.5でも最初は余計なものがついていたのですが, このデータセットHugging Face (LukasSonn/DoxygenStrings-Short)を発見してから完璧になりました. データセット見た方が早いのか.
他のモデルも機械的に抽出できるので許容範囲だと思います.
これで, assertionなどから, @pre事前条件や@post事後条件も抽出してくれると完璧なのですが.

2024/04/11 追記

tinyllamaでも, もちろんどんなコードに対しても上手くいったり説明の精度が問題ないかはともかく, 使える可能性がありました. このレベルのサイズになると, Visual Studioの拡張にモデルを同梱できる可能性まであります. deepseek-corder 1.3Bは余計なこと言い出したが, @preなど入れて可能性を感じる. deepseek-corder 6.7Bになるとなぜ簡潔になるのか.

2024/04/19 追記

llama3 はかなり性能がよいのかもしれない.

2024/04/29 追記

Phi-3 を追加, Phi-2は学習データに入っていないらしくDoxygenコメントは全く生成できなかったが, Phi-3ならいけるかもしれない. 曖昧な表現なのは, tinyllamaはこの例では生成できているけれど, どんなコードに対しても安定して生成できるわけではない. やはりサイズに比例して安定してくる.

2024/10/05 追記

gemma-2追加, Elyza code llamaなどは日本語の追加学習でDoxygenコメントの生成能力を失っていますが, 2Bなのに英語・日本語ともにそれなりのコメントを生成してくれます, すごい.

2025/02/01 追記

TinySwallow-1.5B-Instruct追加, Qwenの知識を受け継いでいるっぽい. 日本語で指示した場合もコメントはそれなりに出せている. 無限ループするのはストップトークンを指定すればいいかも. 日英両方とも説明は無茶苦茶なので, 流石にこのサイズだと正しさを期待することはできないと思います.

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up