Clad is an automatic differentiation (AD) Clang plugin for C++. Given the C++ source code of a mathematical function, it can automatically generate C++ code that computes the function's derivatives. This project focuses on enabling automatic differentiation of code that uses C++ concurrency features such as std::thread, std::mutex, and atomic operations, allowing users to take full advantage of their CPU resources.
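For context, here is a minimal sketch of ordinary (single-threaded) Clad usage; the function square below is purely illustrative, while clad::differentiate and execute are Clad's standard forward-mode entry points:
#include <iostream>
#include "clad/Differentiator/Differentiator.h"

// f(x) = x * x; a trivial function to differentiate.
double square(double x) { return x * x; }

int main() {
  // Generate d(square)/dx at compile time (forward mode).
  auto d_square = clad::differentiate(square, "x");
  // Evaluate the generated derivative at x = 3.0 (expected result: 6.0).
  std::cout << d_square.execute(3.0) << "\n";
}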
The following example demonstrates differentiation of code that uses parallelization primitives:
#include <cmath>
#include <iostream>
#include <mutex>
#include <numeric>
#include <thread>
#include <vector>
#include "clad/Differentiator/Differentiator.h"
using VectorD = std::vector<double>;
using MatrixD = std::vector<VectorD>;
std::mutex m;
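// Element-wise product of two equally sized vectors.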
VectorD operator*(const VectorD &l, const VectorD &r) {
VectorD v(l.size());
for (std::size_t i = 0; i < l.size(); ++i)
v[i] = l[i] * r[i];
return v;
}
double dot(const VectorD &v1, const VectorD &v2) {
VectorD v = v1 * v2;
return std::accumulate(v.begin(), v.end(), 0.0);
}
double activation_fn(double z) { return 1 / (1 + std::exp(-z)); }
double compute_loss(double y, double y_estimate) {
return -(y * std::log(y_estimate) + (1 - y) * std::log(1 - y_estimate));
}
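// Compute the prediction and loss for a single data point and add it to the
// shared total; the mutex serializes concurrent updates to `loss`.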
void compute_and_add_loss(const VectorD &x, double y, const VectorD &weights,
                          double b, double &loss) {
double z = dot(x, weights) + b;
double y_estimate = activation_fn(z);
std::lock_guard<std::mutex> guard(m);
loss += compute_loss(y, y_estimate);
}
/// Compute the total loss associated with a single-neuron neural network.
/// y_estimate = activation_fn(dot(X[i], weights) + b)
/// Loss of a training data point = - (y_actual * std::log(y_estimate) + (1 - y_actual) * std::log(1 - y_estimate))
/// total loss: summation of loss for all the data points
double compute_total_loss(const MatrixD &X, const VectorD &Y,
const VectorD &weights, double b) {
double loss = 0;
std::size_t num_of_threads = std::thread::hardware_concurrency();
// hardware_concurrency() may return 0 if the core count cannot be determined.
if (num_of_threads == 0)
  num_of_threads = 1;
std::vector<std::thread> threads(num_of_threads);
int thread_id = 0;
for (std::size_t i = 0; i < X.size(); ++i) {
if (threads[thread_id].joinable())
threads[thread_id].join();
threads[thread_id] =
std::thread(compute_and_add_loss, std::cref(X[i]), Y[i],
std::cref(weights), b, std::ref(loss));
thread_id = (thread_id + 1) % num_of_threads;
}
for (std::size_t i = 0; i < num_of_threads; ++i) {
if (threads[i].joinable())
threads[i].join();
}
return loss;
}
int main() {
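// Generate, at compile time, a function that computes the gradient of
// compute_total_loss with respect to all of its parameters (reverse mode).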
auto loss_grad = clad::gradient(compute_total_loss);
// Fill the values as required! (One illustrative way to do this is sketched after this example.)
MatrixD X;
VectorD Y;
VectorD weights;
double b = 0;
// derivatives
// Zero the derivative variables and make them of the same dimension as the
// corresponding primal values.
MatrixD d_X;
VectorD d_Y;
VectorD d_weights;
double d_b = 0;
loss_grad.execute(X, Y, weights, b, &d_X, &d_Y, &d_weights, &d_b);
std::cout << "dLossFn/dW[2]: " << d_weights[2] << "\n"; // Partial derivative of the loss function w.r.t weight[2]
std::cout << "dLossFn/db: " << d_b << "\n"; // Partial derivative of the loss function w.r.t b
}
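For completeness, one possible way to populate the placeholder data above is sketched below; the values and dimensions are purely illustrative, but they show the key requirement that each derivative variable is zero-filled and has the same shape as its primal counterpart:
// Illustrative data: 4 training points with 3 features each (arbitrary values).
MatrixD X = {{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}, {7.0, 8.0, 9.0}, {1.5, 2.5, 3.5}};
VectorD Y = {0.0, 1.0, 1.0, 0.0};
VectorD weights = {0.1, 0.2, 0.3};
double b = 0.05;

// Derivative variables: zero-initialized, with the same dimensions as the primals.
MatrixD d_X(X.size(), VectorD(X[0].size(), 0.0));
VectorD d_Y(Y.size(), 0.0);
VectorD d_weights(weights.size(), 0.0);
double d_b = 0;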