Jabu
Jabu

Reputation: 135

How to calculate file hash with crypto++

I'm trying to convert this qt hash function to "c++ only" and using the crypto++ lib

QString calculateGitHubFileHash(const QString& filePath) 
{
    QFile file(filePath);
    QString shaHash;
    if( file.open(QFile::ReadOnly ) )
    {
        QCryptographicHash hash(QCryptographicHash::Sha1);
        QByteArray header = QString("blob %1").arg(file.size()).toUtf8(); // Need to be null terminated
        hash.addData(header.data(), header.size() + 1);
        hash.addData(file.readAll());

        shaHash = hash.result().toHex();
    }
    return shaHash;
}

A working reproducible example:

#include <curl/curl.h>
#include <regex>
#include <fstream>
#include <cryptopp/sha.h>
#include <cryptopp/hex.h>
#include <cryptopp/filters.h>

static size_t WriteMemoryCallback(void* contents, size_t size, size_t nmemb, void* userp)
{
    size_t realsize = size * nmemb;
    auto& mem = *static_cast<std::string*>(userp);
    mem.append(static_cast<char*>(contents), realsize);
    return realsize;
}

void curl(std::string& data, const std::string& url)
{
    CURL* curl_handle;
    curl_handle = curl_easy_init();
    curl_easy_setopt(curl_handle, CURLOPT_URL, url.data());
    curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
    curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, &data);
    curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, "curl/7.55.1");
    curl_easy_perform(curl_handle);
    curl_easy_cleanup(curl_handle);
    curl_global_cleanup();
}

std::string calculateGitHubFileHash(const std::string& filePath) 
{
    std::ifstream file(filePath, std::ios::binary);
    std::string shaHash;
    if (file.is_open())
    {
        // Get the size of the file
        file.seekg(0, std::ios::end);
        std::streamsize size = file.tellg();
        file.seekg(0, std::ios::beg);

        // Calculate the header
        std::string header = "blob " + std::to_string(size);

        // Read the file into a std::vector<byte>
        std::vector<byte> fileContent(size);
        file.read(reinterpret_cast<char*>(&fileContent[0]), size);

        // Calculate the SHA1 hash
        CryptoPP::SHA1 sha1;
        CryptoPP::StringSource(
            header + reinterpret_cast<const char*>(fileContent.data()), 
            true, 
            new CryptoPP::HashFilter(sha1, new CryptoPP::HexEncoder(new CryptoPP::StringSink(shaHash)))
        );
    }
    return shaHash;
}

int main()
{
    std::string json;
    curl(json, "https://api.github.com/repos/jajabu33/test/contents");

    // Using regex to parse the json to reduce code size, as this is just an reproducible example
    std::regex nameRegex("\"name\":\\s*\"(.*?)\"");
    std::regex shaRegex("\"sha\":\\s*\"(.*?)\"");
    std::smatch nameMatches, shaMatches;

    auto nIt = std::sregex_iterator(json.begin(), json.end(), nameRegex);
    auto sIt = std::sregex_iterator(json.begin(), json.end(), shaRegex);

    for (auto itName = nIt, itSha = sIt; itName != std::sregex_iterator() && itSha != std::sregex_iterator(); ++itName, ++itSha)
    {
        nameMatches = *itName;
        shaMatches = *itSha;
        std::string fileName = nameMatches[1];
        std::string sha = shaMatches[1];
        std::string localFileHash = calculateGitHubFileHash("C:/test.txt");
        if (sha != localFileHash)
            std::cout << "File " << fileName << " is not up to date\n";
    }
}

On GitHub json test.txt hash is 95d09f2b10159347eece71399a7e2e907ea3df4f using the qt function on a local copy of test.txt it produce the same hash.

However, calculateGithubFileHash written with crypto++ the hash is 07C467E5526EEDE9510953A974DB07F3138AFA57

What is the correct way to calculate the hash appending the blob header to it?

Upvotes: 1

Views: 234

Answers (0)

Related Questions