working multithreading

Finally managed to multithread the folder scanner and RTP. It may still have some bugs, but it works.
This commit is contained in:
jakani24
2023-12-26 21:17:39 +01:00
parent f069abeb7b
commit c3f3fb85af
82 changed files with 2006 additions and 173 deletions

View File

@@ -4,6 +4,9 @@
#include <openssl/md5.h>
#include <windows.h>
#include <iostream>
#include <thread>
#include <chrono>
#include <time.h>
#include "md5hash.h"
#include <string>
#include "well_known.h"
@@ -11,7 +14,100 @@
#include "virus_ctrl.h"
#ifndef SCAN_CPP
#define SCAN_CPP
// Open file handle of every loaded database file, keyed by the file's path (filled by initialize()).
std::unordered_map<std::string, HANDLE> fileHandles;
// File-mapping handle of every loaded database file, keyed by the same path.
std::unordered_map<std::string, HANDLE> mappingHandles;
// Pointer to the mapped read-only view of every loaded database file, keyed by the same path.
std::unordered_map<std::string, char*> fileData;
// Running count of files seen by the folder scanner; used for its progress output.
int cnt = 0;
// Number of scan threads currently in flight: incremented by the folder scanner before it starts
// a thread, decremented by action_scanfile_t() when that thread is done.
// NOTE(review): this is a plain int modified from several threads without synchronization.
int num_threads=0;
//load all the db files into memory
void initialize(const std::string& folderPath) {
for (char firstChar = '0'; firstChar <= 'f'; ++firstChar) {
for (char secondChar = '0'; secondChar <= 'f'; ++secondChar) {
// Ensure that the characters are valid hexadecimal digits
if (!std::isxdigit(firstChar) || !std::isxdigit(secondChar) or std::isupper(firstChar) or std::isupper(secondChar)) {
continue;
}
// Create the filename based on the naming convention
std::string filename = folderPath +"\\" + firstChar + secondChar + ".jdbf";
//printf("Loading %s\n", filename.c_str());
// Open the file
HANDLE hFile = CreateFile(filename.c_str(), GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
if (hFile == INVALID_HANDLE_VALUE) {
log(LOGLEVEL::ERR, "[initialize()]: Error opening database file: ", filename);
continue; // Move on to the next file if there's an error
}
// Create the file mapping
HANDLE hMapping = CreateFileMapping(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
if (hMapping == NULL) {
log(LOGLEVEL::ERR, "[initialize()]: Error creating database file mapping: ", filename);
CloseHandle(hFile);
continue; // Move on to the next file if there's an error
}
// Map the file into memory
char* fileDataPtr = static_cast<char*>(MapViewOfFile(hMapping, FILE_MAP_READ, 0, 0, 0));
if (fileDataPtr == NULL) {
log(LOGLEVEL::ERR, "[initialize()]: Error mapping database file: ", filename);
CloseHandle(hMapping);
CloseHandle(hFile);
continue; // Move on to the next file if there's an error
}
// Store the handles in the global maps
fileHandles[filename] = hFile;
mappingHandles[filename] = hMapping;
fileData[filename] = fileDataPtr;
}
}
}
// Release everything initialize() acquired. For each loaded database file the view is unmapped,
// then the mapping handle and the file handle are closed; afterwards all three global maps are emptied.
// Call this once the file mappings are no longer needed.
void cleanup() {
    for (auto it = fileHandles.begin(); it != fileHandles.end(); ++it) {
        const std::string& dbPath = it->first;
        UnmapViewOfFile(fileData[dbPath]);
        CloseHandle(mappingHandles[dbPath]);
        CloseHandle(it->second);
    }

    // Drop the now-invalid handles and pointers
    fileData.clear();
    mappingHandles.clear();
    fileHandles.clear();
}
//the latest and fastest version of searching a hash by now
int search_hash(const std::string& dbname_, const std::string& hash_, const std::string& filepath_) {
// Check if the file mapping is already open for the given filename
thread_local std::string dbname (dbname_);
thread_local std::string hash (hash_);
thread_local std::string filepath (filepath_);
thread_local auto fileIter = fileHandles.find(dbname);
if (fileIter == fileHandles.end()) {
log(LOGLEVEL::ERR, "[search_hash()]: File mapping not initialized for ", dbname);
return 2;
}
// Use fileData for subsequent searches
thread_local DWORD fileSize = GetFileSize(fileHandles[dbname], NULL);
thread_local std::string fileContent(fileData[dbname], fileSize);
// Search for the specific string in the file content
thread_local size_t foundPos = fileContent.find(hash);
if (foundPos != std::string::npos) {
log(LOGLEVEL::VIRUS, "[search_hash()]: Found virus: ", hash, " in file: ", filepath);
virus_ctrl_store(filepath.c_str(), hash.c_str(), hash.c_str());
//afterwards do the processing with that file
virus_ctrl_process(hash.c_str());
return 1; // Found
}
return 0; // Not found
}
bool file_exists(const std::string& filePath) {
DWORD fileAttributes = GetFileAttributes(filePath.c_str());
@@ -23,7 +119,9 @@ bool file_exists(const std::string& filePath) {
// Check if it's a regular file and not a directory
return (fileAttributes & FILE_ATTRIBUTE_DIRECTORY) == 0;
}
void ListFilesRecursive(const std::string& directory, int thread_id) {
//This is the main function to scan folders. It starts multiple threads, based on the number of cores / settings.
void scan_folder_recursive(const std::string& directory) {
std::string search_path = directory + "\\*.*";
WIN32_FIND_DATA find_file_data;
HANDLE hFind = FindFirstFile(search_path.c_str(), &find_file_data);
@@ -39,92 +137,88 @@ void ListFilesRecursive(const std::string& directory, int thread_id) {
}
std::string full_path = directory + "\\" + find_file_data.cFileName;
const std::string full_path = directory + "\\" + find_file_data.cFileName;
if (find_file_data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
// If it's a directory, recurse into it
ListFilesRecursive(full_path,thread_id);
scan_folder_recursive(full_path);
}
else {
// If it's a file, print its name
/*char md5Hash[2 * MD5_DIGEST_LENGTH + 1];
md5_file(full_path.c_str(), md5Hash);
printf("%s\n", md5Hash);
printf("%d\n", scan_hash("C:\\Users\\janis\\Documents\\ma_av_tests\\OutputFile.txt", "96be95b122c2b9b8bb5765c312ca4f73"));
if (scan_hash("C:\\Users\\janis\\Documents\\ma_av_tests\\OutputFile.txt", md5Hash) == 1) {
printf("virus found");
//action_scanfile_t will start the threads for scanning the hashes
//action_scanfile_t(full_path.c_str());
//do multithreading here
while (num_threads >= std::thread::hardware_concurrency()) {
Sleep(10);
}
num_threads++;
std::thread scan_thread(action_scanfile_t, full_path);
scan_thread.detach();
}*/
cnt++;
if (cnt % 1000 == 0) {
printf("Processed %d files; sent from thread %d\n", cnt,thread_id);
if (cnt % 100 == 0) {
printf("Processed %d files;\n", cnt);
//printf("Number of threads: %d\n", num_threads);
}
}
} while (FindNextFile(hFind, &find_file_data) != 0);
FindClose(hFind);
}
int scan_hash(const std::string& filename, const std::string& searchString) {//!!!! does not work with e.g. utf-16 or something like that. either ascii or utf8!!
HANDLE hFile = CreateFile(filename.c_str(), GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
if (hFile == INVALID_HANDLE_VALUE) {
log(LOGLEVEL::ERR, "[scan_hash()]: Error opening database file: ", filename, " while searching for hash.", searchString);
return 2;
}
HANDLE hMapping = CreateFileMapping(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
if (hMapping == NULL) {
log(LOGLEVEL::ERR, "[scan_hash()]: Error creating database file mapping: ", filename, " while searching for hash.");
CloseHandle(hFile);
return 2;
}
char* fileData = static_cast<char*>(MapViewOfFile(hMapping, FILE_MAP_READ, 0, 0, 0));
if (fileData == NULL) {
log(LOGLEVEL::ERR, "[scan_hash()]: Error mapping database file: ", filename, " while searching for hash.");
CloseHandle(hMapping);
CloseHandle(hFile);
return 2;
}
DWORD fileSize = GetFileSize(hFile, NULL);
std::string fileContent(fileData, fileSize);
// Search for the specific string in the file content
size_t foundPos = fileContent.find(searchString);
if (foundPos != std::string::npos) {
UnmapViewOfFile(fileData);
CloseHandle(hMapping);
CloseHandle(hFile);
return 1;//found
}
// Unmap the memory and close the handles
UnmapViewOfFile(fileData);
CloseHandle(hMapping);
CloseHandle(hFile);
return 0;
}
// Look up `hash` in the database shard selected by its first two characters
// ("<DB_DIR>\<c0><c1>.jdbf") and return the result of scan_hash(path, hash):
// 1 = found, 0 = not found, 2 = database file could not be opened or mapped.
// Also returns 2 when `hash` is null or shorter than two characters, because no shard can be
// selected for it.
int scan_hash(const char* hash) {
    if (hash == nullptr || hash[0] == '\0' || hash[1] == '\0') {
        return 2;
    }
    // Stack buffer instead of new[]: the heap buffer used previously was never freed
    char path[600];
    path[0] = '\0';
    sprintf_s(path, 595, "%s\\%c%c.jdbf", DB_DIR, hash[0], hash[1]);
    return scan_hash(path, hash);
}
//for singlethreaded scans
void action_scanfile(const char*filepath) {
if (strlen(filepath) == 0 or strcmp("", filepath) == 0 or file_exists(filepath)==false) {
log(LOGLEVEL::ERR, "[action_scanfile()]: Error opening file: ", filepath, " while scanning file for viruses.");
return; //no filepath given
char* db_path = new char[300];
//log(LOGLEVEL::INFO, "[action_scanfile_t()]: Scanning file: ", filepath);
if (strlen(filepath) == 0 or strcmp("", filepath) == 0 or file_exists(filepath) == false) {
return; //no filepath given or file not accessible
}
char*hash = new char[300];
md5_file(filepath, hash);
if (scan_hash(hash)==1) { //virus found
log(LOGLEVEL::VIRUS, "[action_scanfile()]: Virus found in file: ", filepath);
//add it to a database which stores filepaths of infected files
virus_ctrl_store(filepath,hash,"sf");
//afterwards do the processing with that file
virus_ctrl_process("sf");
else {
char* hash = new char[300];
hash[0] = '\0';
strcpy_s(hash, 295, md5_file_t(filepath).c_str());
sprintf_s(db_path, 295, "%s\\%c%c.jdbf", DB_DIR, hash[0], hash[1]);
search_hash(db_path, hash, filepath);
delete[] hash;
}
delete[] hash;
delete[] db_path;
}
/*
void action_scanfile_t( const char*filepath) {
char* db_path = new char[300];
int max_threads = std::thread::hardware_concurrency();
//log(LOGLEVEL::INFO, "[action_scanfile_t()]: Scanning file: ", filepath);
if (strlen(filepath) == 0 or strcmp("", filepath) == 0 or file_exists(filepath) == false) {
return; //no filepath given or file not accessible
}
else {
char* hash = new char[300];
hash[0] = '\0';
hash = md5_file_t(filepath);
sprintf_s(db_path, 295, "%s\\%c%c.jdbf", DB_DIR, hash[0], hash[1]);
while (num_threads >= max_threads) {
Sleep(10);
}
num_threads++;
std::thread search_thread(search_hash,db_path, hash, filepath);
search_thread.detach();
std::this_thread::sleep_for(std::chrono::microseconds(50));
delete[] hash;
}
delete[] db_path;
}*/
void action_scanfile_t(const std::string& filepath_) {
thread_local const std::string filepath (filepath_);
thread_local char* db_path = new char[300];
thread_local char*hash = new char[300];
strcpy_s(hash,295 ,md5_file_t(filepath).c_str());
sprintf_s(db_path, 295, "%s\\%c%c.jdbf", DB_DIR, hash[0], hash[1]);
search_hash(db_path, hash, filepath);
num_threads--;
}
#endif