From 2f154b3fcfdded95c2ad0148da92bf03aa80f076 Mon Sep 17 00:00:00 2001 From: cobrapitz <12397702+cobrapitz@users.noreply.github.com> Date: Mon, 8 May 2023 02:11:07 +0200 Subject: [PATCH] init --- .gitignore | 79 ++++++++ .gitmodules | 3 - .idea/.gitignore | 8 + .idea/GodotHubBackend.iml | 2 + .idea/misc.xml | 7 + .idea/modules.xml | 8 + CMakeLists.txt | 53 ++++++ extern/cblib | 1 - src/apps/fetch_godot.cpp | 34 ++++ src/apps/fetch_godot.h | 21 +++ src/godot_version/native_godot_version.cpp | 35 ++++ src/godot_version/native_godot_version.h | 29 +++ src/godot_version_scraper/godot_structs.h | 82 +++++++++ .../godot_version_scraper.cpp | 169 ++++++++++++++++++ .../godot_version_scraper.h | 30 ++++ src/main.cpp | 32 ++++ 16 files changed, 589 insertions(+), 4 deletions(-) create mode 100644 .gitignore create mode 100644 .idea/.gitignore create mode 100644 .idea/GodotHubBackend.iml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 CMakeLists.txt delete mode 160000 extern/cblib create mode 100644 src/apps/fetch_godot.cpp create mode 100644 src/apps/fetch_godot.h create mode 100644 src/godot_version/native_godot_version.cpp create mode 100644 src/godot_version/native_godot_version.h create mode 100644 src/godot_version_scraper/godot_structs.h create mode 100644 src/godot_version_scraper/godot_version_scraper.cpp create mode 100644 src/godot_version_scraper/godot_version_scraper.h create mode 100644 src/main.cpp diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c53f9b2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,79 @@ +/cmake-build-debug/ +/cmake-build-release/ + + +### C++ template +# Compiled Object files +*.slo +*.lo +*.o +*.obj + +# Precompiled Headers +*.gch +*.pch + +# Compiled Dynamic libraries +*.so +*.dylib +*.dll + +# Fortran module files +*.mod + +# Compiled Static libraries +*.lai +*.la +*.a +*.lib + +# Executables +*.exe +*.out +*.app + +### JetBrains template +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff: +.idea/workspace.xml +.idea/tasks.xml +.idea/dictionaries +.idea/vcs.xml +.idea/jsLibraryMappings.xml + +# Sensitive or high-churn files: +.idea/dataSources.ids +.idea/dataSources.xml +.idea/dataSources.local.xml +.idea/sqlDataSources.xml +.idea/dynamic.xml +.idea/uiDesigner.xml + +# Gradle: +.idea/gradle.xml +.idea/libraries + +# Mongo Explorer plugin: +.idea/mongoSettings.xml + +## File-based project format: +*.iws + +## Plugin-specific files: + +# IntelliJ +/out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties diff --git a/.gitmodules b/.gitmodules index 30bdbd9..509e1a1 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,3 @@ [submodule "cmake"] path = cmake url = git@git.cobrapitz.de:cobrapitz/cmake.git -[submodule "extern/cblib"] - path = extern/cblib - url = git@github.com:cobrapitz/cblib.git diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/GodotHubBackend.iml b/.idea/GodotHubBackend.iml new file mode 100644 index 0000000..f08604b --- /dev/null +++ b/.idea/GodotHubBackend.iml @@ -0,0 +1,2 @@ + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..6ac60cc --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..c672d58 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..0a3856f --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,53 @@ +cmake_minimum_required(VERSION 3.25) +project(GodotHubBackend) + +####################################################################################################################### +# Project specs +####################################################################################################################### + +project(${PROJECT_NAME} VERSION 0.1.0 DESCRIPTION "Godot-Hub Backend.") + +#set(CMAKE_BINARY_DIR ${CMAKE_SOURCE_DIR}/) +include(cmake/CMakeSettings.cmake) + + +####################################################################################################################### +# Add libs +####################################################################################################################### + +include(FetchContent) + +set(NLOHMANN_JSON_GIT_TAG v3.11.2) +include(cmake/FetchNlohmannJSON.cmake) + +set(CPR_GIT_TAG 1.10.2) +set(BUILD_SHARED_LIBS OFF) +include(cmake/Fetchcpr.cmake) + +set(CBLIB_GIT_TAG v0.1.4) +include(cmake/FetchCBLib.cmake) + +FetchContent_MakeAvailable(cpr json cblib) + + +####################################################################################################################### +# Building executable +####################################################################################################################### + +add_executable(${PROJECT_NAME} + src/main.cpp + src/apps/fetch_godot.cpp + src/godot_version_scraper/godot_version_scraper.cpp + src/godot_version/native_godot_version.cpp + ) + +target_include_directories(${PROJECT_NAME} + PUBLIC src + ) + +target_link_libraries(${PROJECT_NAME} PRIVATE + nlohmann_json::nlohmann_json + cpr::cpr + cblib + ) + diff --git a/extern/cblib b/extern/cblib deleted file mode 160000 index 3e7cf37..0000000 --- a/extern/cblib +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 3e7cf374db1d9ad92ec66d62be39cb9483f49682 diff --git a/src/apps/fetch_godot.cpp b/src/apps/fetch_godot.cpp new file mode 100644 index 0000000..13e3929 --- /dev/null +++ b/src/apps/fetch_godot.cpp @@ -0,0 +1,34 @@ + +#include "fetch_godot.h" + +#include "../godot_version_scraper/godot_version_scraper.h" + +#include + +#include + + +void FetchGodot::run() { + std::cout << "Requesting godot versions... (may take a while):" << std::endl; + file_objects = GodotVersionScraper::request_godot_versions(); +} + +void FetchGodot::save(const std::filesystem::path &config_file_path) { + using jsonf = nlohmann::json; + jsonf jsonfile; + + jsonfile["available_godot_versions"] = jsonf{}; + auto &available_godot_versions = jsonfile["available_godot_versions"]; + + for (auto &&file_object: file_objects.files) { + available_godot_versions[file_object.url] = to_json(file_object); + } + + if (fs::exists(config_file_path)) { + fs::remove(config_file_path); + } + + std::ofstream config_file(config_file_path); + config_file << jsonfile; + config_file.close(); +} diff --git a/src/apps/fetch_godot.h b/src/apps/fetch_godot.h new file mode 100644 index 0000000..c198412 --- /dev/null +++ b/src/apps/fetch_godot.h @@ -0,0 +1,21 @@ + +#ifndef GODOTHUB_FETCH_H +#define GODOTHUB_FETCH_H + +#include "../godot_version_scraper/godot_structs.h" + +#include +#include + +class FetchGodot { +private: + GodotFileObjects file_objects; + +public: + void run(); + + void save(const std::filesystem::path &config_file_path); +}; + + +#endif //GODOTHUB_FETCH_H diff --git a/src/godot_version/native_godot_version.cpp b/src/godot_version/native_godot_version.cpp new file mode 100644 index 0000000..1b5f586 --- /dev/null +++ b/src/godot_version/native_godot_version.cpp @@ -0,0 +1,35 @@ +#include "native_godot_version.h" + +#define USE_CBLIB_DEFINES + +#include + +using namespace godot; + +std::ostream &godot::operator<<(std::ostream &os, const GodotVersion &version) { + os << "filename: " << version.filename << "\npath: " << version.path << "\nsize: " << version.size << "\nurl: " + << version.url << "\nversion_text: " << version.version_text; + return os; +} + +void godot::to_json(json &json_data, const GodotVersion &godot_version) { + json_data = json{ + {"filename", godot_version.filename}, + {"path", godot_version.path}, + {"size", godot_version.size}, + {"url", godot_version.url}, + {"version_text", godot_version.version_text}, + }; +} + +void godot::from_json(const json &json_data, GodotVersion &godot_version) { + json_data.at("filename").get_to(godot_version.filename); + if (json_data.contains("path")) { + json_data.at("path").get_to(godot_version.path); + } + json_data.at("size").get_to(godot_version.size); + json_data.at("url").get_to(godot_version.url); + if (json_data.contains("version_text")) { + json_data.at("version_text").get_to(godot_version.version_text); + } +} diff --git a/src/godot_version/native_godot_version.h b/src/godot_version/native_godot_version.h new file mode 100644 index 0000000..a38ed61 --- /dev/null +++ b/src/godot_version/native_godot_version.h @@ -0,0 +1,29 @@ +#ifndef GODOT_HUB_MAP_NATIVE_GODOT_VERSION_H +#define GODOT_HUB_MAP_NATIVE_GODOT_VERSION_H + +#include + +#include +#include + + +namespace godot { +using json = nlohmann::json; + +struct GodotVersion { + std::string filename; + std::string path; + std::string size; + std::string url; + std::string version_text; + + friend std::ostream &operator<<(std::ostream &os, const GodotVersion &version); +}; + +void to_json(json &json_data, const GodotVersion &godot_version); + +void from_json(const json &json_data, GodotVersion &godot_version); + +} // godot + +#endif //GODOT_HUB_MAP_NATIVE_GODOT_VERSION_H diff --git a/src/godot_version_scraper/godot_structs.h b/src/godot_version_scraper/godot_structs.h new file mode 100644 index 0000000..b577a05 --- /dev/null +++ b/src/godot_version_scraper/godot_structs.h @@ -0,0 +1,82 @@ + +#ifndef GODOTHUB_GODOT_STRUCTS_H +#define GODOTHUB_GODOT_STRUCTS_H + + +#include + +#include +#include +#include + +struct GodotDirectory { + std::string directory_name; + std::string url; + std::string cached_directory_url; + + friend std::ostream &operator<<(std::ostream &os, const GodotDirectory &directory) { + os << "directory_name: " << directory.directory_name + << " url: " << directory.url; + return os; + } +}; + +struct GodotFileObject { + std::string filename; + std::string url; + std::string size; + std::string version_text; + std::string path; + + friend std::ostream &operator<<(std::ostream &os, const GodotFileObject &godot_filename) { + os << "filename: " << godot_filename.filename + << " url: " << godot_filename.url + << " version_text: " << godot_filename.version_text + << " path: " << godot_filename.path + << " size: " << godot_filename.size; + return os; + } +}; + +struct GodotFileObjects { + std::vector directories; + std::vector files; + + friend std::ostream &operator<<(std::ostream &os, const GodotFileObjects &objects) { + os << "Files: " << "\n"; + for (auto &&file: objects.files) { + os << " - " << file.filename << "\n"; + } + + os << "Dirs: " << "\n"; + for (auto &&dir: objects.directories) { + os << " - " << dir.directory_name << "\n"; + } + return os; + } +}; + + +inline nlohmann::json to_json(const GodotFileObject &godot_version) { + return nlohmann::json{ + {"filename", godot_version.filename}, + {"path", godot_version.path}, + {"size", godot_version.size}, + {"url", godot_version.url}, + {"version_text", godot_version.version_text}, + }; +} + +inline void from_json(const nlohmann::json &json_data, GodotFileObject &godot_version) { + json_data.at("filename").get_to(godot_version.filename); + if (json_data.contains("path")) { + json_data.at("path").get_to(godot_version.path); + } + json_data.at("size").get_to(godot_version.size); + json_data.at("url").get_to(godot_version.url); + if (json_data.contains("version_text")) { + json_data.at("version_text").get_to(godot_version.version_text); + } +} + +#endif //GODOTHUB_GODOT_STRUCTS_H diff --git a/src/godot_version_scraper/godot_version_scraper.cpp b/src/godot_version_scraper/godot_version_scraper.cpp new file mode 100644 index 0000000..6e44e9c --- /dev/null +++ b/src/godot_version_scraper/godot_version_scraper.cpp @@ -0,0 +1,169 @@ + +#include "godot_version_scraper.h" + +#define USE_CBLIB_DEFINES + +#include + +#include +#include +#include +#include +#include + + +GodotFileObjects GodotVersionScraper::request_godot_file_objects(const std::string &base_url) { + GodotFileObjects godot_file_objects; + + godot_file_objects.files.reserve(1024 * 8); + godot_file_objects.directories.reserve(1024 * 8); + + std::vector pages{}; + pages.reserve(64 * 10); + pages.push_back(base_url); + u32 pages_amount = 1; + + std::shared_ptr session = std::make_shared(); + + while (pages_amount >= 1) { + u32 scraped_pages_amount = pages_amount; + std::vector> async_responses{}; + + for (int i = 0; i < scraped_pages_amount; i++) { + auto url = cpr::Url{pages.at(i)}; + session->SetOption(url); + auto async_resp = session->GetAsync(); + + auto response = async_resp.get(); + GodotFileObjects current_file_objects = fetch_page_content(response); + pages_amount += current_file_objects.directories.size(); + + godot_file_objects.files.insert( + std::end(godot_file_objects.files), + std::begin(current_file_objects.files), + std::end(current_file_objects.files) + ); + + godot_file_objects.directories.insert( + std::end(godot_file_objects.directories), + std::begin(current_file_objects.directories), + std::end(current_file_objects.directories) + ); + + for (auto &&dir: current_file_objects.directories) { + pages.push_back(response.url.str() + dir.cached_directory_url); + } + + pages_amount -= 1; + } + // erase the first N pages, that have already been scraped + pages.erase(pages.begin(), pages.begin() + scraped_pages_amount); + } + + std::cout << "Finished scraping" << std::endl; + + return godot_file_objects; +} + +GodotFileObjects GodotVersionScraper::fetch_page_content(const cpr::Response &response) { + static const std::string_view row_regex_str{"(.*)<\\/tr>"}; + static const std::string_view filename_regex_str{".*<\\/a>"}; + static const std::string_view size_regex_str{"(.*)<\\/td>"}; + + static const std::string_view begin_file_element = ")"; + + static const std::string_view begin_size_element = ""; + static const std::string_view end_size_element = ""; + + const auto &text = response.text; + std::regex word_regex(row_regex_str.data()); + auto words_begin = std::sregex_iterator(text.begin(), text.end(), word_regex); + auto words_end = std::sregex_iterator(); + + GodotFileObjects godot_file_objects; + + for (std::sregex_iterator i = words_begin; i != words_end; ++i) { + const std::smatch &match = *i; + std::string match_str = match[1].str(); + + auto begin_pos = match_str.find(begin_file_element) + begin_file_element.size(); + auto end_pos = match_str.find(end_file_element, begin_pos); + if (end_pos == std::string::npos || begin_pos == std::string::npos) { + continue; + } + + const std::string file_element = match_str.substr(begin_pos, end_pos - begin_pos); + + if (isInvalidFileOrFolder(file_element)) { + continue; + } + + const std::string &url = file_element; + const std::string full_url = response.url.str() + url; + + if (cb::string::ends_with(file_element, "/")) { + godot_file_objects.directories.emplace_back(GodotDirectory{file_element, full_url, url}); + } else { + begin_pos = match_str.find(begin_size_element) + begin_size_element.size(); + end_pos = match_str.find(end_size_element, begin_pos); + std::string file_size = "0M"; + if (end_pos == std::string::npos || begin_pos == std::string::npos) { + std::cout << "couldn't parse file size for " << file_element << std::endl; + } else { + file_size = match_str.substr(begin_pos, end_pos - begin_pos); + } + + const GodotFileObject &godot_filename = GodotFileObject{ + file_element, + full_url, + file_size, + extract_version_from_url(full_url), + }; + godot_file_objects.files.emplace_back(godot_filename); + } + } + + return godot_file_objects; +} + +bool GodotVersionScraper::isInvalidFileOrFolder(const std::string &file_element) { +// using namespace cb::string; + return cb::string::starts_with(file_element, "media") + or cb::string::starts_with(file_element, "patreon") + or cb::string::starts_with(file_element, "testing") + or cb::string::starts_with(file_element, "toolchains") + or cb::string::starts_with(file_element, "s=") + or cb::string::starts_with(file_element, "..") + or cb::string::ends_with(file_element, ".txt") + or cb::string::ends_with(file_element, ".sha256"); +} + +GodotFileObjects GodotVersionScraper::request_godot_versions() { + const std::string url = "https://downloads.tuxfamily.org/godotengine/"; + + GodotFileObjects file_objects = request_godot_file_objects(url); + std::cout << "got " << file_objects.files.size() << " files" << std::endl; + std::cout << "got " << file_objects.directories.size() << " directories" << std::endl; + + return file_objects; +} + +std::string GodotVersionScraper::extract_version_from_url(const std::string &url) { + std::size_t start_pos = url.rfind("/Godot"); + if (start_pos == std::string::npos) { + start_pos = url.rfind("/godot"); + if (start_pos == std::string::npos) { + return ""; + } + } + // + 1 because of Godot_ or Godot- -> remove the -/_ + start_pos += 6 + 1; + + std::size_t endPos = url.rfind('.'); + if (endPos == std::string::npos) { + return ""; + } + + return url.substr(start_pos, endPos - start_pos); +} diff --git a/src/godot_version_scraper/godot_version_scraper.h b/src/godot_version_scraper/godot_version_scraper.h new file mode 100644 index 0000000..f80f678 --- /dev/null +++ b/src/godot_version_scraper/godot_version_scraper.h @@ -0,0 +1,30 @@ + +#ifndef GODOTHUB_GODOTVERSIONSCRAPER_H +#define GODOTHUB_GODOTVERSIONSCRAPER_H + + +#include "godot_structs.h" + +#include +#include + + +class GodotVersionScraper { +public: + [[nodiscard]] + static GodotFileObjects request_godot_versions(); + +private: + [[nodiscard]] + static GodotFileObjects request_godot_file_objects(const std::string &base_url); + + [[nodiscard]] + static GodotFileObjects fetch_page_content(const cpr::Response &response); + + static bool isInvalidFileOrFolder(const std::string &file_element); + + static std::string extract_version_from_url(const std::string &url); +}; + + +#endif //GODOTHUB_GODOTVERSIONSCRAPER_H diff --git a/src/main.cpp b/src/main.cpp new file mode 100644 index 0000000..502a97b --- /dev/null +++ b/src/main.cpp @@ -0,0 +1,32 @@ +#include "godot_version_scraper/godot_structs.h" +#include "apps/fetch_godot.h" + +#define USE_CBLIB_DEFINES + +#include + +#include + + +int main(int argc, char **argv) { + using namespace cb::argparser; + ArgumentParser argument_parser{}; + + argument_parser.add_argument( + "-o", "--output", Type::STRING_TYPE, + "config.json", "Output file path for the config", + Mode::REQUIRED_MODE); + + ArgumentMap arguments = argument_parser.parse(argc, argv); + + if (arguments["-h"]) { + std::cout << argument_parser.get_help() << std::endl; + return 0; + } + + auto output_file_path = arguments["-o"].as_str(); + + FetchGodot godot_fetcher{}; + godot_fetcher.run(); + godot_fetcher.save(output_file_path); +}