feat: implement basic hashing algorithm for all unknown hashes

This commit is contained in:
Alphyron 2024-08-24 11:14:27 +02:00
parent 9f33bdf4d3
commit f9640b3d90
8 changed files with 785 additions and 10 deletions

10
.env
View File

@ -1 +1,11 @@
RUST_LOG=INFO
DATABASE_URL=postgres://anthrove:anthrove@localhost:5432/anthrove DATABASE_URL=postgres://anthrove:anthrove@localhost:5432/anthrove
SCRAPE_USER=alphyron
SCRAPE_API_KEY=rZyf15CRCMoeCkCebt4tj2Bd
E6_USERS=Selloo
DB_ENDPOINT=db
DB_USERNAME=anthrove
DB_PASSWORD=anthrove
DB_DATABASE=anthrove
DB_SSL=false

1
.gitignore vendored
View File

@ -227,3 +227,4 @@ $RECYCLE.BIN/
.idea/ .idea/
.env

658
Cargo.lock generated
View File

@ -140,6 +140,12 @@ dependencies = [
"num-traits", "num-traits",
] ]
[[package]]
name = "atomic-waker"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
[[package]] [[package]]
name = "autocfg" name = "autocfg"
version = "1.3.0" version = "1.3.0"
@ -299,6 +305,17 @@ dependencies = [
"windows-targets 0.52.6", "windows-targets 0.52.6",
] ]
[[package]]
name = "clocksource"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "129026dd5a8a9592d96916258f3a5379589e513ea5e86aeb0bd2530286e44e9e"
dependencies = [
"libc",
"time",
"winapi",
]
[[package]] [[package]]
name = "color_quant" name = "color_quant"
version = "1.1.0" version = "1.1.0"
@ -326,6 +343,16 @@ version = "0.9.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8"
[[package]]
name = "core-foundation"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f"
dependencies = [
"core-foundation-sys",
"libc",
]
[[package]] [[package]]
name = "core-foundation-sys" name = "core-foundation-sys"
version = "0.8.7" version = "0.8.7"
@ -465,6 +492,15 @@ dependencies = [
"zeroize", "zeroize",
] ]
[[package]]
name = "deranged"
version = "0.3.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4"
dependencies = [
"powerfmt",
]
[[package]] [[package]]
name = "digest" name = "digest"
version = "0.10.7" version = "0.10.7"
@ -492,6 +528,15 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "encoding_rs"
version = "0.8.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59"
dependencies = [
"cfg-if",
]
[[package]] [[package]]
name = "env_filter" name = "env_filter"
version = "0.1.2" version = "0.1.2"
@ -576,6 +621,21 @@ version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "foreign-types"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1"
dependencies = [
"foreign-types-shared",
]
[[package]]
name = "foreign-types-shared"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b"
[[package]] [[package]]
name = "form_urlencoded" name = "form_urlencoded"
version = "1.2.1" version = "1.2.1"
@ -700,6 +760,8 @@ dependencies = [
"image", "image",
"img_hash", "img_hash",
"log", "log",
"ratelimit",
"reqwest",
"serde", "serde",
"serde_json", "serde_json",
"sqlx", "sqlx",
@ -744,6 +806,25 @@ version = "0.29.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd" checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd"
[[package]]
name = "h2"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "524e8ac6999421f49a846c2d4411f337e53497d8ec55d67753beffa43c5d9205"
dependencies = [
"atomic-waker",
"bytes",
"fnv",
"futures-core",
"futures-sink",
"http",
"indexmap",
"slab",
"tokio",
"tokio-util",
"tracing",
]
[[package]] [[package]]
name = "hamming" name = "hamming"
version = "0.1.3" version = "0.1.3"
@ -814,12 +895,125 @@ dependencies = [
"windows-sys 0.52.0", "windows-sys 0.52.0",
] ]
[[package]]
name = "http"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258"
dependencies = [
"bytes",
"fnv",
"itoa",
]
[[package]]
name = "http-body"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184"
dependencies = [
"bytes",
"http",
]
[[package]]
name = "http-body-util"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f"
dependencies = [
"bytes",
"futures-util",
"http",
"http-body",
"pin-project-lite",
]
[[package]]
name = "httparse"
version = "1.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fcc0b4a115bf80b728eb8ea024ad5bd707b615bfed49e0665b6e0f86fd082d9"
[[package]] [[package]]
name = "humantime" name = "humantime"
version = "2.1.0" version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
[[package]]
name = "hyper"
version = "1.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50dfd22e0e76d0f662d429a5f80fcaf3855009297eab6a0a9f8543834744ba05"
dependencies = [
"bytes",
"futures-channel",
"futures-util",
"h2",
"http",
"http-body",
"httparse",
"itoa",
"pin-project-lite",
"smallvec",
"tokio",
"want",
]
[[package]]
name = "hyper-rustls"
version = "0.27.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ee4be2c948921a1a5320b629c4193916ed787a7f7f293fd3f7f5a6c9de74155"
dependencies = [
"futures-util",
"http",
"hyper",
"hyper-util",
"rustls",
"rustls-pki-types",
"tokio",
"tokio-rustls",
"tower-service",
]
[[package]]
name = "hyper-tls"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0"
dependencies = [
"bytes",
"http-body-util",
"hyper",
"hyper-util",
"native-tls",
"tokio",
"tokio-native-tls",
"tower-service",
]
[[package]]
name = "hyper-util"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cde7055719c54e36e95e8719f95883f22072a48ede39db7fc17a4e1d5281e9b9"
dependencies = [
"bytes",
"futures-channel",
"futures-util",
"http",
"http-body",
"hyper",
"pin-project-lite",
"socket2",
"tokio",
"tower",
"tower-service",
"tracing",
]
[[package]] [[package]]
name = "iana-time-zone" name = "iana-time-zone"
version = "0.1.60" version = "0.1.60"
@ -901,6 +1095,12 @@ dependencies = [
"hashbrown", "hashbrown",
] ]
[[package]]
name = "ipnet"
version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3"
[[package]] [[package]]
name = "is_terminal_polyfill" name = "is_terminal_polyfill"
version = "1.70.1" version = "1.70.1"
@ -1001,6 +1201,12 @@ version = "2.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
[[package]]
name = "mime"
version = "0.3.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
[[package]] [[package]]
name = "minimal-lexical" name = "minimal-lexical"
version = "0.2.1" version = "0.2.1"
@ -1047,6 +1253,23 @@ dependencies = [
"windows-sys 0.52.0", "windows-sys 0.52.0",
] ]
[[package]]
name = "native-tls"
version = "0.2.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8614eb2c83d59d1c8cc974dd3f920198647674a0a035e1af1fa58707e317466"
dependencies = [
"libc",
"log",
"openssl",
"openssl-probe",
"openssl-sys",
"schannel",
"security-framework",
"security-framework-sys",
"tempfile",
]
[[package]] [[package]]
name = "nom" name = "nom"
version = "7.1.3" version = "7.1.3"
@ -1084,6 +1307,12 @@ dependencies = [
"num-traits", "num-traits",
] ]
[[package]]
name = "num-conv"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
[[package]] [[package]]
name = "num-integer" name = "num-integer"
version = "0.1.46" version = "0.1.46"
@ -1140,6 +1369,50 @@ version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
[[package]]
name = "openssl"
version = "0.10.66"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9529f4786b70a3e8c61e11179af17ab6188ad8d0ded78c5529441ed39d4bd9c1"
dependencies = [
"bitflags 2.6.0",
"cfg-if",
"foreign-types",
"libc",
"once_cell",
"openssl-macros",
"openssl-sys",
]
[[package]]
name = "openssl-macros"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "openssl-probe"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf"
[[package]]
name = "openssl-sys"
version = "0.9.103"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f9e8deee91df40a943c71b917e5874b951d32a802526c85721ce3b776c929d6"
dependencies = [
"cc",
"libc",
"pkg-config",
"vcpkg",
]
[[package]] [[package]]
name = "parking" name = "parking"
version = "2.2.0" version = "2.2.0"
@ -1190,6 +1463,26 @@ version = "2.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
[[package]]
name = "pin-project"
version = "1.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6bf43b791c5b9e34c3d182969b4abb522f9343702850a2e57f460d00d09b4b3"
dependencies = [
"pin-project-internal",
]
[[package]]
name = "pin-project-internal"
version = "1.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]] [[package]]
name = "pin-project-lite" name = "pin-project-lite"
version = "0.2.14" version = "0.2.14"
@ -1241,6 +1534,12 @@ dependencies = [
"miniz_oxide 0.3.7", "miniz_oxide 0.3.7",
] ]
[[package]]
name = "powerfmt"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
[[package]] [[package]]
name = "ppv-lite86" name = "ppv-lite86"
version = "0.2.20" version = "0.2.20"
@ -1298,6 +1597,17 @@ dependencies = [
"getrandom", "getrandom",
] ]
[[package]]
name = "ratelimit"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c1bb13e2dcfa2232ac6887157aad8d9b3fe4ca57f7c8d4938ff5ea9be742300"
dependencies = [
"clocksource",
"parking_lot",
"thiserror",
]
[[package]] [[package]]
name = "rayon" name = "rayon"
version = "1.10.0" version = "1.10.0"
@ -1365,6 +1675,65 @@ version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b"
[[package]]
name = "reqwest"
version = "0.12.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8f4955649ef5c38cc7f9e8aa41761d48fb9677197daea9984dc54f56aad5e63"
dependencies = [
"base64 0.22.1",
"bytes",
"encoding_rs",
"futures-channel",
"futures-core",
"futures-util",
"h2",
"http",
"http-body",
"http-body-util",
"hyper",
"hyper-rustls",
"hyper-tls",
"hyper-util",
"ipnet",
"js-sys",
"log",
"mime",
"native-tls",
"once_cell",
"percent-encoding",
"pin-project-lite",
"rustls-pemfile",
"serde",
"serde_json",
"serde_urlencoded",
"sync_wrapper",
"system-configuration",
"tokio",
"tokio-native-tls",
"tower-service",
"url",
"wasm-bindgen",
"wasm-bindgen-futures",
"web-sys",
"windows-registry",
]
[[package]]
name = "ring"
version = "0.17.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d"
dependencies = [
"cc",
"cfg-if",
"getrandom",
"libc",
"spin",
"untrusted",
"windows-sys 0.52.0",
]
[[package]] [[package]]
name = "rsa" name = "rsa"
version = "0.9.6" version = "0.9.6"
@ -1426,12 +1795,61 @@ dependencies = [
"windows-sys 0.52.0", "windows-sys 0.52.0",
] ]
[[package]]
name = "rustls"
version = "0.23.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c58f8c84392efc0a126acce10fa59ff7b3d2ac06ab451a33f2741989b806b044"
dependencies = [
"once_cell",
"rustls-pki-types",
"rustls-webpki",
"subtle",
"zeroize",
]
[[package]]
name = "rustls-pemfile"
version = "2.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "196fe16b00e106300d3e45ecfcb764fa292a535d7326a29a5875c579c7417425"
dependencies = [
"base64 0.22.1",
"rustls-pki-types",
]
[[package]]
name = "rustls-pki-types"
version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc0a2ce646f8655401bb81e7927b812614bd5d91dbc968696be50603510fcaf0"
[[package]]
name = "rustls-webpki"
version = "0.102.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e6b52d4fda176fd835fdc55a835d4a89b8499cad995885a21149d5ad62f852e"
dependencies = [
"ring",
"rustls-pki-types",
"untrusted",
]
[[package]] [[package]]
name = "ryu" name = "ryu"
version = "1.0.18" version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f"
[[package]]
name = "schannel"
version = "0.1.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fbc91545643bcf3a0bbb6569265615222618bdf33ce4ffbbd13c4bbd4c093534"
dependencies = [
"windows-sys 0.52.0",
]
[[package]] [[package]]
name = "scoped_threadpool" name = "scoped_threadpool"
version = "0.1.9" version = "0.1.9"
@ -1444,6 +1862,29 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "security-framework"
version = "2.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02"
dependencies = [
"bitflags 2.6.0",
"core-foundation",
"core-foundation-sys",
"libc",
"security-framework-sys",
]
[[package]]
name = "security-framework-sys"
version = "2.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75da29fe9b9b08fe9d6b22b5b4bcbc75d8db3aa31e639aa56bb62e9d46bfceaf"
dependencies = [
"core-foundation-sys",
"libc",
]
[[package]] [[package]]
name = "serde" name = "serde"
version = "1.0.207" version = "1.0.207"
@ -1820,6 +2261,36 @@ dependencies = [
"unicode-ident", "unicode-ident",
] ]
[[package]]
name = "sync_wrapper"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394"
dependencies = [
"futures-core",
]
[[package]]
name = "system-configuration"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b"
dependencies = [
"bitflags 2.6.0",
"core-foundation",
"system-configuration-sys",
]
[[package]]
name = "system-configuration-sys"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4"
dependencies = [
"core-foundation-sys",
"libc",
]
[[package]] [[package]]
name = "tempfile" name = "tempfile"
version = "3.12.0" version = "3.12.0"
@ -1864,6 +2335,37 @@ dependencies = [
"weezl", "weezl",
] ]
[[package]]
name = "time"
version = "0.3.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885"
dependencies = [
"deranged",
"itoa",
"num-conv",
"powerfmt",
"serde",
"time-core",
"time-macros",
]
[[package]]
name = "time-core"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3"
[[package]]
name = "time-macros"
version = "0.2.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f252a68540fde3a3877aeea552b832b40ab9a69e318efd078774a01ddee1ccf"
dependencies = [
"num-conv",
"time-core",
]
[[package]] [[package]]
name = "tinyvec" name = "tinyvec"
version = "1.8.0" version = "1.8.0"
@ -1908,6 +2410,27 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "tokio-native-tls"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2"
dependencies = [
"native-tls",
"tokio",
]
[[package]]
name = "tokio-rustls"
version = "0.26.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4"
dependencies = [
"rustls",
"rustls-pki-types",
"tokio",
]
[[package]] [[package]]
name = "tokio-stream" name = "tokio-stream"
version = "0.1.15" version = "0.1.15"
@ -1919,6 +2442,46 @@ dependencies = [
"tokio", "tokio",
] ]
[[package]]
name = "tokio-util"
version = "0.7.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9cf6b47b3771c49ac75ad09a6162f53ad4b8088b76ac60e8ec1455b31a189fe1"
dependencies = [
"bytes",
"futures-core",
"futures-sink",
"pin-project-lite",
"tokio",
]
[[package]]
name = "tower"
version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c"
dependencies = [
"futures-core",
"futures-util",
"pin-project",
"pin-project-lite",
"tokio",
"tower-layer",
"tower-service",
]
[[package]]
name = "tower-layer"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e"
[[package]]
name = "tower-service"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3"
[[package]] [[package]]
name = "tracing" name = "tracing"
version = "0.1.40" version = "0.1.40"
@ -1973,6 +2536,12 @@ version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "622b09ce2fe2df4618636fb92176d205662f59803f39e70d1c333393082de96c" checksum = "622b09ce2fe2df4618636fb92176d205662f59803f39e70d1c333393082de96c"
[[package]]
name = "try-lock"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
[[package]] [[package]]
name = "typenum" name = "typenum"
version = "1.17.0" version = "1.17.0"
@ -2012,6 +2581,12 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e"
[[package]]
name = "untrusted"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
[[package]] [[package]]
name = "url" name = "url"
version = "2.5.2" version = "2.5.2"
@ -2041,6 +2616,15 @@ version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
[[package]]
name = "want"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e"
dependencies = [
"try-lock",
]
[[package]] [[package]]
name = "wasi" name = "wasi"
version = "0.11.0+wasi-snapshot-preview1" version = "0.11.0+wasi-snapshot-preview1"
@ -2079,6 +2663,18 @@ dependencies = [
"wasm-bindgen-shared", "wasm-bindgen-shared",
] ]
[[package]]
name = "wasm-bindgen-futures"
version = "0.4.43"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61e9300f63a621e96ed275155c108eb6f843b6a26d053f122ab69724559dc8ed"
dependencies = [
"cfg-if",
"js-sys",
"wasm-bindgen",
"web-sys",
]
[[package]] [[package]]
name = "wasm-bindgen-macro" name = "wasm-bindgen-macro"
version = "0.2.93" version = "0.2.93"
@ -2108,6 +2704,16 @@ version = "0.2.93"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c62a0a307cb4a311d3a07867860911ca130c3494e8c2719593806c08bc5d0484" checksum = "c62a0a307cb4a311d3a07867860911ca130c3494e8c2719593806c08bc5d0484"
[[package]]
name = "web-sys"
version = "0.3.70"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26fdeaafd9bd129f65e7c031593c24d62186301e0c72c8978fa1678be7d532c0"
dependencies = [
"js-sys",
"wasm-bindgen",
]
[[package]] [[package]]
name = "web-time" name = "web-time"
version = "1.1.0" version = "1.1.0"
@ -2134,6 +2740,28 @@ dependencies = [
"wasite", "wasite",
] ]
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]] [[package]]
name = "windows-core" name = "windows-core"
version = "0.52.0" version = "0.52.0"
@ -2143,6 +2771,36 @@ dependencies = [
"windows-targets 0.52.6", "windows-targets 0.52.6",
] ]
[[package]]
name = "windows-registry"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0"
dependencies = [
"windows-result",
"windows-strings",
"windows-targets 0.52.6",
]
[[package]]
name = "windows-result"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e"
dependencies = [
"windows-targets 0.52.6",
]
[[package]]
name = "windows-strings"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10"
dependencies = [
"windows-result",
"windows-targets 0.52.6",
]
[[package]] [[package]]
name = "windows-sys" name = "windows-sys"
version = "0.48.0" version = "0.48.0"

View File

@ -20,3 +20,5 @@ bk-tree = "0.5"
hamming = "0.1" hamming = "0.1"
sqlx = { version = "0.8", features = [ "runtime-tokio", "migrate", "postgres" ] } sqlx = { version = "0.8", features = [ "runtime-tokio", "migrate", "postgres" ] }
image = ">=0.21,<0.24" image = ">=0.21,<0.24"
reqwest = { version = "0.12.7", features = ["blocking"] }
ratelimit = "0.9"

View File

@ -1,5 +1,13 @@
services: services:
data_generator:
image: git.anthrove.art/anthrove/demo-data-generator:v1.0.1
env_file:
- .env
restart: no
depends_on:
- db
db: db:
image: postgres image: postgres
ports: ports:

View File

@ -1,3 +1,4 @@
use anyhow::anyhow;
use sqlx::{Pool, Postgres}; use sqlx::{Pool, Postgres};
use sqlx::postgres::PgPoolOptions; use sqlx::postgres::PgPoolOptions;
@ -21,7 +22,7 @@ pub async fn migrate(pool: &Pool<Postgres>) -> anyhow::Result<()> {
} }
pub async fn get_all_hashes(pool: &Pool<Postgres>) -> anyhow::Result<Vec<FuzzyHash>> { pub async fn get_all_hashes(pool: &Pool<Postgres>) -> anyhow::Result<Vec<FuzzyHash>> {
let rows = sqlx::query!("SELECT id, hash, post_id FROM fuzzyhashes") let rows = sqlx::query!("SELECT id, hash, post_id FROM fuzzyhashes WHERE hash != 0")
.fetch_all(pool).await?; .fetch_all(pool).await?;
let mut data = vec![]; let mut data = vec![];
@ -36,3 +37,32 @@ pub async fn get_all_hashes(pool: &Pool<Postgres>) -> anyhow::Result<Vec<FuzzyHa
Ok(data) Ok(data)
} }
pub async fn create_hash(pool: &Pool<Postgres>, fuzzy: FuzzyHash) -> anyhow::Result<i32> {
let rec = sqlx::query!(r#"INSERT INTO "fuzzyhashes" (hash, post_id) VALUES ($1, $2) RETURNING id"#, fuzzy.hash, fuzzy.post_id).fetch_one(pool).await?;
Ok(rec.id)
}
pub async fn get_all_unprocessed_posts(pool: &Pool<Postgres>) -> anyhow::Result<Vec<String>> {
let rows = sqlx::query!(
r#"SELECT id FROM "Post" WHERE id NOT IN (SELECT fuzzyhashes.post_id FROM fuzzyhashes)"#
).fetch_all(pool).await?;
let mut ids = vec![];
for row in rows {
ids.push(row.id);
}
Ok(ids)
}
pub async fn get_preview_file_url(pool: &Pool<Postgres>, post_id: &str) -> anyhow::Result<String> {
let rec = sqlx::query!(
r#"SELECT sample_file_url FROM "PostReference" WHERE post_id = $1"#,
post_id
).fetch_one(pool).await?;
Ok(rec.sample_file_url.ok_or(anyhow!("sample_file_url not found for post_id"))?)
}

View File

@ -28,7 +28,7 @@ impl Tree {
} }
} }
/// Add a hash to the tree, returning if it already existed. /// Add a hash to the tree, returning if it was added.
pub async fn add(&self, hash: i64) -> bool { pub async fn add(&self, hash: i64) -> bool {
let node = Node::from(hash); let node = Node::from(hash);

View File

@ -1,7 +1,16 @@
use std::{env}; use std::{env};
use std::fs::remove_file;
use std::time::Duration;
use anyhow::anyhow;
use dotenvy::dotenv; use dotenvy::dotenv;
use env_logger::TimestampPrecision; use env_logger::TimestampPrecision;
use log::warn; use image::EncodableLayout;
use log::{debug, info, warn};
use ratelimit::Ratelimiter;
use tokio::fs::File;
use tokio::io;
use crate::db::FuzzyHash;
use crate::fuzzy::tree::HashDistance;
mod fuzzy; mod fuzzy;
mod db; mod db;
@ -12,23 +21,80 @@ async fn main() -> anyhow::Result<()> {
dotenv().ok(); dotenv().ok();
env_logger::builder() env_logger::builder()
.format_timestamp(Some(TimestampPrecision::Millis)) .format_timestamp(Some(TimestampPrecision::Millis))
.is_test(true)
.init(); .init();
info!("Connecting to database...");
let db_url = env::var("DATABASE_URL").expect("'DATABASE_URL' is required"); let db_url = env::var("DATABASE_URL").expect("'DATABASE_URL' is required");
let pool = db::connect(db_url.as_str()).await?; let pool = db::connect(db_url.as_str()).await?;
info!("Connecting success.");
info!("Migrating Database...");
db::migrate(&pool).await?; db::migrate(&pool).await?;
info!("Migration success.");
info!("Loading hashes...");
let fuzzy_hashes = db::get_all_hashes(&pool).await?; let fuzzy_hashes = db::get_all_hashes(&pool).await?;
let fuzzy_tree = fuzzy::tree::Tree::new(); let fuzzy_tree = fuzzy::tree::Tree::new();
for fuzzy_hash in fuzzy_hashes { for fuzzy_hash in fuzzy_hashes {
let exist = fuzzy_tree.add(fuzzy_hash.hash).await; let exist = fuzzy_tree.add(fuzzy_hash.hash).await;
if exist { if !exist {
warn!("found already existing hash: {}", fuzzy_hash.hash) warn!("found already existing hash: {}", fuzzy_hash.hash);
let founds = fuzzy_tree.find(vec!(HashDistance{
hash: fuzzy_hash.hash,
distance: 0
})).await;
founds[0].iter().for_each(|has_dist| warn!("Existing: {}", has_dist.hash))
}
}
info!("Load success");
let ratelimiter = Ratelimiter::builder(1, Duration::from_secs(1))
.build()?;
let ids = db::get_all_unprocessed_posts(&pool).await?;
for id in ids {
let preview_url = db::get_preview_file_url(&pool, id.as_str()).await?;
if preview_url.len() == 0 {
// Post is deleted or so.... !!
db::create_hash(&pool, FuzzyHash{
id: 0,
hash: 0,
post_id: id
}).await?;
continue
}
let file_name = preview_url.split("/").last().ok_or(anyhow!("problem while splitting url"))?;
let resp = reqwest::get(preview_url.as_str()).await?;
let body = resp.bytes().await?;
let mut out = File::create(file_name).await?;
io::copy(& mut body.as_bytes(), &mut out).await?;
let hash = fuzzy::image::generate_hash(file_name).await?;
let _ = remove_file(file_name)?;
let already_exists = fuzzy_tree.add(hash).await;
if already_exists {
info!("Post {id} already in tree with hash: {hash}")
}
db::create_hash(&pool, FuzzyHash{
id: 0,
hash,
post_id: id
}).await?;
if let Err(sleep) = ratelimiter.try_wait() {
tokio::time::sleep(sleep).await;
continue;
} }
} }