From 0d4655e50bf5d8006170d799f9c81ae0765c3f27 Mon Sep 17 00:00:00 2001 From: Lennard Brinkhaus Date: Sat, 21 Dec 2024 16:56:44 +0100 Subject: [PATCH] feat: more fuzzy search? --- docker-compose.yml | 1 + .../20240815064629_initial-database.sql | 6 +- src/db.rs | 16 +++-- src/fuzzy/image.rs | 7 +- src/main.rs | 67 ++++++++++++++----- 5 files changed, 73 insertions(+), 24 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index cda92af..774cb27 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -10,6 +10,7 @@ services: db: image: postgres + container_name: fuzzy_search_db ports: - 5432:5432 environment: diff --git a/migrations/20240815064629_initial-database.sql b/migrations/20240815064629_initial-database.sql index a8c042d..9c8d4e8 100644 --- a/migrations/20240815064629_initial-database.sql +++ b/migrations/20240815064629_initial-database.sql @@ -1,6 +1,10 @@ -- Add migration script here CREATE TABLE FuzzyHashes( id SERIAL NOT NULL PRIMARY KEY, - hash BIGINT NOT NULL, + mean_hash BIGINT NOT NULL, + gradient_hash BIGINT NOT NULL, + vert_gradient_hash BIGINT NOT NULL, + double_gradient_hash BIGINT NOT NULL, + block_hash BIGINT NOT NULL, post_id CHAR(25) NOT NULL ) \ No newline at end of file diff --git a/src/db.rs b/src/db.rs index 3ac712e..22314ad 100644 --- a/src/db.rs +++ b/src/db.rs @@ -8,7 +8,11 @@ static MIGRATOR: Migrator = sqlx::migrate!(); // defaults to "./migrations" pub struct FuzzyHash { pub id: i32, - pub hash: i64, + pub mean_hash: i64, + pub gradient_hash: i64, + pub vert_gradient_hash: i64, + pub double_gradient_hash: i64, + pub block_hash: i64, pub post_id: String, } @@ -22,7 +26,7 @@ pub async fn migrate(pool: &Pool) -> anyhow::Result<()> { } pub async fn get_all_hashes(pool: &Pool) -> anyhow::Result> { - let rows = sqlx::query!("SELECT id, hash, post_id FROM fuzzyhashes WHERE hash != 0") + let rows = sqlx::query!("SELECT id, mean_hash, gradient_hash, vert_gradient_hash, double_gradient_hash, block_hash, post_id FROM fuzzyhashes WHERE mean_hash > 0 AND gradient_hash > 0 AND vert_gradient_hash > 0 AND double_gradient_hash > 0 AND block_hash > 0") .fetch_all(pool).await?; let mut data = vec![]; @@ -30,7 +34,11 @@ pub async fn get_all_hashes(pool: &Pool) -> anyhow::Result) -> anyhow::Result, fuzzy: FuzzyHash) -> anyhow::Result { - let rec = sqlx::query!(r#"INSERT INTO "fuzzyhashes" (hash, post_id) VALUES ($1, $2) RETURNING id"#, fuzzy.hash, fuzzy.post_id).fetch_one(pool).await?; + let rec = sqlx::query!(r#"INSERT INTO "fuzzyhashes" (mean_hash, gradient_hash, vert_gradient_hash, double_gradient_hash, block_hash, post_id) VALUES ($1, $2, $3, $4, $5, $6) RETURNING id"#, fuzzy.mean_hash, fuzzy.gradient_hash, fuzzy.vert_gradient_hash, fuzzy.double_gradient_hash, fuzzy.block_hash, fuzzy.post_id).fetch_one(pool).await?; Ok(rec.id) } diff --git a/src/fuzzy/image.rs b/src/fuzzy/image.rs index 63096e1..691be2e 100644 --- a/src/fuzzy/image.rs +++ b/src/fuzzy/image.rs @@ -1,11 +1,12 @@ use image; +use img_hash::HashAlg; +use img_hash::HasherConfig; -pub async fn generate_hash(path: &str) -> anyhow::Result { +pub async fn generate_hash(path: &str, alg: HashAlg) -> anyhow::Result { let img = image::open(path)?; - use img_hash::{HashAlg::Gradient, HasherConfig}; let img_hasher = HasherConfig::with_bytes_type::<[u8; 8]>() - .hash_alg(Gradient) + .hash_alg(alg) .hash_size(8, 8) .preproc_dct() .to_hasher(); diff --git a/src/main.rs b/src/main.rs index 38ac58c..63df6ec 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,6 +5,7 @@ use anyhow::anyhow; use dotenvy::dotenv; use env_logger::TimestampPrecision; use image::EncodableLayout; +use img_hash::HashAlg; use log::{debug, info, warn}; use ratelimit::Ratelimiter; use tokio::fs::File; @@ -34,20 +35,31 @@ async fn main() -> anyhow::Result<()> { info!("Loading hashes..."); let fuzzy_hashes = db::get_all_hashes(&pool).await?; - let fuzzy_tree = fuzzy::tree::Tree::new(); + let gradient_tree = fuzzy::tree::Tree::new(); + let vert_gradient_tree = fuzzy::tree::Tree::new(); + let double_gradient_tree = fuzzy::tree::Tree::new(); + let block_tree = fuzzy::tree::Tree::new(); for fuzzy_hash in fuzzy_hashes { - let exist = fuzzy_tree.add(fuzzy_hash.hash).await; + let exist_gradient = gradient_tree.add(fuzzy_hash.gradient_hash).await; + let exist_vert_gradient = vert_gradient_tree.add(fuzzy_hash.vert_gradient_hash).await; + let exist_double_gradient = double_gradient_tree.add(fuzzy_hash.double_gradient_hash).await; + let exist_block_hash = block_tree.add(fuzzy_hash.block_hash).await; - if !exist { - warn!("found already existing hash: {}", fuzzy_hash.hash); - let founds = fuzzy_tree.find(vec!(HashDistance{ - hash: fuzzy_hash.hash, - distance: 0 - })).await; - - founds[0].iter().for_each(|has_dist| warn!("Existing: {}", has_dist.hash)) + if !exist_gradient { + warn!("found already existing hash (gradient): {}", fuzzy_hash.gradient_hash); } + if !exist_vert_gradient { + warn!("found already existing hash (vert_gradient): {}", fuzzy_hash.vert_gradient_hash); + } + if !exist_double_gradient { + warn!("found already existing hash (double_gradient): {}", fuzzy_hash.double_gradient_hash); + } + if !exist_block_hash { + warn!("found already existing hash (block_hash): {}", fuzzy_hash.block_hash); + } + info!("use {:?}", fuzzy_hash.post_id) + } info!("Load success"); @@ -61,7 +73,11 @@ async fn main() -> anyhow::Result<()> { // Post is deleted or so.... !! db::create_hash(&pool, FuzzyHash{ id: 0, - hash: 0, + mean_hash: 0, + gradient_hash: 0, + block_hash: 0, + double_gradient_hash: 0, + vert_gradient_hash: 0, post_id: id }).await?; continue @@ -76,19 +92,38 @@ async fn main() -> anyhow::Result<()> { let mut out = File::create(file_name).await?; io::copy(& mut body.as_bytes(), &mut out).await?; - let hash = fuzzy::image::generate_hash(file_name).await?; + let gradient_hash = fuzzy::image::generate_hash(file_name, HashAlg::Gradient).await?; + let vert_gradient_hash = fuzzy::image::generate_hash(file_name, HashAlg::VertGradient).await?; + let double_gradient_hash = fuzzy::image::generate_hash(file_name, HashAlg::DoubleGradient).await?; + let block_hash = fuzzy::image::generate_hash(file_name, HashAlg::Blockhash).await?; let _ = remove_file(file_name)?; - let already_exists = fuzzy_tree.add(hash).await; + let exist_gradient = gradient_tree.add(gradient_hash).await; + let exist_vert_gradient = vert_gradient_tree.add(vert_gradient_hash).await; + let exist_double_gradient = double_gradient_tree.add(double_gradient_hash).await; + let exist_block_hash = block_tree.add(block_hash).await; - if already_exists { - info!("Post {id} already in tree with hash: {hash}") + if !exist_gradient { + warn!("found already existing hash (gradient): {}", gradient_hash); + } + if !exist_vert_gradient { + warn!("found already existing hash (vert_gradient): {}", vert_gradient_hash); + } + if !exist_double_gradient { + warn!("found already existing hash (double_gradient): {}", double_gradient_hash); + } + if !exist_block_hash { + warn!("found already existing hash (block_hash): {}", block_hash); } db::create_hash(&pool, FuzzyHash{ id: 0, - hash, + mean_hash: 0, + gradient_hash, + vert_gradient_hash, + double_gradient_hash, + block_hash, post_id: id }).await?;