feat: more fuzzy search?
This commit is contained in:
parent
f9640b3d90
commit
0d4655e50b
@ -10,6 +10,7 @@ services:
|
||||
|
||||
db:
|
||||
image: postgres
|
||||
container_name: fuzzy_search_db
|
||||
ports:
|
||||
- 5432:5432
|
||||
environment:
|
||||
|
@ -1,6 +1,10 @@
|
||||
-- Add migration script here
|
||||
CREATE TABLE FuzzyHashes(
|
||||
id SERIAL NOT NULL PRIMARY KEY,
|
||||
hash BIGINT NOT NULL,
|
||||
mean_hash BIGINT NOT NULL,
|
||||
gradient_hash BIGINT NOT NULL,
|
||||
vert_gradient_hash BIGINT NOT NULL,
|
||||
double_gradient_hash BIGINT NOT NULL,
|
||||
block_hash BIGINT NOT NULL,
|
||||
post_id CHAR(25) NOT NULL
|
||||
)
|
16
src/db.rs
16
src/db.rs
@ -8,7 +8,11 @@ static MIGRATOR: Migrator = sqlx::migrate!(); // defaults to "./migrations"
|
||||
|
||||
pub struct FuzzyHash {
|
||||
pub id: i32,
|
||||
pub hash: i64,
|
||||
pub mean_hash: i64,
|
||||
pub gradient_hash: i64,
|
||||
pub vert_gradient_hash: i64,
|
||||
pub double_gradient_hash: i64,
|
||||
pub block_hash: i64,
|
||||
pub post_id: String,
|
||||
}
|
||||
|
||||
@ -22,7 +26,7 @@ pub async fn migrate(pool: &Pool<Postgres>) -> anyhow::Result<()> {
|
||||
}
|
||||
|
||||
pub async fn get_all_hashes(pool: &Pool<Postgres>) -> anyhow::Result<Vec<FuzzyHash>> {
|
||||
let rows = sqlx::query!("SELECT id, hash, post_id FROM fuzzyhashes WHERE hash != 0")
|
||||
let rows = sqlx::query!("SELECT id, mean_hash, gradient_hash, vert_gradient_hash, double_gradient_hash, block_hash, post_id FROM fuzzyhashes WHERE mean_hash > 0 AND gradient_hash > 0 AND vert_gradient_hash > 0 AND double_gradient_hash > 0 AND block_hash > 0")
|
||||
.fetch_all(pool).await?;
|
||||
|
||||
let mut data = vec![];
|
||||
@ -30,7 +34,11 @@ pub async fn get_all_hashes(pool: &Pool<Postgres>) -> anyhow::Result<Vec<FuzzyHa
|
||||
for row in rows {
|
||||
data.push(FuzzyHash{
|
||||
id: row.id,
|
||||
hash: row.hash,
|
||||
mean_hash: row.mean_hash,
|
||||
gradient_hash: row.gradient_hash,
|
||||
vert_gradient_hash: row.vert_gradient_hash,
|
||||
double_gradient_hash: row.double_gradient_hash,
|
||||
block_hash: row.block_hash,
|
||||
post_id: row.post_id,
|
||||
});
|
||||
}
|
||||
@ -39,7 +47,7 @@ pub async fn get_all_hashes(pool: &Pool<Postgres>) -> anyhow::Result<Vec<FuzzyHa
|
||||
}
|
||||
|
||||
pub async fn create_hash(pool: &Pool<Postgres>, fuzzy: FuzzyHash) -> anyhow::Result<i32> {
|
||||
let rec = sqlx::query!(r#"INSERT INTO "fuzzyhashes" (hash, post_id) VALUES ($1, $2) RETURNING id"#, fuzzy.hash, fuzzy.post_id).fetch_one(pool).await?;
|
||||
let rec = sqlx::query!(r#"INSERT INTO "fuzzyhashes" (mean_hash, gradient_hash, vert_gradient_hash, double_gradient_hash, block_hash, post_id) VALUES ($1, $2, $3, $4, $5, $6) RETURNING id"#, fuzzy.mean_hash, fuzzy.gradient_hash, fuzzy.vert_gradient_hash, fuzzy.double_gradient_hash, fuzzy.block_hash, fuzzy.post_id).fetch_one(pool).await?;
|
||||
|
||||
Ok(rec.id)
|
||||
}
|
||||
|
@ -1,11 +1,12 @@
|
||||
use image;
|
||||
use img_hash::HashAlg;
|
||||
use img_hash::HasherConfig;
|
||||
|
||||
pub async fn generate_hash(path: &str) -> anyhow::Result<i64> {
|
||||
pub async fn generate_hash(path: &str, alg: HashAlg) -> anyhow::Result<i64> {
|
||||
let img = image::open(path)?;
|
||||
use img_hash::{HashAlg::Gradient, HasherConfig};
|
||||
|
||||
let img_hasher = HasherConfig::with_bytes_type::<[u8; 8]>()
|
||||
.hash_alg(Gradient)
|
||||
.hash_alg(alg)
|
||||
.hash_size(8, 8)
|
||||
.preproc_dct()
|
||||
.to_hasher();
|
||||
|
67
src/main.rs
67
src/main.rs
@ -5,6 +5,7 @@ use anyhow::anyhow;
|
||||
use dotenvy::dotenv;
|
||||
use env_logger::TimestampPrecision;
|
||||
use image::EncodableLayout;
|
||||
use img_hash::HashAlg;
|
||||
use log::{debug, info, warn};
|
||||
use ratelimit::Ratelimiter;
|
||||
use tokio::fs::File;
|
||||
@ -34,20 +35,31 @@ async fn main() -> anyhow::Result<()> {
|
||||
|
||||
info!("Loading hashes...");
|
||||
let fuzzy_hashes = db::get_all_hashes(&pool).await?;
|
||||
let fuzzy_tree = fuzzy::tree::Tree::new();
|
||||
let gradient_tree = fuzzy::tree::Tree::new();
|
||||
let vert_gradient_tree = fuzzy::tree::Tree::new();
|
||||
let double_gradient_tree = fuzzy::tree::Tree::new();
|
||||
let block_tree = fuzzy::tree::Tree::new();
|
||||
|
||||
for fuzzy_hash in fuzzy_hashes {
|
||||
let exist = fuzzy_tree.add(fuzzy_hash.hash).await;
|
||||
let exist_gradient = gradient_tree.add(fuzzy_hash.gradient_hash).await;
|
||||
let exist_vert_gradient = vert_gradient_tree.add(fuzzy_hash.vert_gradient_hash).await;
|
||||
let exist_double_gradient = double_gradient_tree.add(fuzzy_hash.double_gradient_hash).await;
|
||||
let exist_block_hash = block_tree.add(fuzzy_hash.block_hash).await;
|
||||
|
||||
if !exist {
|
||||
warn!("found already existing hash: {}", fuzzy_hash.hash);
|
||||
let founds = fuzzy_tree.find(vec!(HashDistance{
|
||||
hash: fuzzy_hash.hash,
|
||||
distance: 0
|
||||
})).await;
|
||||
|
||||
founds[0].iter().for_each(|has_dist| warn!("Existing: {}", has_dist.hash))
|
||||
if !exist_gradient {
|
||||
warn!("found already existing hash (gradient): {}", fuzzy_hash.gradient_hash);
|
||||
}
|
||||
if !exist_vert_gradient {
|
||||
warn!("found already existing hash (vert_gradient): {}", fuzzy_hash.vert_gradient_hash);
|
||||
}
|
||||
if !exist_double_gradient {
|
||||
warn!("found already existing hash (double_gradient): {}", fuzzy_hash.double_gradient_hash);
|
||||
}
|
||||
if !exist_block_hash {
|
||||
warn!("found already existing hash (block_hash): {}", fuzzy_hash.block_hash);
|
||||
}
|
||||
info!("use {:?}", fuzzy_hash.post_id)
|
||||
|
||||
}
|
||||
info!("Load success");
|
||||
|
||||
@ -61,7 +73,11 @@ async fn main() -> anyhow::Result<()> {
|
||||
// Post is deleted or so.... !!
|
||||
db::create_hash(&pool, FuzzyHash{
|
||||
id: 0,
|
||||
hash: 0,
|
||||
mean_hash: 0,
|
||||
gradient_hash: 0,
|
||||
block_hash: 0,
|
||||
double_gradient_hash: 0,
|
||||
vert_gradient_hash: 0,
|
||||
post_id: id
|
||||
}).await?;
|
||||
continue
|
||||
@ -76,19 +92,38 @@ async fn main() -> anyhow::Result<()> {
|
||||
let mut out = File::create(file_name).await?;
|
||||
io::copy(& mut body.as_bytes(), &mut out).await?;
|
||||
|
||||
let hash = fuzzy::image::generate_hash(file_name).await?;
|
||||
let gradient_hash = fuzzy::image::generate_hash(file_name, HashAlg::Gradient).await?;
|
||||
let vert_gradient_hash = fuzzy::image::generate_hash(file_name, HashAlg::VertGradient).await?;
|
||||
let double_gradient_hash = fuzzy::image::generate_hash(file_name, HashAlg::DoubleGradient).await?;
|
||||
let block_hash = fuzzy::image::generate_hash(file_name, HashAlg::Blockhash).await?;
|
||||
|
||||
let _ = remove_file(file_name)?;
|
||||
|
||||
let already_exists = fuzzy_tree.add(hash).await;
|
||||
let exist_gradient = gradient_tree.add(gradient_hash).await;
|
||||
let exist_vert_gradient = vert_gradient_tree.add(vert_gradient_hash).await;
|
||||
let exist_double_gradient = double_gradient_tree.add(double_gradient_hash).await;
|
||||
let exist_block_hash = block_tree.add(block_hash).await;
|
||||
|
||||
if already_exists {
|
||||
info!("Post {id} already in tree with hash: {hash}")
|
||||
if !exist_gradient {
|
||||
warn!("found already existing hash (gradient): {}", gradient_hash);
|
||||
}
|
||||
if !exist_vert_gradient {
|
||||
warn!("found already existing hash (vert_gradient): {}", vert_gradient_hash);
|
||||
}
|
||||
if !exist_double_gradient {
|
||||
warn!("found already existing hash (double_gradient): {}", double_gradient_hash);
|
||||
}
|
||||
if !exist_block_hash {
|
||||
warn!("found already existing hash (block_hash): {}", block_hash);
|
||||
}
|
||||
|
||||
db::create_hash(&pool, FuzzyHash{
|
||||
id: 0,
|
||||
hash,
|
||||
mean_hash: 0,
|
||||
gradient_hash,
|
||||
vert_gradient_hash,
|
||||
double_gradient_hash,
|
||||
block_hash,
|
||||
post_id: id
|
||||
}).await?;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user