feat: more fuzzy search?
This commit is contained in:
parent
f9640b3d90
commit
0d4655e50b
@ -10,6 +10,7 @@ services:
|
|||||||
|
|
||||||
db:
|
db:
|
||||||
image: postgres
|
image: postgres
|
||||||
|
container_name: fuzzy_search_db
|
||||||
ports:
|
ports:
|
||||||
- 5432:5432
|
- 5432:5432
|
||||||
environment:
|
environment:
|
||||||
|
@ -1,6 +1,10 @@
|
|||||||
-- Add migration script here
|
-- Add migration script here
|
||||||
CREATE TABLE FuzzyHashes(
|
CREATE TABLE FuzzyHashes(
|
||||||
id SERIAL NOT NULL PRIMARY KEY,
|
id SERIAL NOT NULL PRIMARY KEY,
|
||||||
hash BIGINT NOT NULL,
|
mean_hash BIGINT NOT NULL,
|
||||||
|
gradient_hash BIGINT NOT NULL,
|
||||||
|
vert_gradient_hash BIGINT NOT NULL,
|
||||||
|
double_gradient_hash BIGINT NOT NULL,
|
||||||
|
block_hash BIGINT NOT NULL,
|
||||||
post_id CHAR(25) NOT NULL
|
post_id CHAR(25) NOT NULL
|
||||||
)
|
)
|
16
src/db.rs
16
src/db.rs
@ -8,7 +8,11 @@ static MIGRATOR: Migrator = sqlx::migrate!(); // defaults to "./migrations"
|
|||||||
|
|
||||||
pub struct FuzzyHash {
|
pub struct FuzzyHash {
|
||||||
pub id: i32,
|
pub id: i32,
|
||||||
pub hash: i64,
|
pub mean_hash: i64,
|
||||||
|
pub gradient_hash: i64,
|
||||||
|
pub vert_gradient_hash: i64,
|
||||||
|
pub double_gradient_hash: i64,
|
||||||
|
pub block_hash: i64,
|
||||||
pub post_id: String,
|
pub post_id: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -22,7 +26,7 @@ pub async fn migrate(pool: &Pool<Postgres>) -> anyhow::Result<()> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub async fn get_all_hashes(pool: &Pool<Postgres>) -> anyhow::Result<Vec<FuzzyHash>> {
|
pub async fn get_all_hashes(pool: &Pool<Postgres>) -> anyhow::Result<Vec<FuzzyHash>> {
|
||||||
let rows = sqlx::query!("SELECT id, hash, post_id FROM fuzzyhashes WHERE hash != 0")
|
let rows = sqlx::query!("SELECT id, mean_hash, gradient_hash, vert_gradient_hash, double_gradient_hash, block_hash, post_id FROM fuzzyhashes WHERE mean_hash > 0 AND gradient_hash > 0 AND vert_gradient_hash > 0 AND double_gradient_hash > 0 AND block_hash > 0")
|
||||||
.fetch_all(pool).await?;
|
.fetch_all(pool).await?;
|
||||||
|
|
||||||
let mut data = vec![];
|
let mut data = vec![];
|
||||||
@ -30,7 +34,11 @@ pub async fn get_all_hashes(pool: &Pool<Postgres>) -> anyhow::Result<Vec<FuzzyHa
|
|||||||
for row in rows {
|
for row in rows {
|
||||||
data.push(FuzzyHash{
|
data.push(FuzzyHash{
|
||||||
id: row.id,
|
id: row.id,
|
||||||
hash: row.hash,
|
mean_hash: row.mean_hash,
|
||||||
|
gradient_hash: row.gradient_hash,
|
||||||
|
vert_gradient_hash: row.vert_gradient_hash,
|
||||||
|
double_gradient_hash: row.double_gradient_hash,
|
||||||
|
block_hash: row.block_hash,
|
||||||
post_id: row.post_id,
|
post_id: row.post_id,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -39,7 +47,7 @@ pub async fn get_all_hashes(pool: &Pool<Postgres>) -> anyhow::Result<Vec<FuzzyHa
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub async fn create_hash(pool: &Pool<Postgres>, fuzzy: FuzzyHash) -> anyhow::Result<i32> {
|
pub async fn create_hash(pool: &Pool<Postgres>, fuzzy: FuzzyHash) -> anyhow::Result<i32> {
|
||||||
let rec = sqlx::query!(r#"INSERT INTO "fuzzyhashes" (hash, post_id) VALUES ($1, $2) RETURNING id"#, fuzzy.hash, fuzzy.post_id).fetch_one(pool).await?;
|
let rec = sqlx::query!(r#"INSERT INTO "fuzzyhashes" (mean_hash, gradient_hash, vert_gradient_hash, double_gradient_hash, block_hash, post_id) VALUES ($1, $2, $3, $4, $5, $6) RETURNING id"#, fuzzy.mean_hash, fuzzy.gradient_hash, fuzzy.vert_gradient_hash, fuzzy.double_gradient_hash, fuzzy.block_hash, fuzzy.post_id).fetch_one(pool).await?;
|
||||||
|
|
||||||
Ok(rec.id)
|
Ok(rec.id)
|
||||||
}
|
}
|
||||||
|
@ -1,11 +1,12 @@
|
|||||||
use image;
|
use image;
|
||||||
|
use img_hash::HashAlg;
|
||||||
|
use img_hash::HasherConfig;
|
||||||
|
|
||||||
pub async fn generate_hash(path: &str) -> anyhow::Result<i64> {
|
pub async fn generate_hash(path: &str, alg: HashAlg) -> anyhow::Result<i64> {
|
||||||
let img = image::open(path)?;
|
let img = image::open(path)?;
|
||||||
use img_hash::{HashAlg::Gradient, HasherConfig};
|
|
||||||
|
|
||||||
let img_hasher = HasherConfig::with_bytes_type::<[u8; 8]>()
|
let img_hasher = HasherConfig::with_bytes_type::<[u8; 8]>()
|
||||||
.hash_alg(Gradient)
|
.hash_alg(alg)
|
||||||
.hash_size(8, 8)
|
.hash_size(8, 8)
|
||||||
.preproc_dct()
|
.preproc_dct()
|
||||||
.to_hasher();
|
.to_hasher();
|
||||||
|
67
src/main.rs
67
src/main.rs
@ -5,6 +5,7 @@ use anyhow::anyhow;
|
|||||||
use dotenvy::dotenv;
|
use dotenvy::dotenv;
|
||||||
use env_logger::TimestampPrecision;
|
use env_logger::TimestampPrecision;
|
||||||
use image::EncodableLayout;
|
use image::EncodableLayout;
|
||||||
|
use img_hash::HashAlg;
|
||||||
use log::{debug, info, warn};
|
use log::{debug, info, warn};
|
||||||
use ratelimit::Ratelimiter;
|
use ratelimit::Ratelimiter;
|
||||||
use tokio::fs::File;
|
use tokio::fs::File;
|
||||||
@ -34,20 +35,31 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
|
|
||||||
info!("Loading hashes...");
|
info!("Loading hashes...");
|
||||||
let fuzzy_hashes = db::get_all_hashes(&pool).await?;
|
let fuzzy_hashes = db::get_all_hashes(&pool).await?;
|
||||||
let fuzzy_tree = fuzzy::tree::Tree::new();
|
let gradient_tree = fuzzy::tree::Tree::new();
|
||||||
|
let vert_gradient_tree = fuzzy::tree::Tree::new();
|
||||||
|
let double_gradient_tree = fuzzy::tree::Tree::new();
|
||||||
|
let block_tree = fuzzy::tree::Tree::new();
|
||||||
|
|
||||||
for fuzzy_hash in fuzzy_hashes {
|
for fuzzy_hash in fuzzy_hashes {
|
||||||
let exist = fuzzy_tree.add(fuzzy_hash.hash).await;
|
let exist_gradient = gradient_tree.add(fuzzy_hash.gradient_hash).await;
|
||||||
|
let exist_vert_gradient = vert_gradient_tree.add(fuzzy_hash.vert_gradient_hash).await;
|
||||||
|
let exist_double_gradient = double_gradient_tree.add(fuzzy_hash.double_gradient_hash).await;
|
||||||
|
let exist_block_hash = block_tree.add(fuzzy_hash.block_hash).await;
|
||||||
|
|
||||||
if !exist {
|
if !exist_gradient {
|
||||||
warn!("found already existing hash: {}", fuzzy_hash.hash);
|
warn!("found already existing hash (gradient): {}", fuzzy_hash.gradient_hash);
|
||||||
let founds = fuzzy_tree.find(vec!(HashDistance{
|
|
||||||
hash: fuzzy_hash.hash,
|
|
||||||
distance: 0
|
|
||||||
})).await;
|
|
||||||
|
|
||||||
founds[0].iter().for_each(|has_dist| warn!("Existing: {}", has_dist.hash))
|
|
||||||
}
|
}
|
||||||
|
if !exist_vert_gradient {
|
||||||
|
warn!("found already existing hash (vert_gradient): {}", fuzzy_hash.vert_gradient_hash);
|
||||||
|
}
|
||||||
|
if !exist_double_gradient {
|
||||||
|
warn!("found already existing hash (double_gradient): {}", fuzzy_hash.double_gradient_hash);
|
||||||
|
}
|
||||||
|
if !exist_block_hash {
|
||||||
|
warn!("found already existing hash (block_hash): {}", fuzzy_hash.block_hash);
|
||||||
|
}
|
||||||
|
info!("use {:?}", fuzzy_hash.post_id)
|
||||||
|
|
||||||
}
|
}
|
||||||
info!("Load success");
|
info!("Load success");
|
||||||
|
|
||||||
@ -61,7 +73,11 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
// Post is deleted or so.... !!
|
// Post is deleted or so.... !!
|
||||||
db::create_hash(&pool, FuzzyHash{
|
db::create_hash(&pool, FuzzyHash{
|
||||||
id: 0,
|
id: 0,
|
||||||
hash: 0,
|
mean_hash: 0,
|
||||||
|
gradient_hash: 0,
|
||||||
|
block_hash: 0,
|
||||||
|
double_gradient_hash: 0,
|
||||||
|
vert_gradient_hash: 0,
|
||||||
post_id: id
|
post_id: id
|
||||||
}).await?;
|
}).await?;
|
||||||
continue
|
continue
|
||||||
@ -76,19 +92,38 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
let mut out = File::create(file_name).await?;
|
let mut out = File::create(file_name).await?;
|
||||||
io::copy(& mut body.as_bytes(), &mut out).await?;
|
io::copy(& mut body.as_bytes(), &mut out).await?;
|
||||||
|
|
||||||
let hash = fuzzy::image::generate_hash(file_name).await?;
|
let gradient_hash = fuzzy::image::generate_hash(file_name, HashAlg::Gradient).await?;
|
||||||
|
let vert_gradient_hash = fuzzy::image::generate_hash(file_name, HashAlg::VertGradient).await?;
|
||||||
|
let double_gradient_hash = fuzzy::image::generate_hash(file_name, HashAlg::DoubleGradient).await?;
|
||||||
|
let block_hash = fuzzy::image::generate_hash(file_name, HashAlg::Blockhash).await?;
|
||||||
|
|
||||||
let _ = remove_file(file_name)?;
|
let _ = remove_file(file_name)?;
|
||||||
|
|
||||||
let already_exists = fuzzy_tree.add(hash).await;
|
let exist_gradient = gradient_tree.add(gradient_hash).await;
|
||||||
|
let exist_vert_gradient = vert_gradient_tree.add(vert_gradient_hash).await;
|
||||||
|
let exist_double_gradient = double_gradient_tree.add(double_gradient_hash).await;
|
||||||
|
let exist_block_hash = block_tree.add(block_hash).await;
|
||||||
|
|
||||||
if already_exists {
|
if !exist_gradient {
|
||||||
info!("Post {id} already in tree with hash: {hash}")
|
warn!("found already existing hash (gradient): {}", gradient_hash);
|
||||||
|
}
|
||||||
|
if !exist_vert_gradient {
|
||||||
|
warn!("found already existing hash (vert_gradient): {}", vert_gradient_hash);
|
||||||
|
}
|
||||||
|
if !exist_double_gradient {
|
||||||
|
warn!("found already existing hash (double_gradient): {}", double_gradient_hash);
|
||||||
|
}
|
||||||
|
if !exist_block_hash {
|
||||||
|
warn!("found already existing hash (block_hash): {}", block_hash);
|
||||||
}
|
}
|
||||||
|
|
||||||
db::create_hash(&pool, FuzzyHash{
|
db::create_hash(&pool, FuzzyHash{
|
||||||
id: 0,
|
id: 0,
|
||||||
hash,
|
mean_hash: 0,
|
||||||
|
gradient_hash,
|
||||||
|
vert_gradient_hash,
|
||||||
|
double_gradient_hash,
|
||||||
|
block_hash,
|
||||||
post_id: id
|
post_id: id
|
||||||
}).await?;
|
}).await?;
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user