Skip to content

Commit

Permalink
use stash phash command (ThePornDatabase#119)
Browse files Browse the repository at this point in the history
Co-authored-by: 4c0d3r <nope>
  • Loading branch information
4c0d3r authored Dec 10, 2022
1 parent e376fdd commit 4205c5d
Show file tree
Hide file tree
Showing 8 changed files with 84 additions and 45 deletions.
1 change: 0 additions & 1 deletion .dockerignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
.coverage
.coveragerc
.git
.github
.gitignore
.pytest_cache
Expand Down
18 changes: 11 additions & 7 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,9 @@ jobs:
coverage:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
with:
submodules: true

- name: Set up Python
uses: actions/setup-python@v4
Expand All @@ -65,6 +67,10 @@ jobs:

- name: Display Python version
run: python --version

- uses: actions/setup-go@v3
with:
go-version: '>=1.19.0'

- name: Install ffmpeg
run: |
Expand All @@ -89,11 +95,7 @@ jobs:
cache: 'yarn'
node-version: '16'

- name: Install Node packages
run: yarn install

- name: Build Assets
run: yarn run build
- run: poetry run poe build_all

- run: poetry run pytest --cov

Expand All @@ -109,7 +111,9 @@ jobs:
dockerbuild:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
with:
submodules: true
- run: ./docker_build.sh

auto-tag:
Expand Down
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,6 @@ node_modules
.env
namer/web/public/assets
namer/web/templates
namer/videohashtools
namer/videohashtools/phashcompare*
namer/videohashtools/videohashes*
tools
10 changes: 4 additions & 6 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ RUN apt-get update \
wget \
gnupg2 \
xvfb \
golang \
git \
&& rm -rf /var/lib/apt/lists/* \
&& rm -Rf /usr/share/doc && rm -Rf /usr/share/man \
&& apt-get clean
Expand Down Expand Up @@ -58,12 +60,8 @@ ENV PATH="/root/.local/bin:$PATH"
WORKDIR /work
RUN rm -rf /work/namer/__pycache__/ || true \
&& rm -rf /work/test/__pycache__/ || true \
&& poetry install \
&& yarn install \
&& yarn run build \
&& poetry run flakeheaven lint \
&& poetry build
RUN ( Xvfb :99 & cd /work/ && poetry run pytest )
&& poetry install
RUN ( Xvfb :99 & cd /work/ && poetry run poe build_all )

FROM base
COPY --from=build /work/dist/namer-*.tar.gz /
Expand Down
28 changes: 23 additions & 5 deletions namer/configuration_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import random
import re
import tempfile
import shutil
from importlib import resources
from typing import Dict, List, Optional, Callable, Pattern, Any, Tuple
from configupdater import ConfigUpdater
Expand Down Expand Up @@ -296,16 +297,33 @@ def from_config(config: ConfigUpdater, namer_config: NamerConfig) -> NamerConfig
return namer_config


def resource_file_to_str(package: str, file_name: str) -> str:
    """
    Read a text resource bundled inside a package and return its contents.

    ``importlib.resources.files`` is used when the running Python provides it,
    otherwise the legacy ``importlib.resources.read_text`` is used.  Both paths
    decode the resource as UTF-8 so the result does not depend on the locale
    of the machine the code runs on.

    :param package: importable name of the package that holds the resource.
    :param file_name: name of the resource file inside ``package``.
    :return: the decoded text, or an empty string when neither API is available.
    """
    if hasattr(resources, 'files'):
        return resources.files(package).joinpath(file_name).read_text(encoding='utf-8')
    if hasattr(resources, 'read_text'):
        return resources.read_text(package, file_name, encoding='utf-8')
    return ""


def copy_resource_to_file(package: str, file_name: str, output: Path) -> bool:
    """
    Copy a binary resource bundled inside a package to a file on disk.

    :param package: importable name of the package that holds the resource.
    :param file_name: name of the resource file inside ``package``.
    :param output: destination file; it is created, or truncated if it exists.
    :return: True once the bytes were copied, False when ``importlib.resources``
             offers neither ``files`` nor ``open_binary``.
    """
    if hasattr(resources, 'files'):
        source = resources.files(package).joinpath(file_name).open("rb")
    elif hasattr(resources, 'open_binary'):
        # Guard on the attribute that is actually called in this branch.
        source = resources.open_binary(package, file_name)
    else:
        return False

    # The destination is only written, so plain "wb" is sufficient.
    with source as reader, open(output, mode="wb") as writer:
        shutil.copyfileobj(reader, writer)
    return True


def default_config(user_set: Optional[Path] = None) -> NamerConfig:
"""
Attempts reading various locations to find a namer.cfg file.
"""
config = ConfigUpdater()
config_str = ""
if hasattr(resources, 'files'):
config_str = resources.files("namer").joinpath("namer.cfg.default").read_text()
elif hasattr(resources, 'read_text'):
config_str = resources.read_text("namer", "namer.cfg.default")
config_str = resource_file_to_str("namer", "namer.cfg.default")
config.read_string(config_str)
namer_config = from_config(config, NamerConfig())
namer_config.config_updater = config
Expand Down
Empty file.
67 changes: 43 additions & 24 deletions namer/videophash.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
import concurrent.futures
import subprocess
import platform
import zipfile
import shutil
from importlib import resources
from decimal import Decimal, ROUND_HALF_UP
from pathlib import Path
from types import SimpleNamespace
from typing import List, Literal, Optional

import json
import imagehash
import numpy
import scipy.fft
Expand All @@ -14,28 +17,25 @@
from PIL import Image

from namer.ffmpeg import extract_screenshot, ffprobe
from namer.http import Http


class VideoPerceptualHash:
__screenshot_width: int = 160
__columns: int = 5
__rows: int = 5

__phash_path: Optional[Path]
__phash_name: str = 'stash_phash'
__phash_path: Path
__phash_name: str = 'videohash'

def __init__(self):
    """
    Point the instance at the ``tools`` directory two levels above this
    module, create that directory when needed, and copy the bundled hashing
    executable into it.
    """
    self.__phash_path = Path(__file__).parent.parent / 'tools'
    # exist_ok=True already tolerates an existing directory, so no separate
    # is_dir() pre-check is required.
    self.__phash_path.mkdir(exist_ok=True, parents=True)

    # The executable now ships as a package resource; the former
    # download-if-missing guard no longer has a method to call.
    self.__prepare_stash_phash()

def get_phash(self, file: Path) -> Optional[imagehash.ImageHash]:
phash = None

thumbnail_image = self.__generate_image_thumbnail(file)
if thumbnail_image:
phash = self.__phash(thumbnail_image, hash_size=8, high_freq_factor=8, resample=Image.Resampling.BILINEAR) # type: ignore
Expand All @@ -60,37 +60,56 @@ def __generate_image_thumbnail(self, file: Path) -> Optional[Image.Image]:
def get_stash_phash(self, file: Path) -> Optional[imagehash.ImageHash]:
    """
    Return the perceptual hash of ``file`` as produced by the external
    hashing executable.

    This is the public entry point; the work is delegated to the private
    ``__execute_stash_phash`` helper and its result is passed through as is.
    """
    stash_hash = self.__execute_stash_phash(file)
    return stash_hash

def __download_stash_phash(self):
def copy_resource_to_file(self, full_path: str, output: Path) -> bool:
    """
    Copy a packaged binary resource, addressed by a slash separated path, to
    ``output``.

    The first path segment is the package name; the remaining segments are
    walked one at a time below it.  When ``importlib.resources.files`` is
    missing, the legacy ``open_binary`` call is used with every segment but
    the last joined by dots as the package name.

    :param full_path: ``package/dir/.../file`` style location of the resource.
    :param output: destination file on disk.
    :return: True after a successful copy, False when no resource API exists.
    """
    segments = full_path.split('/')

    if hasattr(resources, 'files'):
        resource = resources.files(segments[0])
        for segment in segments[1:]:
            resource = resource.joinpath(segment)
        with resource.open("rb") as source, open(output, mode="+bw") as target:
            shutil.copyfileobj(source, target)
        return True

    if hasattr(resources, 'open_binary'):
        package_name = ".".join(segments[0:-1])
        resource_name = segments[-1]
        with resources.open_binary(package_name, resource_name) as source, open(output, mode="+bw") as target:
            shutil.copyfileobj(source, target)
        return True

    return False

def __prepare_stash_phash(self) -> bool:
    """
    Copy the hashing executable that matches the current operating system
    out of the package resources into the tools directory.

    The resource name is ``videohashes`` followed by ``-linux`` on Linux,
    ``-macos`` on Darwin and ``.exe`` on every other platform.  Outside
    Windows the copied file is additionally marked executable.

    :return: True when the executable was copied, False otherwise.
    """
    # Named ``system`` rather than ``os`` to avoid shadowing the stdlib module name.
    system = platform.system().lower()
    suffixes = {'linux': '-linux', 'darwin': '-macos'}
    post: str = suffixes.get(system, '.exe')

    if not self.__phash_path:
        return False

    target = self.__phash_path / self.__phash_name
    success = self.copy_resource_to_file('namer/videohashtools/videohashes' + post, target)
    if success and system != 'windows':
        # NOTE(review): 0o777 leaves the executable world-writable; 0o755
        # would probably be sufficient - confirm before tightening.
        target.chmod(0o777)

    return success

def __execute_stash_phash(self, file: Path) -> Optional[imagehash.ImageHash]:
    """
    Run the hashing executable on ``file`` and convert the ``phash`` field of
    its JSON output into an image hash.

    :param file: video file handed to the executable via ``--video``.
    :return: the parsed hash, or None when no tools directory is set, the
             executable exits with a non-zero status, or its output cannot
             be parsed.
    """
    if not self.__phash_path:
        return None

    args = [str(self.__phash_path / self.__phash_name), '-json', '--video', str(file)]
    with subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) as process:
        stdout, stderr = process.communicate()
        stdout, stderr = stdout.strip(), stderr.strip()

    if process.returncode != 0:
        logger.error(stderr)
        return None

    try:
        # Only the "phash" field is consumed; any other fields in the JSON
        # document are ignored here.
        phash = json.loads(stdout)['phash']
        return imagehash.hex_to_hash(phash)
    except (ValueError, KeyError, TypeError) as error:
        # json.JSONDecodeError is a ValueError; KeyError/TypeError cover a
        # JSON document that is missing "phash" or is not an object.
        logger.error(f'Unable to parse videohash output for {file}: {error}')
        return None

def __generate_thumbnails(self, file: Path, duration: float) -> List[Image.Image]:
duration = int(Decimal(duration * 100).quantize(0, ROUND_HALF_UP)) / 100
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ max-line-length = 150
[tool.poe.tasks]
install_npm = {shell="yarn install"}
build_yarn = {shell="yarn run build"}
install_videohashes_src = {shell="git submodule update"}
install_videohashes_src = {shell="command -v git >/dev/null && git submodule update || echo 'Skipping git sub module update'"}
build_videohashes = {shell="make build phashcompare-build -C ./videohashes"}
move_videohashes = {"script" = "shutil:copytree('./videohashes/dist', './namer/videohashtools', dirs_exist_ok=True)"}
build_namer = {shell="poetry build"}
Expand Down

0 comments on commit 4205c5d

Please sign in to comment.