
I came across a file with the .ipynb extension, so I'm checking what it is.

[링크 : https://github.com/saunack/MobileNetv2-SSD]

 

Install Anaconda,

and then, supposedly, just install Jupyter Notebook from there.

[링크 : https://mananacho.tistory.com/31]

[링크 : https://blog.naver.com/tamiel/221956194782]
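
For reference, once Anaconda is in place the notebook setup is just a couple of commands (a sketch; package names assumed):

$ conda install -c conda-forge notebook
$ jupyter notebook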

 

There is a command-line way too, but it's somewhat convoluted, and since it still doesn't free you from Jupyter, I'm not sure what the point is.

$ jupyter nbconvert --execute --to notebook lda2.ipynb

[링크 : https://data-scient2st.tistory.com/234]
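
The same execute-and-save can also be scripted through the nbformat/nbconvert Python APIs (a minimal sketch; it reuses the lda2.ipynb filename from the command above):

import nbformat
from nbconvert.preprocessors import ExecutePreprocessor

# Load the notebook, run every cell, and write the executed copy back out.
nb = nbformat.read("lda2.ipynb", as_version=4)
ep = ExecutePreprocessor(timeout=600, kernel_name="python3")
ep.preprocess(nb, {"metadata": {"path": "."}})
nbformat.write(nb, "lda2_executed.ipynb")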

Posted by 구차니

Got tired of hunting through the docs, so I just ran the demo and looked at its arguments..

root        3019     925 72 06:38 ?        00:00:25 /usr/bin/python3 /home/root/.nxp-demo-experience/scripts/machine_learning/MLDemoLauncher.py detect
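
For the record, output like the above typically comes from something like this (exact command assumed):

$ ps -ef | grep MLDemo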

 

root@imx8mpevk:~/.nxp-demo-experience/scripts/machine_learning# cat MLDemoLauncher.py 
#!/usr/bin/env python3

"""
Copyright 2021-2023 NXP

SPDX-License-Identifier: BSD-2-Clause

This script launches the NNStreamer ML Demos using a UI to pick settings.
"""

import gi
import os
import sys
import glob
from gi.repository import Gtk, GLib, Gio

gi.require_version("Gtk", "3.0")

sys.path.append("/home/root/.nxp-demo-experience/scripts/")
import utils


class MLLaunch(Gtk.Window):
    """The GUI window for the ML demo launcher"""

    def __init__(self, demo):
        """Creates the UI window"""
        # Initialization
        self.demo = demo
        super().__init__(title=demo)
        self.set_default_size(450, 200)
        self.set_resizable(False)

        # Get platform
        self.platform = os.uname().nodename
     
        # OpenVX graph caching is not available on i.MX 8QuadMax platform.
        if self.platform != "imx8qmmek" :
            os.environ["VIV_VX_CACHE_BINARY_GRAPH_DIR"] = "/home/root/.cache/gopoint"
            os.environ["VIV_VX_ENABLE_CACHE_GRAPH_BINARY"] = "1"

        # Get widget properties
        devices = []
        if self.demo != "brand" and self.demo != "selfie_nn":
            if self.platform != "imx93evk":
                devices.append("Example Video")

        for device in glob.glob("/dev/video*"):
            devices.append(device)

        backends_available = ["CPU"]
        if (
            os.path.exists("/usr/lib/libvx_delegate.so")
            and self.demo != "pose"
            and self.demo != "selfie_nn"
        ):
            backends_available.insert(1, "GPU")
        if os.path.exists("/usr/lib/libneuralnetworks.so") and self.demo != "brand" and self.platform != "imx8qmmek":
            backends_available.insert(0, "NPU")
        if os.path.exists("/usr/lib/libethosu_delegate.so"):
            backends_available.insert(0, "NPU")
            backends_available.pop()

        displays_available = ["Weston"]

        colors_available = ["Red", "Green", "Blue", "Black", "White"]

        demo_modes_available = ["Background Substitution", "Segmentation Mask"]

        # Create widgets
        main_grid = Gtk.Grid.new()
        device_label = Gtk.Label.new("Source")
        self.device_combo = Gtk.ComboBoxText()
        backend_label = Gtk.Label.new("Backend")
        self.backend_combo = Gtk.ComboBoxText()
        self.display_combo = Gtk.ComboBoxText()
        self.launch_button = Gtk.Button.new_with_label("Run")
        self.status_bar = Gtk.Label.new()
        header = Gtk.HeaderBar()
        quit_button = Gtk.Button()
        quit_icon = Gio.ThemedIcon(name="process-stop-symbolic")
        quit_image = Gtk.Image.new_from_gicon(quit_icon, Gtk.IconSize.BUTTON)
        separator = Gtk.Separator.new(0)
        time_title_label = Gtk.Label.new("Video Refresh")
        self.time_label = Gtk.Label.new("--.-- ms")
        self.fps_label = Gtk.Label.new("-- FPS")
        inference_title_label = Gtk.Label.new("Inference Time")
        self.inference_label = Gtk.Label.new("--.-- ms")
        self.ips_label = Gtk.Label.new("-- IPS")
        if self.demo != "selfie_nn":
            self.width_entry = self.r_scale = Gtk.Scale.new_with_range(
                Gtk.Orientation.HORIZONTAL, 300, 1920, 2
            )
            self.height_entry = self.r_scale = Gtk.Scale.new_with_range(
                Gtk.Orientation.HORIZONTAL, 300, 1080, 2
            )
            self.width_label = Gtk.Label.new("Height")
            self.height_label = Gtk.Label.new("Width")
            self.color_label = Gtk.Label.new("Label Color")
        else:
            self.color_label = Gtk.Label.new("Text Color")
            self.demo_mode = Gtk.Label.new("Demo Mode")
            self.mode_combo = Gtk.ComboBoxText()
        self.color_combo = Gtk.ComboBoxText()

        # Organize widgets
        self.add(main_grid)
        self.set_titlebar(header)

        quit_button.add(quit_image)
        header.pack_end(quit_button)

        main_grid.set_row_spacing(10)
        main_grid.set_border_width(10)

        main_grid.attach(device_label, 0, 1, 2, 1)
        device_label.set_hexpand(True)
        main_grid.attach(backend_label, 0, 2, 2, 1)
        # main_grid.attach(display_label, 0, 3, 2, 1)
        if self.demo != "selfie_nn":
            main_grid.attach(self.width_label, 0, 4, 2, 1)
            main_grid.attach(self.height_label, 0, 5, 2, 1)
            main_grid.attach(self.color_label, 0, 6, 2, 1)
        else:
            main_grid.attach(self.demo_mode, 0, 4, 2, 1)
            main_grid.attach(self.color_label, 0, 5, 2, 1)

        main_grid.attach(self.device_combo, 2, 1, 2, 1)
        self.device_combo.set_hexpand(True)
        main_grid.attach(self.backend_combo, 2, 2, 2, 1)
        # main_grid.attach(self.display_combo, 2, 3, 2, 1)
        if self.demo != "selfie_nn":
            main_grid.attach(self.width_entry, 2, 4, 2, 1)
            main_grid.attach(self.height_entry, 2, 5, 2, 1)
            main_grid.attach(self.color_combo, 2, 6, 2, 1)
        else:
            main_grid.attach(self.mode_combo, 2, 4, 2, 1)
            main_grid.attach(self.color_combo, 2, 5, 2, 1)

        main_grid.attach(self.launch_button, 0, 7, 4, 1)
        main_grid.attach(self.status_bar, 0, 8, 4, 1)

        main_grid.attach(separator, 0, 9, 4, 1)

        main_grid.attach(time_title_label, 0, 10, 2, 1)
        main_grid.attach(self.time_label, 0, 11, 1, 1)
        main_grid.attach(self.fps_label, 1, 11, 1, 1)
        main_grid.attach(inference_title_label, 2, 10, 2, 1)
        main_grid.attach(self.inference_label, 2, 11, 1, 1)
        main_grid.attach(self.ips_label, 3, 11, 1, 1)

        # Configure widgets
        for device in devices:
            self.device_combo.append_text(device)
        for backend in backends_available:
            self.backend_combo.append_text(backend)
        for display in displays_available:
            self.display_combo.append_text(display)
        for color in colors_available:
            self.color_combo.append_text(color)
        if self.demo == "selfie_nn":
            for mode in demo_modes_available:
                self.mode_combo.append_text(mode)

        self.device_combo.set_active(0)
        self.backend_combo.set_active(0)
        self.display_combo.set_active(0)
        self.color_combo.set_active(0)
        if self.demo != "selfie_nn":
            self.width_entry.set_value(1920)
            self.height_entry.set_value(1080)
            self.width_entry.set_sensitive(False)
            self.height_entry.set_sensitive(False)
        else:
            self.mode_combo.set_active(0)
        self.device_combo.connect("changed", self.on_source_change)
        self.launch_button.connect("clicked", self.start)
        quit_button.connect("clicked", exit)
        if self.demo == "detect":
            header.set_title("Detection Demo")
        elif self.demo == "id":
            header.set_title("Classification Demo")
        elif self.demo == "pose":
            header.set_title("Pose Demo")
        elif self.demo == "brand":
            header.set_title("Brand Demo")
        elif self.demo == "selfie_nn":
            header.set_title("Selfie Segmenter Demo")
        else:
            header.set_title("NNStreamer Demo")
        header.set_subtitle("NNStreamer Examples")

    def start(self, button):
        """Starts the ML Demo with selected settings"""
        self.update_time = GLib.get_monotonic_time()
        self.launch_button.set_sensitive(False)
        if self.color_combo.get_active_text() == "Red":
            r = 1
            g = 0
            b = 0
        elif self.color_combo.get_active_text() == "Blue":
            r = 0
            g = 0
            b = 1
        elif self.color_combo.get_active_text() == "Green":
            r = 0
            g = 1
            b = 0
        elif self.color_combo.get_active_text() == "Black":
            r = 0
            g = 0
            b = 0
        elif self.color_combo.get_active_text() == "White":
            r = 1
            g = 1
            b = 1
        else:
            r = 1
            g = 0
            b = 0
        if self.demo == "detect":
            if self.platform == "imx93evk":
                model = utils.download_file("mobilenet_ssd_v2_coco_quant_postprocess_vela.tflite")
            else:
                model = utils.download_file("mobilenet_ssd_v2_coco_quant_postprocess.tflite")
            labels = utils.download_file("coco_labels.txt")
            if self.device_combo.get_active_text() == "Example Video":
                device = utils.download_file("detect_example.mov")
            else:
                device = self.device_combo.get_active_text()
            if model == -1 or model == -2 or model == -3:
                if self.platform == "imx93evk":
                    error = "mobilenet_ssd_v2_coco_quant_postprocess_vela.tflite"
                else:
                    error = "mobilenet_ssd_v2_coco_quant_postprocess.tflite"
            elif labels == -1 or labels == -2 or labels == -3:
                error = "coco_labels.txt"
            elif device == -1 or device == -2 or device == -3:
                error = "detect_example.mov"
        if self.demo == "id":
            if self.platform == "imx93evk":
                model = utils.download_file("mobilenet_v1_1.0_224_quant_vela.tflite")
            else:
                model = utils.download_file("mobilenet_v1_1.0_224_quant.tflite")
            labels = utils.download_file("1_1.0_224_labels.txt")
            if self.device_combo.get_active_text() == "Example Video":
                device = utils.download_file("id_example.mov")
            else:
                device = self.device_combo.get_active_text()
            if model == -1 or model == -2 or model == -3:
                if self.platform == "imx93evk":
                    error = "mobilenet_v1_1.0_224_quant_vela.tflite"
                else:
                    error = "mobilenet_v1_1.0_224_quant.tflite"
            elif labels == -1 or labels == -2 or labels == -3:
                error = "1_1.0_224_labels.txt"
            elif device == -1 or device == -2 or device == -3:
                error = "id_example.mov"
        if self.demo == "pose":
            model = utils.download_file("posenet_resnet50_uint8_float32_quant.tflite")
            labels = utils.download_file("key_point_labels.txt")
            if self.device_combo.get_active_text() == "Example Video":
                device = utils.download_file("pose_example.mov")
            else:
                device = self.device_combo.get_active_text()
            if model == -1 or model == -2 or model == -3:
                error = "posenet_resnet50_uint8_float32_quant.tflite"
            elif labels == -1 or labels == -2 or labels == -3:
                error = "key_point_labels.txt"
            elif device == -1 or device == -2 or device == -3:
                error = "pose_example.mov"
        if self.demo == "brand":
            model = utils.download_file("brand_model.tflite")
            labels = utils.download_file("brand_labels.txt")
            if self.device_combo.get_active_text() == "Example Video":
                device = utils.download_file("brand_example.mov")
            else:
                device = self.device_combo.get_active_text()
            if model == -1 or model == -2 or model == -3:
                error = "brand_model.tflite"
            elif labels == -1 or labels == -2 or labels == -3:
                error = "brand_labels.txt"
            elif device == -1 or device == -2 or device == -3:
                error = "brand_example.mov"
        if self.demo == "selfie_nn":
            if self.platform == "imx93evk":
                model = utils.download_file(
                    "selfie_segmenter_landscape_int8_vela.tflite"
                )
            else:
                model = utils.download_file("selfie_segmenter_int8.tflite")
            # Labels refer to background img
            if self.platform == "imx93evk":
                labels = utils.download_file("bg_image_landscape.jpg")
            else:
                labels = utils.download_file("bg_image.jpg")
            if self.device_combo.get_active_text() == "Example Video":
                device = utils.download_file("selfie_example.mov")
            else:
                device = self.device_combo.get_active_text()
            if model == -1 or model == -2 or model == -3:
                if self.platform == "imx93evk":
                    error = "selfie_segmenter_landscape_int8_vela.tflite"
                else:
                    error = "selfie_segmenter_int8.tflite"
            elif labels == -1 or labels == -2 or labels == -3:
                if self.platform == "imx93evk":
                    error = "bg_image_landscape.jpg"
                else:
                    error = "bg_image.jpg"
            elif device == -1 or device == -2 or device == -3:
                error = "selfie_example.mov"
            if self.mode_combo.get_active_text() == "Background Substitution":
                set_mode = 0
            else:
                set_mode = 1

        if model == -1 or labels == -1 or device == -1:
            """
            dialog = Gtk.MessageDialog(
                transient_for=self,
                flags=0,
                message_type=Gtk.MessageType.ERROR,
                buttons=Gtk.ButtonsType.CANCEL,
                text="Cannot find files! The file that you requested" +
                " does not have any metadata that is related to it. " +
                "Please see /home/root/.nxp-demo-experience/downloads.txt" +
                " to see if the requested file exists! \n \n Cannot find:" +
                error)
            dialog.run()
            dialog.destroy()
            """
            self.status_bar.set_text("Cannot find files!")
            self.launch_button.set_sensitive(True)
            return
        if model == -2 or labels == -2 or device == -2:
            """
            dialog = Gtk.MessageDialog(
                transient_for=self,
                flags=0,
                message_type=Gtk.MessageType.ERROR,
                buttons=Gtk.ButtonsType.CANCEL,
                text="Cannot download files! The URL used to download the" +
                " file cannot be reached. If you are connected to the " +
                "internet, please check the /home/root/.nxp-demo-experience" +
                "/downloads.txt for the URL. For some regions, " +
                "these sites may be blocked. To install these manually," +
                " please go to the file listed above and provide the " +
                "path to the file in \"PATH\" \n \n Cannot download " + error)
            dialog.run()
            dialog.destroy()
            """
            self.status_bar.set_text("Download failed!")
            self.launch_button.set_sensitive(True)
            return
        if model == -3 or labels == -3 or device == -4:
            """
            dialog = Gtk.MessageDialog(
                transient_for=self,
                flags=0,
                message_type=Gtk.MessageType.ERROR,
                buttons=Gtk.ButtonsType.CANCEL,
                text="Invalid files! The files where not what we expected." +
                "If you are SURE that the files are correct, delete " +
                "the \"SHA\" value in /home/root/.nxp-demo-experience" +
                "/downloads.txt to bypass the SHA check. \n \n Bad SHA for " +
                error)
            dialog.run()
            dialog.destroy()
            """
            self.status_bar.set_text("Downloaded bad file!")
            self.launch_button.set_sensitive(True)
            return
        if self.demo == "detect":
            import nndetection

            example = nndetection.ObjectDetection(
                self.platform,
                device,
                self.backend_combo.get_active_text(),
                model,
                labels,
                self.display_combo.get_active_text(),
                self.update_stats,
                self.width_entry.get_value(),
                self.height_entry.get_value(),
                r,
                g,
                b,
            )
            example.run()
        if self.demo == "id":
            import nnclassification

            example = nnclassification.NNStreamerExample(
                self.platform,
                device,
                self.backend_combo.get_active_text(),
                model,
                labels,
                self.display_combo.get_active_text(),
                self.update_stats,
                self.width_entry.get_value(),
                self.height_entry.get_value(),
                r,
                g,
                b,
            )
            example.run_example()
        if self.demo == "pose":
            import nnpose

            example = nnpose.NNStreamerExample(
                self.platform,
                device,
                self.backend_combo.get_active_text(),
                model,
                labels,
                self.display_combo.get_active_text(),
                self.update_stats,
                self.width_entry.get_value(),
                self.height_entry.get_value(),
                r,
                g,
                b,
            )
            example.run_example()
        if self.demo == "brand":
            import nnbrand

            example = nnbrand.NNStreamerExample(
                self.platform,
                device,
                self.backend_combo.get_active_text(),
                model,
                labels,
                self.display_combo.get_active_text(),
                self.update_stats,
                self.width_entry.get_value(),
                self.height_entry.get_value(),
                r,
                g,
                b,
            )
            example.run_example()
        if self.demo == "selfie_nn":
            import selfie_segmenter

            example = selfie_segmenter.SelfieSegmenter(
                self.platform,
                device,
                self.backend_combo.get_active_text(),
                model,
                labels,
                self.update_stats,
                set_mode,
                r,
                g,
                b,
            )
            example.run()

        self.launch_button.set_sensitive(True)

    def update_stats(self, time):
        """Callback used the update stats in GUI"""
        interval_time = (GLib.get_monotonic_time() - self.update_time) / 1000000
        if interval_time > 1:
            refresh_time = time.interval_time
            inference_time = time.tensor_filter.get_property("latency")

            if refresh_time != 0 and inference_time != 0:
                # Print pipeline information
                if self.demo == "selfie_nn" or self.demo == "id" or self.demo == "detect":
                    self.time_label.set_text(
                        "{:12.2f} ms".format(1.0 / time.current_framerate * 1000.0)
                    )
                    self.fps_label.set_text(
                        "{:12.2f} FPS".format(time.current_framerate)
                    )
                else:
                    self.time_label.set_text("{:12.2f} ms".format(refresh_time / 1000))
                    self.fps_label.set_text(
                        "{:12.2f} FPS".format(1 / (refresh_time / 1000000))
                    )
                # Print inference information
                self.inference_label.set_text(
                    "{:12.2f} ms".format(inference_time / 1000)
                )
                self.ips_label.set_text(
                    "{:12.2f} FPS".format(1 / (inference_time / 1000000))
                )
            self.update_time = GLib.get_monotonic_time()
        return True

    def on_source_change(self, widget):
        """Callback to lock sliders"""
        if self.demo != "selfie_nn":
            if self.device_combo.get_active_text() == "Example Video":
                self.width_entry.set_value(1920)
                self.height_entry.set_value(1080)
                self.width_entry.set_sensitive(False)
                self.height_entry.set_sensitive(False)
            else:
                self.width_entry.set_sensitive(True)
                self.height_entry.set_sensitive(True)


if __name__ == "__main__":
    if (
        len(sys.argv) != 2
        and sys.argv[1] != "detect"
        and sys.argv[1] != "id"
        and sys.argv[1] != "pose"
        and sys.argv[1] != "selfie_nn"
    ):
        print("Demos available: detect, id, pose, selfie_nn")
    else:
        win = MLLaunch(sys.argv[1])
        win.connect("destroy", Gtk.main_quit)
        win.show_all()
        Gtk.main()

 

The part that actually runs is below: the launcher imports nndetection, so I'm tracing into that.

Come to think of it, since it's LGPL, is it okay to just post it like this?

root@imx8mpevk:~/.nxp-demo-experience/scripts/machine_learning# find / -name nndetection.py
/run/media/root-mmcblk2p2/home/root/.nxp-demo-experience/scripts/machine_learning/nndetection.py
/home/root/.nxp-demo-experience/scripts/machine_learning/nndetection.py

root@imx8mpevk:~/.nxp-demo-experience/scripts/machine_learning# cat /home/root/.nxp-demo-experience/scripts/machine_learning/nndetection.py
#!/usr/bin/env python3

"""
Copyright SSAFY Team 1 <jangjongha.sw@gmail.com>
Copyright 2021-2023 NXP

SPDX-License-Identifier: LGPL-2.1-only
Original Source: https://github.com/nnstreamer/nnstreamer-example

This demo shows how you can use the NNStreamer to detect objects.

From the original source, this was modified to better work with the a
GUI and to get better performance on the i.MX 8M Plus and i.MX93.
"""

import os
import sys
import gi
import re
import logging
import numpy as np
import cairo

gi.require_version("Gst", "1.0")
gi.require_foreign("cairo")
from gi.repository import Gst, GObject, GLib

DEBUG = False


class ObjectDetection:
    """The class that manages the demo"""

    def __init__(
        self,
        platform,
        device,
        backend,
        model,
        labels,
        display="Weston",
        callback=None,
        width=1920,
        height=1080,
        r=1,
        g=0,
        b=0,
    ):
        """Creates an instance of the demo

        Arguments:
        device -- What camera or video file to use
        backend -- Whether to use NPU or CPU
        model -- the path to the model
        labels -- the path to the labels
        display -- Whether to use X11 or Weston
        callback -- Callback to pass stats to
        width -- Width of output
        height -- Height of output
        r -- Red value for labels
        g -- Green value for labels
        b -- Blue value for labels
        """
        self.loop = None
        self.pipeline = None
        self.running = False
        self.video_caps = None
        self.first_frame = True

        self.BOX_SIZE = 4
        self.LABEL_SIZE = 91
        self.DETECTION_MAX = 20
        self.MAX_OBJECT_DETECTION = 20

        self.Y_SCALE = 10.0
        self.X_SCALE = 10.0
        self.H_SCALE = 5.0
        self.W_SCALE = 5.0

        self.VIDEO_WIDTH = width
        self.VIDEO_HEIGHT = height
        self.MODEL_WIDTH = 300
        self.MODEL_HEIGHT = 300

        self.tflite_model = model
        self.label_path = labels
        self.device = device
        self.backend = backend
        self.display = display
        self.tflite_labels = []
        self.detected_objects = []
        self.callback = callback
        self.r = r
        self.b = b
        self.g = g
        self.platform = platform
        self.current_framerate = 1000

        # Define PXP or GPU2D converter
        if self.platform == "imx93evk":
            self.nxp_converter = "imxvideoconvert_pxp "
        else:
            self.nxp_converter = "imxvideoconvert_g2d "

        if not self.tflite_init():
            raise Exception

        Gst.init(None)

    def run(self):
        """Starts pipeline and run demo"""

        if self.backend == "CPU":
            if self.platform == "imx93evk":
                backend = "true:cpu custom=NumThreads:2"
            else:
                backend = "true:cpu custom=NumThreads:4"
        elif self.backend == "GPU":
            os.environ["USE_GPU_INFERENCE"] = "1"
            backend = (
                "true:gpu custom=Delegate:External," "ExtDelegateLib:libvx_delegate.so"
            )
        else:
            if self.platform == "imx93evk":
                backend = (
                    "true:npu custom=Delegate:External,"
                    "ExtDelegateLib:libethosu_delegate.so"
                )
            else:
                os.environ["USE_GPU_INFERENCE"] = "0"
                backend = (
                    "true:npu custom=Delegate:External,"
                    "ExtDelegateLib:libvx_delegate.so"
                )

        if self.display == "X11":
            display = "ximagesink name=img_tensor "
        elif self.display == "None":
            self.print_time = GLib.get_monotonic_time()
            display = "fakesink "
        else:
            display = "fpsdisplaysink name=img_tensor text-overlay=false video-sink=waylandsink sync=false"

        # main loop
        self.loop = GLib.MainLoop()
        self.old_time = GLib.get_monotonic_time()
        self.update_time = GLib.get_monotonic_time()
        self.reload_time = -1
        self.interval_time = 999999

        # Create decoder for video file
        if self.platform == "imx8qmmek":
            decoder = "h264parse ! v4l2h264dec "
        else:
            decoder = "vpudec "

        if "/dev/video" in self.device:
            gst_launch_cmdline = "v4l2src name=cam_src device=" + self.device
            gst_launch_cmdline += " ! " + self.nxp_converter + "! video/x-raw,width="
            gst_launch_cmdline += str(int(self.VIDEO_WIDTH)) + ",height="
            gst_launch_cmdline += str(int(self.VIDEO_HEIGHT))
            gst_launch_cmdline += ",framerate=30/1,format=BGRx ! tee name=t"
        else:
            gst_launch_cmdline = "filesrc location=" + self.device
            gst_launch_cmdline += " ! qtdemux ! " + decoder + "! tee name=t"

        gst_launch_cmdline += " t. ! " + self.nxp_converter + "!  video/x-raw,"
        gst_launch_cmdline += "width={:d},".format(self.MODEL_WIDTH)
        gst_launch_cmdline += "height={:d},".format(self.MODEL_HEIGHT)
        gst_launch_cmdline += " ! queue max-size-buffers=2 leaky=2 ! "
        gst_launch_cmdline += "videoconvert ! video/x-raw,format=RGB !"
        gst_launch_cmdline += " tensor_converter ! tensor_filter"
        gst_launch_cmdline += " framework=tensorflow-lite model="
        gst_launch_cmdline += self.tflite_model + " accelerator=" + backend
        gst_launch_cmdline += " silent=FALSE name=tensor_filter latency=1 ! "
        gst_launch_cmdline += "tensor_sink name=tensor_sink t. ! "
        gst_launch_cmdline += self.nxp_converter + "! "
        gst_launch_cmdline += "cairooverlay name=tensor_res ! "
        gst_launch_cmdline += "queue max-size-buffers=2 leaky=2 ! "
        gst_launch_cmdline += display

        self.pipeline = Gst.parse_launch(gst_launch_cmdline)

        # bus and message callback
        bus = self.pipeline.get_bus()
        bus.add_signal_watch()
        bus.connect("message", self.on_bus_message)

        self.tensor_filter = self.pipeline.get_by_name("tensor_filter")
        self.wayland_sink = self.pipeline.get_by_name("img_tensor")

        # tensor sink signal : new data callback
        tensor_sink = self.pipeline.get_by_name("tensor_sink")
        tensor_sink.connect("new-data", self.new_data_cb)

        tensor_res = self.pipeline.get_by_name("tensor_res")
        tensor_res.connect("draw", self.draw_overlay_cb)
        tensor_res.connect("caps-changed", self.prepare_overlay_cb)
        if self.callback is not None:
            GObject.timeout_add(500, self.callback, self)

        # start pipeline
        self.pipeline.set_state(Gst.State.PLAYING)
        self.running = True

        self.set_window_title("img_tensor", "NNStreamer Object Detection Example")

        # run main loop
        self.loop.run()

        # quit when received eos or error message
        self.running = False
        self.pipeline.set_state(Gst.State.NULL)

        bus.remove_signal_watch()

    def tflite_init(self):
        """
        :return: True if successfully initialized
        """

        if not os.path.exists(self.tflite_model):
            logging.error("cannot find tflite model [%s]", self.tflite_model)
            return False

        label_path = self.label_path
        try:
            with open(label_path, "r") as label_file:
                for line in label_file.readlines():
                    if line[0].isdigit():
                        while str(len(self.tflite_labels)) not in line:
                            self.tflite_labels.append("Invalid")
                        self.tflite_labels.append(line[line.find(" ") + 1 :])
                    else:
                        self.tflite_labels.append(line)
        except FileNotFoundError:
            logging.error("cannot find tflite label [%s]", label_path)
            return False

        logging.info("finished to load labels, total [%d]", len(self.tflite_labels))
        return True

    # @brief Callback for tensor sink signal.
    def new_data_cb(self, sink, buffer):
        """Callback for tensor sink signal.

        :param sink: tensor sink element
        :param buffer: buffer from element
        :return: None
        """
        if self.running:
            new_time = GLib.get_monotonic_time()
            self.interval_time = new_time - self.old_time
            self.old_time = new_time
            if buffer.n_memory() != 4:
                return False

            #  tensor type is float32.
            # LOCATIONS_IDX:CLASSES_IDX:SCORES_IDX:NUM_DETECTION_IDX
            # 4:20:1:1\,20:1:1:1\,20:1:1:1\,1:1:1:1
            # [0] detection_boxes (default 4th tensor). BOX_SIZE :
            # #MaxDetection, ANY-TYPE
            # [1] detection_classes (default 2nd tensor).
            # #MaxDetection, ANY-TYPE
            # [2] detection_scores (default 3rd tensor)
            # #MaxDetection, ANY-TYPE
            # [3] num_detection (default 1st tensor). 1, ANY-TYPE

            # bytestrings that are based on float32 must be
            # decoded into float list.

            # boxes
            mem_boxes = buffer.peek_memory(0)
            ret, info_boxes = mem_boxes.map(Gst.MapFlags.READ)
            if ret:
                assert info_boxes.size == (
                    self.BOX_SIZE * self.DETECTION_MAX * 4
                ), "Invalid info_box size"
                decoded_boxes = list(
                    np.frombuffer(info_boxes.data, dtype=np.float32)
                )  # decode bytestrings to float list

            # detections
            mem_detections = buffer.peek_memory(1)
            ret, info_detections = mem_detections.map(Gst.MapFlags.READ)
            if ret:
                assert info_detections.size == (
                    self.DETECTION_MAX * 4
                ), "Invalid info_detection size"
                decoded_detections = list(
                    np.frombuffer(info_detections.data, dtype=np.float32)
                )  # decode bytestrings to float list

            # scores
            mem_scores = buffer.peek_memory(2)
            ret, info_scores = mem_scores.map(Gst.MapFlags.READ)
            if ret:
                assert info_scores.size == (
                    self.DETECTION_MAX * 4
                ), "Invalid info_score size"
                decoded_scores = list(
                    np.frombuffer(info_scores.data, dtype=np.float32)
                )  # decode bytestrings to float list

            # num detection
            mem_num = buffer.peek_memory(3)
            ret, info_num = mem_num.map(Gst.MapFlags.READ)
            if ret:
                assert info_num.size == 4, "Invalid info_num size"
                decoded_num = list(
                    np.frombuffer(info_num.data, dtype=np.float32)
                )  # decode bytestrings to float list

            self.get_detected_objects(
                decoded_boxes, decoded_detections, decoded_scores, int(decoded_num[0])
            )

            mem_boxes.unmap(info_boxes)
            mem_detections.unmap(info_detections)
            mem_scores.unmap(info_scores)
            mem_num.unmap(info_num)

            if self.display == "None":
                if (GLib.get_monotonic_time() - self.print_time) > 1000000:
                    inference = self.tensor_filter.get_property("latency")
                    print(
                        "Inference time: "
                        + str(inference / 1000)
                        + " ms ("
                        + "{:5.2f}".format(1 / (inference / 1000000))
                        + " IPS)"
                    )
                    self.print_time = GLib.get_monotonic_time()

    def get_detected_objects(self, boxes, detections, scores, num):
        """Pairs boxes with dectected objects"""
        threshold_score = 0.5
        detected = list()

        for i in range(num):
            score = scores[i]
            if score < threshold_score:
                continue

            c = detections[i]

            box_offset = self.BOX_SIZE * i
            ymin = boxes[box_offset + 0]
            xmin = boxes[box_offset + 1]
            ymax = boxes[box_offset + 2]
            xmax = boxes[box_offset + 3]

            x = xmin * self.MODEL_WIDTH
            y = ymin * self.MODEL_HEIGHT
            width = (xmax - xmin) * self.MODEL_WIDTH
            height = (ymax - ymin) * self.MODEL_HEIGHT

            obj = {
                "class_id": int(c),
                "x": x,
                "y": y,
                "width": width,
                "height": height,
                "prob": score,
            }

            detected.append(obj)

        # update result
        self.detected_objects.clear()

        for d in detected:
            self.detected_objects.append(d)
            if DEBUG:
                print("==============================")
                print("LABEL           : {}".format(self.tflite_labels[d["class_id"]]))
                print("x               : {}".format(d["x"]))
                print("y               : {}".format(d["y"]))
                print("width           : {}".format(d["width"]))
                print("height          : {}".format(d["height"]))
                print("Confidence Score: {}".format(d["prob"]))

    def prepare_overlay_cb(self, overlay, caps):
        """Store the information from the caps that we are interested in."""
        self.video_caps = caps

    def draw_overlay_cb(self, overlay, context, timestamp, duration):
        """Callback to draw the overlay."""
        if self.video_caps is None or not self.running:
            return
        scale_height = self.VIDEO_HEIGHT / 1080
        scale_width = self.VIDEO_WIDTH / 1920
        scale_text = max(scale_height, scale_width)

        # mutex_lock alternative required
        detected = self.detected_objects
        # mutex_unlock alternative needed

        drawed = 0
        context.select_font_face(
            "Sans", cairo.FONT_SLANT_NORMAL, cairo.FONT_WEIGHT_BOLD
        )
        context.set_font_size(int(50.0 * scale_text))
        context.set_source_rgb(self.r, self.g, self.b)

        for obj in detected:
            label = self.tflite_labels[obj["class_id"]][:-1]
            x = obj["x"] * self.VIDEO_WIDTH // self.MODEL_WIDTH
            y = obj["y"] * self.VIDEO_HEIGHT // self.MODEL_HEIGHT
            width = obj["width"] * self.VIDEO_WIDTH // self.MODEL_WIDTH
            height = obj["height"] * self.VIDEO_HEIGHT // self.MODEL_HEIGHT

            # draw rectangle
            context.rectangle(x, y, width, height)
            context.set_line_width(3)
            context.stroke()

            # draw title
            context.move_to(x + 5, y + int(50.0 * scale_text))
            context.show_text(label)

            drawed += 1
            if drawed >= self.MAX_OBJECT_DETECTION:
                break

        inference = self.tensor_filter.get_property("latency")
        # Get current framerate and avg. framerate
        output_wayland = self.wayland_sink.get_property("last-message")
        if output_wayland:
            current_text = re.findall(r"current:\s[\d]+[.\d]*", output_wayland)[0]
            self.current_framerate = float(re.findall(r"[\d]+[.\d]*", current_text)[0])

        context.set_font_size(int(25.0 * scale_text))
        context.move_to(
            int(50 * scale_width), int(self.VIDEO_HEIGHT - (100 * scale_height))
        )
        context.show_text("i.MX NNStreamer Detection Demo")
        if inference == 0:
            context.move_to(
                int(50 * scale_width), int(self.VIDEO_HEIGHT - (75 * scale_height))
            )
            context.show_text("FPS: ")
            context.move_to(
                int(50 * scale_width), int(self.VIDEO_HEIGHT - (50 * scale_height))
            )
            context.show_text("IPS: ")
        elif (
            GLib.get_monotonic_time() - self.reload_time
        ) < 100000 and self.refresh_time != -1:
            context.move_to(
                int(50 * scale_width), int(self.VIDEO_HEIGHT - (75 * scale_height))
            )
            context.show_text(
                "FPS: {:6.2f} ({:6.2f} ms)".format(
                    self.current_framerate, 1.0 / self.current_framerate * 1000
                )
            )
            context.move_to(
                int(50 * scale_width), int(self.VIDEO_HEIGHT - (50 * scale_height))
            )
            context.show_text(
                "IPS: {:6.2f} ({:6.2f} ms)".format(
                    1 / (inference / 1000000), inference / 1000
                )
            )
        else:
            self.reload_time = GLib.get_monotonic_time()
            self.refresh_time = self.interval_time
            self.inference = self.tensor_filter.get_property("latency")
            context.move_to(
                int(50 * scale_width), int(self.VIDEO_HEIGHT - (75 * scale_height))
            )
            context.show_text(
                "FPS: {:6.2f} ({:6.2f} ms)".format(
                    self.current_framerate, 1.0 / self.current_framerate * 1000
                )
            )
            context.move_to(
                int(50 * scale_width), int(self.VIDEO_HEIGHT - (50 * scale_height))
            )
            context.show_text(
                "IPS: {:6.2f} ({:6.2f} ms)".format(
                    1 / (inference / 1000000), inference / 1000
                )
            )
        if self.first_frame:
            context.move_to(int(400 * scale_width), int(600 * scale_height))
            context.set_font_size(int(200.0 * min(scale_width, scale_height)))
            context.show_text("Loading...")
            self.first_frame = False
        context.fill()

    def on_bus_message(self, bus, message):
        """Callback for message.

        :param bus: pipeline bus
        :param message: message from pipeline
        :return: None
        """
        if message.type == Gst.MessageType.EOS:
            logging.info("received eos message")
            self.loop.quit()
        elif message.type == Gst.MessageType.ERROR:
            error, debug = message.parse_error()
            logging.warning("[error] %s : %s", error.message, debug)
            self.loop.quit()
        elif message.type == Gst.MessageType.WARNING:
            error, debug = message.parse_warning()
            logging.warning("[warning] %s : %s", error.message, debug)
        elif message.type == Gst.MessageType.STREAM_START:
            logging.info("received start message")
        elif message.type == Gst.MessageType.QOS:
            data_format, processed, dropped = message.parse_qos_stats()
            format_str = Gst.Format.get_name(data_format)
            logging.debug(
                "[qos] format[%s] processed[%d] dropped[%d]",
                format_str,
                processed,
                dropped,
            )

    def set_window_title(self, name, title):
        """Set window title for X11.

        :param name: GstXImageasink element name
        :param title: window title
        :return: None
        """
        element = self.pipeline.get_by_name(name)
        if element is not None:
            pad = element.get_static_pad("sink")
            if pad is not None:
                tags = Gst.TagList.new_empty()
                tags.add_value(Gst.TagMergeMode.APPEND, "title", title)
                pad.send_event(Gst.Event.new_tag(tags))


if __name__ == "__main__":
    if (
        len(sys.argv) != 7
        and len(sys.argv) != 5
        and len(sys.argv) != 9
        and len(sys.argv) != 12
        and len(sys.argv) != 6
    ):
        print(
            "Usage: python3 nndetection.py <dev/video*/video file>"
            + " <NPU/CPU> <model file> <label file>"
        )
        exit()
    # Get platform
    platform = os.uname().nodename
    if len(sys.argv) == 7:
        example = ObjectDetection(
            platform,
            sys.argv[1],
            sys.argv[2],
            sys.argv[3],
            sys.argv[4],
            sys.argv[5],
            sys.argv[6],
        )
    if len(sys.argv) == 5:
        example = ObjectDetection(
            platform, sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4]
        )
    if len(sys.argv) == 6:
        example = ObjectDetection(
            platform, sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4], sys.argv[5]
        )
    if len(sys.argv) == 9:
        example = ObjectDetection(
            platform,
            sys.argv[1],
            sys.argv[2],
            sys.argv[3],
            sys.argv[4],
            sys.argv[5],
            sys.argv[6],
            int(sys.argv[7]),
            int(sys.argv[8]),
        )
    if len(sys.argv) == 12:
        example = ObjectDetection(
            platform,
            sys.argv[1],
            sys.argv[2],
            sys.argv[3],
            sys.argv[4],
            sys.argv[5],
            sys.argv[6],
            int(sys.argv[7]),
            int(sys.argv[8]),
            int(sys.argv[9]),
            int(sys.argv[10]),
            int(sys.argv[11]),
        )
    example.run()

 

        self.pipeline = Gst.parse_launch(
            'v4l2src name=cam_src ! videoconvert ! videoscale ! '
            'video/x-raw,width=640,height=480,format=RGB ! tee name=t_raw '
            't_raw. ! queue leaky=2 max-size-buffers=2 ! videoscale ! video/x-raw,width=300,height=300 ! tensor_converter ! '
            'tensor_transform mode=arithmetic option=typecast:float32,add:-127.5,div:127.5 ! '
            'tensor_filter framework=tensorflow-lite model=' + self.tflite_model + ' ! '
            'tensor_decoder mode=bounding_boxes option1=mobilenet-ssd option2='
            + self.tflite_label + ' option3=' + self.tflite_box_prior + ' option4=640:480 option5=300:300 !'
            'compositor name=mix sink_0::zorder=2 sink_1::zorder=1 ! videoconvert ! ximagesink '
            't_raw. ! queue leaky=2 max-size-buffers=10 ! mix. '
        )

[링크 : https://github.com/nnstreamer/nnstreamer-example/blob/main/native/example_object_detection_tensorflow_lite/nnstreamer_example_object_detection_tflite.py]

 

Printing out gst_launch_cmdline yields the following GStreamer pipeline.

v4l2src name=cam_src device=/dev/video3 ! imxvideoconvert_g2d ! video/x-raw,width=1920,height=1080,framerate=30/1,format=BGRx ! tee name=t t. ! imxvideoconvert_g2d !  video/x-raw,width=300,height=300, ! queue max-size-buffers=2 leaky=2 ! videoconvert ! video/x-raw,format=RGB ! tensor_converter ! tensor_filter framework=tensorflow-lite model=/home/root/.cache/gopoint/mobilenet_ssd_v2_coco_quant_postprocess.tflite accelerator=true:npu custom=Delegate:External,ExtDelegateLib:libvx_delegate.so silent=FALSE name=tensor_filter latency=1 ! tensor_sink name=tensor_sink t. ! imxvideoconvert_g2d ! cairooverlay name=tensor_res ! queue max-size-buffers=2 leaky=2 ! fpsdisplaysink name=img_tensor text-overlay=false video-sink=waylandsink sync=false

 

Hard to read as one line, so here it is split element by element:

v4l2src name=cam_src device=/dev/video3 !
imxvideoconvert_g2d !
video/x-raw,width=1920,height=1080,framerate=30/1,format=BGRx !
tee name=t t. !
imxvideoconvert_g2d !
video/x-raw,width=300,height=300, !
queue max-size-buffers=2 leaky=2 !
videoconvert !
video/x-raw,format=RGB !
tensor_converter !
tensor_filter framework=tensorflow-lite model=/home/root/.cache/gopoint/mobilenet_ssd_v2_coco_quant_postprocess.tflite accelerator=true:npu custom=Delegate:External,ExtDelegateLib:libvx_delegate.so silent=FALSE name=tensor_filter latency=1 !
tensor_sink name=tensor_sink t. !
imxvideoconvert_g2d !
cairooverlay name=tensor_res !
queue max-size-buffers=2 leaky=2 !
fpsdisplaysink name=img_tensor text-overlay=false video-sink=waylandsink sync=false

 

+

2024.01.03

# cd /home/root/.nxp-demo-experience/scripts/machine_learning
# python3 nndetection.py /dev/video3 NPU /home/root/.cache/gopoint/mobilenet_ssd_v2_coco_quant_postprocess.tflite /home/root/.cache/gopoint/coco_labels.txt

 

It runs under gst-launch too, but the callbacks aren't wired up, so the overlay never gets drawn and it doesn't show the same picture as the demo.

gst-launch-1.0 v4l2src name=cam_src device=/dev/video3 ! imxvideoconvert_g2d ! video/x-raw,width=1920,height=1080,framerate=30/1,format=BGRx ! tee name=t t. ! imxvideoconvert_g2d ! video/x-raw,width=300,height=300, ! queue max-size-buffers=2 leaky=2 ! videoconvert ! video/x-raw,format=RGB ! tensor_converter ! tensor_filter framework=tensorflow-lite model=/home/root/.cache/gopoint/mobilenet_ssd_v2_coco_quant_postprocess.tflite accelerator=true:npu custom=Delegate:External,ExtDelegateLib:libvx_delegate.so silent=FALSE name=tensor_filter latency=1 !  tensor_sink name=tensor_sink t. ! imxvideoconvert_g2d ! cairooverlay name=tensor_res ! queue max-size-buffers=2 leaky=2 ! fpsdisplaysink name=img_tensor text-overlay=false video-sink=waylandsink sync=false
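
That overlay is exactly what gst-launch cannot do: the cairooverlay element needs a "draw" signal handler attached from code. A minimal sketch of wiring one up in Python (pipeline heavily shortened; device and sink assumed from the command above, and the box drawn is just a placeholder):

import gi
gi.require_version("Gst", "1.0")
gi.require_foreign("cairo")
from gi.repository import Gst, GLib

Gst.init(None)
pipeline = Gst.parse_launch(
    "v4l2src device=/dev/video3 ! videoconvert ! "
    "cairooverlay name=tensor_res ! waylandsink"
)

def on_draw(overlay, context, timestamp, duration):
    # The demo draws detection boxes here; this just draws one fixed box.
    context.set_source_rgb(1, 0, 0)
    context.set_line_width(3)
    context.rectangle(100, 100, 200, 200)
    context.stroke()

pipeline.get_by_name("tensor_res").connect("draw", on_draw)
pipeline.set_state(Gst.State.PLAYING)
GLib.MainLoop().run()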
Posted by 구차니

Tab completion turns up a handful of datasets; I only know mnist, so I'm looking up the rest.

>>> tf.keras.datasets.
tf.keras.datasets.boston_housing  tf.keras.datasets.cifar100        tf.keras.datasets.imdb            tf.keras.datasets.reuters         
tf.keras.datasets.cifar10         tf.keras.datasets.fashion_mnist   tf.keras.datasets.mnist

 

imdb is the movie DB (movie reviews).

boston_housing is the Boston housing-price data as defined on the StatLib site.

reuters seems to be 11,228 (Reuters) newswires labeled across 46 topics.

This is a dataset of 11,228 newswires from Reuters, labeled over 46 topics.

[링크 : https://www.tensorflow.org/api_docs/python/tf/keras/datasets/boston_housing/load_data]

[링크 : https://www.tensorflow.org/api_docs/python/tf/keras/datasets]

 

cifar10 has 10 classes, so the output should come out 10-wide, just like MNIST..

The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, with 6000 images per class. There are 50000 training images and 10000 test images.

[링크 : https://www.tensorflow.org/datasets/catalog/cifar10?hl=en]
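
A quick shape check confirms that (a sketch; TensorFlow assumed installed):

import tensorflow as tf

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
print(x_train.shape)  # (50000, 32, 32, 3): 50000 training images, 32x32 RGB
print(y_train.shape)  # (50000, 1): labels 0..9, so a classifier ends in 10 logits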

 

This dataset is just like the CIFAR-10, except it has 100 classes containing 600 images each. There are 500 training images and 100 testing images per class. The 100 classes in the CIFAR-100 are grouped into 20 superclasses. Each image comes with a "fine" label (the class to which it belongs) and a "coarse" label (the superclass to which it belongs).

[링크 : https://www.tensorflow.org/datasets/catalog/cifar100?hl=en]

[링크 : https://www.tensorflow.org/datasets/catalog/fashion_mnist?hl=en]

[링크 : https://www.tensorflow.org/datasets/catalog/emnist?hl=en]

Posted by 구차니

Neat.. it just downloads the data by itself? (Keras caches these under ~/.keras/datasets.)

>>> mnist = tf.keras.datasets.mnist
>>> (train_images, train_labels), (test_images, test_labels) = mnist.load_data()
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
11490434/11490434 [==============================] - 1s 0us/step

 

Training and saving to a .tflite file:

import tensorflow as tf
import numpy as np

mnist = tf.keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

train_images = train_images.astype(np.float32) / 255.0
test_images = test_images.astype(np.float32) / 255.0

model = tf.keras.Sequential([
  tf.keras.layers.InputLayer(input_shape=(28, 28)),
  tf.keras.layers.Reshape(target_shape=(28, 28, 1)),
  tf.keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation='relu'),
  tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(10)
])

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(
                  from_logits=True),
              metrics=['accuracy'])
model.fit(
  train_images,
  train_labels,
  epochs=5,
  validation_data=(test_images, test_labels)
)

# Convert to a plain (float) model
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# This alone made no difference
# converter = tf.lite.TFLiteConverter.from_keras_model(model)
# converter.optimizations = [tf.lite.Optimize.DEFAULT]
# tflite_model_quant = converter.convert()

# For full-integer quantization, the code below is needed
def representative_data_gen():
  for input_value in tf.data.Dataset.from_tensor_slices(train_images).batch(1).take(100):
    yield [input_value]

converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_data_gen
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.uint8
tflite_model_quant = converter.convert()

# Save to files
import pathlib

tflite_models_dir = pathlib.Path("/tmp/mnist_tflite_models/")
tflite_models_dir.mkdir(exist_ok=True, parents=True)

# Save the unquantized/float model:
tflite_model_file = tflite_models_dir/"mnist_model.tflite"
tflite_model_file.write_bytes(tflite_model)

# Save the quantized model:
tflite_model_quant_file = tflite_models_dir/"mnist_model_quant.tflite"
tflite_model_quant_file.write_bytes(tflite_model_quant)

[링크 : https://www.tensorflow.org/lite/performance/post_training_integer_quant?hl=ko]

 

Looking at the generated files in Netron, though.. why is there no visible difference between the quantized model and the plain one?

 

 

+

With full-integer quantization the tensors do change to uint8.
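
A way to check without Netron is to load both files with the TFLite interpreter and compare the I/O dtypes (a sketch; paths from the code above):

import tensorflow as tf

for path in ["/tmp/mnist_tflite_models/mnist_model.tflite",
             "/tmp/mnist_tflite_models/mnist_model_quant.tflite"]:
    interp = tf.lite.Interpreter(model_path=path)
    # The float model reports float32 here; the quantized one reports uint8.
    print(path,
          interp.get_input_details()[0]["dtype"],
          interp.get_output_details()[0]["dtype"])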

 

Incidentally, I had a misconception about MNIST.

The output is [1,10] because it is a database of handwritten digits 0-9, not letters -_-!

So it's only natural that the output has exactly 10 entries..

[링크 : https://en.wikipedia.org/wiki/MNIST_database]

 

+

There is a separate handwritten-letters dataset called EMNIST.

[링크 : https://www.nist.gov/itl/products-and-services/emnist-dataset]

[링크 : https://www.tensorflow.org/datasets/catalog/emnist?hl=ko]

Posted by 구차니

On the left, a PC (Ubuntu 22.04);

on the right, built from source directly on the ARM board.

Both use the same tessdata, yet the recognition rates differ enormously -_-

 

$ tesseract --dpi 132 스크린샷\ 2023-12-26\ 17-19-43.png - -v
tesseract 4.1.1
 leptonica-1.82.0
  libgif 5.1.9 : libjpeg 8d (libjpeg-turbo 2.1.1) : libpng 1.6.37 : libtiff 4.3.0 : zlib 1.2.11 : libwebp 1.2.2 : libopenjp2 2.4.0
 Found AVX2
 Found AVX
 Found FMA
 Found SSE
 Found libarchive 3.6.0 zlib/1.2.11 liblzma/5.2.5 bz2lib/1.0.8 liblz4/1.9.3 libzstd/1.4.8
Warning:guessing pitch as xheight on row 1, block 1
Pec ea a

ach Seer

OEE PC LU)

Pee at Pere EEC
Rae ientcee Prete meee ceed

aa MEL ig
Pace
Pace Cra ur ue ecg

feta
Reset

ERTL Ee a Peay



# ./tesseract /home/root/a.png - -v
tesseract 5.3.3
 leptonica-1.84.0
  libjpeg 6b (libjpeg-turbo 2.1.5.1) : libpng 1.6.39 : zlib 1.2.13 : libopenjp2 2.5.0
 Found NEON
 Found libarchive 3.6.2 zlib/1.2.13 liblzma/5.4.4 bz2lib/1.0.8 libzstd/1.5.4
 Found libcurl/8.0.1 OpenSSL/3.1.3 zlib/1.2.13 libidn2/2.3.4
Error in pixReadMemTiff: function not present
Error in pixReadMem: tiff: no pix returned
Error in pixaGenerateFontFromString: pix not made
Error in bmfCreate: font pixa not made
Estimating resolution as 132
Warning:guessing pitch as xheight on row 1, block 1
1.MX8MP Evaluation Kit

.MxeMP 1.50 GHz

2020-06-07 2848 MB RAM

> Console Options wait, 65535 means
Select Language <Standard English> Reset

> Device Manager
> Boot Manager
> Boot Maintenance Manager

Continue
Reset

ay=Move Highlight <Enter>=Select Entry

 

Is the --dpi option actually useful..?

[링크 : https://simmigyeong.tistory.com/3]
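
For reference, the same --dpi hint can be passed from Python through pytesseract (a sketch; pytesseract and Pillow assumed installed, filename hypothetical):

from PIL import Image
import pytesseract

# The config string is handed straight through to the tesseract CLI.
text = pytesseract.image_to_string(Image.open("a.png"), config="--dpi 132")
print(text)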

Posted by 구차니

Tracing with strace turned up a curious(?) file.

eng.traineddata is just "data",

but osd.traineddata is reported as a Matlab v4 mat-file. Can that be right.. or is it a coincidence?

 

$ ls -alh /usr/share/tesseract-ocr/4.00/tessdata
합계 15M
drwxr-xr-x 4 root root 4.0K 12월 21 10:48 .
drwxr-xr-x 3 root root 4.0K  5월 30  2023 ..
drwxr-xr-x 2 root root 4.0K  5월 30  2023 configs
-rw-r--r-- 1 root root 4.0M  9월 16  2017 eng.traineddata
-rw-r--r-- 1 root root  11M  9월 16  2017 osd.traineddata
-rw-r--r-- 1 root root  572  2월  9  2022 pdf.ttf
drwxr-xr-x 2 root root 4.0K  5월 30  2023 tessconfigs

$ file *.traineddata
eng.traineddata: data
osd.traineddata: Matlab v4 mat-file (little endian) , text, rows 4294967295, columns 4294967295, imaginary
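
Almost certainly a coincidence: file(1) only pattern-matches the leading bytes, and osd.traineddata apparently happens to start with bytes that satisfy the Matlab v4 magic test. A quick way to eyeball those bytes (a sketch, path as above):

with open("/usr/share/tesseract-ocr/4.00/tessdata/osd.traineddata", "rb") as f:
    print(f.read(16).hex())  # file(1) bases its guess on bytes like these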


$ tree /usr/share/tesseract-ocr/4.00/tessdata
.
├── configs
│   ├── alto
│   ├── ambigs.train
│   ├── api_config
│   ├── bigram
│   ├── box.train
│   ├── box.train.stderr
│   ├── digits
│   ├── get.images
│   ├── hocr
│   ├── inter
│   ├── kannada
│   ├── linebox
│   ├── logfile
│   ├── lstm.train
│   ├── lstmbox
│   ├── lstmdebug
│   ├── makebox
│   ├── pdf
│   ├── quiet
│   ├── rebox
│   ├── strokewidth
│   ├── tsv
│   ├── txt
│   ├── unlv
│   └── wordstrbox
├── eng.traineddata
├── osd.traineddata
├── pdf.ttf
└── tessconfigs
    ├── batch
    ├── batch.nochop
    ├── matdemo
    ├── msdemo
    ├── nobatch
    └── segdemo

2 directories, 34 files

 

kor is about 15 MB.

[링크 : https://github.com/tesseract-ocr/tessdata]
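
Grabbing a language file is just a raw download from that repo (URL assumed from the repo layout):

$ wget https://github.com/tesseract-ocr/tessdata/raw/main/kor.traineddata -P /home/root/tessdata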

 

 

+

Running it, predictably(?), it errors out saying the tesseract data files are missing.

Checked the environment variable just in case, and it's empty;

copying tessdata over from the PC and setting the variable fixes it.

~/tesseract-main/build/bin# ./tesseract /home/root/a.png -
Error in pixReadMemTiff: function not present
Error in pixReadMem: tiff: no pix returned
Error in pixaGenerateFontFromString: pix not made
Error in bmfCreate: font pixa not made
Error opening data file ./eng.traineddata
Please make sure the TESSDATA_PREFIX environment variable is set to your "tessdata" directory.
Failed loading language 'eng'
Tesseract couldn't load any languages!
Could not initialize tesseract.
root@imx8mpevk:~/tesseract-main/build/bin# echo $TESSDATA_PREFIX

# export TESSDATA_PREFIX=/home/root/tessdata

 

Fed it the image below (the PC build refused, complaining the DPI was off).

 

Surprisingly, it recognizes it well.

On the PC, recognition came out garbled but took 0.2 s;

on the i.MX8, recognition was good but took 1.6 s. Hmm.. too slow to run in real time..

# time ./tesseract /home/root/a.png -
Error in pixReadMemTiff: function not present
Error in pixReadMem: tiff: no pix returned
Error in pixaGenerateFontFromString: pix not made
Error in bmfCreate: font pixa not made
Estimating resolution as 132
Warning:guessing pitch as xheight on row 1, block 1
1.MX8MP Evaluation Kit

.MxeMP 1.50 GHz

2020-06-07 2848 MB RAM

> Console Options wait, 65535 means
Select Language <Standard English> Reset

> Device Manager
> Boot Manager
> Boot Maintenance Manager

Continue
Reset

ay=Move Highlight <Enter>=Select Entry

real 0m1.646s
user 0m1.578s
sys 0m0.060s

Posted by 구차니

Even building it is a grind.

 

Building tesseract errors out saying it needs something called leptonica,

# cmake ..
-- Configuring tesseract version 5.3.3...
-- IPO / LTO not supported: <Change Dir: /home/root/tesseract-main/build/CMakeFiles/_CMakeLTOTest-CXX/bin

Run Build Command(s):/usr/bin/make -f Makefile && /usr/bin/cmake -S/home/root/tesseract-main/build/CMakeFiles/_CMakeLTOTest-CXX/src -B/home/root/tesseract-main/build/CMakeFiles/_CMakeLTOTest-CXX/bin --check-build-system CMakeFiles/Makefile.cmake 0
/usr/bin/cmake -E cmake_progress_start /home/root/tesseract-main/build/CMakeFiles/_CMakeLTOTest-CXX/bin/CMakeFiles /home/root/tesseract-main/build/CMakeFiles/_CMakeLTOTest-CXX/bin//CMakeFiles/progress.marks
/usr/bin/make  -f CMakeFiles/Makefile2 all
make[1]: Entering directory '/home/root/tesseract-main/build/CMakeFiles/_CMakeLTOTest-CXX/bin'
/usr/bin/make  -f CMakeFiles/foo.dir/build.make CMakeFiles/foo.dir/depend
make[2]: Entering directory '/home/root/tesseract-main/build/CMakeFiles/_CMakeLTOTest-CXX/bin'
cd /home/root/tesseract-main/build/CMakeFiles/_CMakeLTOTest-CXX/bin && /usr/bin/cmake -E cmake_depends "Unix Makefiles" /home/root/tesseract-main/build/CMakeFiles/_CMakeLTOTest-CXX/src /home/root/tesseract-main/build/CMakeFiles/_CMakeLTOTest-CXX/src /home/root/tesseract-main/build/CMakeFiles/_CMakeLTOTest-CXX/bin /home/root/tesseract-main/build/CMakeFiles/_CMakeLTOTest-CXX/bin /home/root/tesseract-main/build/CMakeFiles/_CMakeLTOTest-CXX/bin/CMakeFiles/foo.dir/DependInfo.cmake
make[2]: Leaving directory '/home/root/tesseract-main/build/CMakeFiles/_CMakeLTOTest-CXX/bin'
/usr/bin/make  -f CMakeFiles/foo.dir/build.make CMakeFiles/foo.dir/build
make[2]: Entering directory '/home/root/tesseract-main/build/CMakeFiles/_CMakeLTOTest-CXX/bin'
[ 25%] Building CXX object CMakeFiles/foo.dir/foo.cpp.o
/usr/bin/c++   -flto=auto -fno-fat-lto-objects -o CMakeFiles/foo.dir/foo.cpp.o -c /home/root/tesseract-main/build/CMakeFiles/_CMakeLTOTest-CXX/src/foo.cpp
[ 50%] Linking CXX static library libfoo.a
/usr/bin/cmake -P CMakeFiles/foo.dir/cmake_clean_target.cmake
/usr/bin/cmake -E cmake_link_script CMakeFiles/foo.dir/link.txt --verbose=1
"CMAKE_CXX_COMPILER_AR-NOTFOUND" cr libfoo.a CMakeFiles/foo.dir/foo.cpp.o
Error running link command: No such file or directory
make[2]: *** [CMakeFiles/foo.dir/build.make:100: libfoo.a] Error 2
make[2]: Leaving directory '/home/root/tesseract-main/build/CMakeFiles/_CMakeLTOTest-CXX/bin'
make[1]: *** [CMakeFiles/Makefile2:88: CMakeFiles/foo.dir/all] Error 2
make[1]: Leaving directory '/home/root/tesseract-main/build/CMakeFiles/_CMakeLTOTest-CXX/bin'
make: *** [Makefile:94: all] Error 2

>
-- CMAKE_SYSTEM_PROCESSOR=<aarch64>
-- LTO build is not supported on arm/RBPi.
-- Could NOT find Leptonica (missing: Leptonica_DIR)
-- Checking for module 'lept>=1.74'
--   No package 'lept' found
CMake Error at CMakeLists.txt:404 (message):
  Cannot find required library Leptonica.  Quitting!


-- Configuring incomplete, errors occurred!
See also "/home/root/tesseract-main/build/CMakeFiles/CMakeOutput.log".
See also "/home/root/tesseract-main/build/CMakeFiles/CMakeError.log".

[링크 : https://github.com/tesseract-ocr/tesseract]

 

Trying to build this leptonica thing, now it complains openjp2 is missing -_-

# cmake ..
-- Could NOT find GIF (missing: GIF_LIBRARY GIF_INCLUDE_DIR) (Required is at least version "5")
-- Could NOT find TIFF (missing: TIFF_LIBRARY TIFF_INCLUDE_DIR) 
-- Could NOT find WebP (missing: WebP_DIR)
-- WebP_INCLUDE_DIR : WebP_INCLUDE_DIR-NOTFOUND
WebP_MUX_INCLUDE_DIR: WebP_MUX_INCLUDE_DIR-NOTFOUND
WebP_LIBRARY: WebP_LIBRARY-NOTFOUND
WebP_MUX_LIBRARY: WebP_MUX_LIBRARY-NOTFOUND
CMake Error at /usr/lib/openjpeg-2.5/OpenJPEGTargets.cmake:95 (message):
  The imported target "openjp2_static" references the file

     "/usr/lib/libopenjp2.a"

  but this file does not exist.  Possible reasons include:

  * The file was deleted, renamed, or moved to another location.

  * An install or uninstall procedure did not complete successfully.

  * The installation package was faulty and contained

     "/usr/lib/openjpeg-2.5/OpenJPEGTargets.cmake"

  but not all the files it references.

Call Stack (most recent call first):
  /usr/lib/openjpeg-2.5/OpenJPEGConfig.cmake:28 (include)
  CMakeLists.txt:186 (find_package)


-- Configuring incomplete, errors occurred!
See also "/home/root/leptonica-master/build/CMakeFiles/CMakeOutput.log".

 

Since it's a hassle(!), just comment the version check out and force the build with the three-argument variant!

//#if OPJ_VERSION_MINOR == 0                                                
//    opj_stream_set_user_data(l_stream, fp);                               
//#else                                                                  
    opj_stream_set_user_data(l_stream, fp,                               
                             (opj_stream_free_user_data_fn)NULL);        
//#endif                                                    

[링크 : https://github.com/danbloomberg/leptonica]

[링크 : http://www.leptonica.org/]

 

openjpeg itself built without any particular errors.

[링크 : https://github.com/uclouvain/openjpeg]
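So the dependency chain, bottom-up, ends up as openjpeg → leptonica → tesseract. A rough sketch (directory names are just where I happened to unpack the sources):

$ cd ~/openjpeg-master/build  && cmake .. && make && make install
$ cd ~/leptonica-master/build && cmake .. && make && make install
$ cd ~/tesseract-main/build   && cmake .. && make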

 

In leptonica,

src/jp2kio.c errors out in the section below: MINOR evaluates to 0, so it takes the old-version(?) path and calls the function with a signature that no longer matches.

/home/root/leptonica-master/src/jp2kio.c: In function 'opjCreateStream':
/home/root/leptonica-master/src/jp2kio.c:938:5: error: too few arguments to function 'opj_stream_set_user_data'
  938 |     opj_stream_set_user_data(l_stream, fp);
      |     ^~~~~~~~~~~~~~~~~~~~~~~~

 

Going back to tesseract, it errors that the pangocairo package is missing.. damn.. that name brings back nightmares

from the grief(?) it gave me back in the weston days:

--   No package 'pangocairo' found

 

Installed meson:

# pip3 install meson
Collecting meson
  Downloading meson-1.3.0-py3-none-any.whl (976 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 976.4/976.4 kB 3.5 MB/s eta 0:00:00
Installing collected packages: meson
Successfully installed meson-1.3.0

 

Then building pango says it needs git, so git gets installed too:

WARNING: CMake Toolchain: Failed to determine CMake compilers state
Run-time dependency fribidi found: NO (tried pkgconfig and cmake)
Looking for a fallback subproject for the dependency fribidi

../meson.build:218:14: ERROR: Git program not found, cannot download fribidi.wrap via git.

 

Grabbed it from the link below and built it..

$ wget https://www.kernel.org/pub/software/scm/git/git-2.43.0.tar.gz

 

Rebuilding pango, it flat-out refuses(!) if ninja is missing:

$ mkdir build
$ cd build
$ meson ..
ERROR: Could not detect Ninja v1.8.2 or newer

 

Building and installing ninja runs into yet another tackle from behind -_-

So, keep cmake at arm's length:

[ 12%] Building CXX object CMakeFiles/libninja.dir/src/dyndep_parser.cc.o
In file included from /home/root/ninja-master/build/_deps/googletest-src/googletest/src/gtest-all.cc:42:
/home/root/ninja-master/build/_deps/googletest-src/googletest/src/gtest-death-test.cc: In function 'bool testing::internal::StackGrowsDown()':
/home/root/ninja-master/build/_deps/googletest-src/googletest/src/gtest-death-test.cc:1301:24: error: 'dummy' may be used uninitialized [-Werror=maybe-uninitialized]
 1301 |   StackLowerThanAddress(&dummy, &result);
      |   ~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~

 

Building it with Python is the better way..

# python3 configure.py --bootstrap

[링크 : https://github.com/ninja-build/ninja]
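The bootstrap leaves a ninja binary in the source root, so copying it somewhere on PATH is all that's left (presumably why meson finds it at /usr/bin/ninja below):

$ cp ./ninja /usr/bin/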

 

Back to pango again:

$ meson ..
Found ninja-1.12.0.git at /usr/bin/ninja
WARNING: Running the setup command as `meson [options]` instead of `meson setup [options]` is ambiguous and deprecated.

 

Typing ninja inside build just builds everything. Plus install:

~/pango-main/build# ninja
~/pango-main/build# ninja install

 

pango is done, but trying to rebuild tesseract still fails -_-

Should I have built it into a proper package instead of just installing it?

-- Checking for modules 'pango>=1.38.0;cairo;pangoft2;pangocairo;fontconfig'
--   No package 'pango' found
--   No package 'pangoft2' found
--   No package 'pangocairo' found

 

Since it's a hassle(!), edit the cmake file so it doesn't check for the packages:

~/tesseract-main/build# vi ../src/training/CMakeLists.txt
  if(PKG_CONFIG_FOUND OR SW_BUILD)
                   
    if(PKG_CONFIG_FOUND)  
      pkg_check_modules(
        PANGO                            
        REQUIRED
        IMPORTED_TARGET    
#        pango>=1.38.0
#        cairo       
#        pangoft2              
#        pangocairo
        fontconfig)  
    endif()
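In hindsight there is a cleaner escape hatch: tesseract's CMake has an option to skip building the training tools, which is what drags in pango/cairo in the first place. Assuming the option is still named BUILD_TRAINING_TOOLS:

$ cmake .. -DBUILD_TRAINING_TOOLS=OFF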

 

[ 95%] Building CXX object src/training/CMakeFiles/pango_training.dir/pango/ligature_table.cpp.o
In file included from /home/root/tesseract-main/src/training/pango/ligature_table.cpp:24:
/home/root/tesseract-main/src/training/pango/pango_font_info.h:27:10: fatal error: pango/pango-font.h: No such file or directory
   27 | #include "pango/pango-font.h"
      |          ^~~~~~~~~~~~~~~~~~~~

[ 96%] Building CXX object src/training/CMakeFiles/pango_training.dir/pango/tlog.cpp.o
In file included from /home/root/tesseract-main/src/training/pango/stringrenderer.cpp:20:
/home/root/tesseract-main/src/training/pango/stringrenderer.h:33:10: fatal error: pango/pango-layout.h: No such file or directory
   33 | #include "pango/pango-layout.h"
      |          ^~~~~~~~~~~~~~~~~~~~~~
compilation terminated.
make[2]: *** [src/training/CMakeFiles/pango_training.dir/build.make:118: src/training/CMakeFiles/pango_training.dir/pango/stringrenderer.cpp.o] Error 1
make[2]: *** Waiting for unfinished jobs....
In file included from /home/root/tesseract-main/src/training/pango/ligature_table.cpp:24:
/home/root/tesseract-main/src/training/pango/pango_font_info.h:27:10: fatal error: pango/pango-font.h: No such file or directory
   27 | #include <pango/pango-font.h>
      |          ^~~~~~~~~~~~~~~~~~~~
compilation terminated.
make[2]: *** [src/training/CMakeFiles/pango_training.dir/build.make:90: src/training/CMakeFiles/pango_training.dir/pango/ligature_table.cpp.o] Error 1
In file included from /home/root/tesseract-main/src/training/pango/pango_font_info.cpp:32:
/home/root/tesseract-main/src/training/pango/pango_font_info.h:27:10: fatal error: pango/pango-font.h: No such file or directory
   27 | #include <pango/pango-font.h>
      |          ^~~~~~~~~~~~~~~~~~~~
compilation terminated.

 

The package check is probably bailing on me again, so change it by hand.. -_-

#include "pango/pango-font.h"
#include "pango/pango.h"
#include "pango/pangocairo.h"

#include <pango/pango-font.h>
#include <pango/pango.h>
#include <pango/pangocairo.h>
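The quoted includes are the originals; the angle-bracket ones are what I changed them to by hand. The real culprit, though, is probably that pango installed its .pc files under /usr/local where pkg-config doesn't look by default, so pointing it there (exact path is a guess) might have made the original check pass without touching the sources:

$ export PKG_CONFIG_PATH=/usr/local/lib/pkgconfig:$PKG_CONFIG_PATH
$ cmake ..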

 

There's just no end to it -_-

[ 96%] Building CXX object src/training/CMakeFiles/pango_training.dir/pango/pango_font_info.cpp.o
In file included from /usr/local/include/pango/pango-font.h:25,
                 from /usr/local/include/pango/pango-attributes.h:25,
                 from /usr/local/include/pango/pango-layout.h:25,
                 from /home/root/tesseract-main/src/training/pango/stringrenderer.h:33,
                 from /home/root/tesseract-main/src/training/pango/stringrenderer.cpp:20:
/usr/local/include/pango/pango-coverage.h:25:10: fatal error: glib-object.h: No such file or directory
   25 | #include <glib-object.h>
      |          ^~~~~~~~~~~~~~~
compilation terminated.
make[2]: *** [src/training/CMakeFiles/pango_training.dir/build.make:118: src/training/CMakeFiles/pango_training.dir/pango/stringrenderer.cpp.o] Error 1
make[2]: *** Waiting for unfinished jobs....
In file included from /usr/local/include/pango/pango-font.h:25,
                 from /home/root/tesseract-main/src/training/pango/pango_font_info.h:27,
                 from /home/root/tesseract-main/src/training/pango/ligature_table.cpp:24:
/usr/local/include/pango/pango-coverage.h:25:10: fatal error: glib-object.h: No such file or directory
   25 | #include <glib-object.h>
      |          ^~~~~~~~~~~~~~~
compilation terminated.
make[2]: *** [src/training/CMakeFiles/pango_training.dir/build.make:90: src/training/CMakeFiles/pango_training.dir/pango/ligature_table.cpp.o] Error 1
In file included from /usr/local/include/pango/pango-font.h:25,
                 from /home/root/tesseract-main/src/training/pango/pango_font_info.h:27,
                 from /home/root/tesseract-main/src/training/pango/pango_font_info.cpp:32:
/usr/local/include/pango/pango-coverage.h:25:10: fatal error: glib-object.h: No such file or directory
   25 | #include <glib-object.h>
      |          ^~~~~~~~~~~~~~~
compilation terminated.

 

[ 96%] Building CXX object src/training/CMakeFiles/pango_training.dir/pango/stringrenderer.cpp.o
In file included from /usr/local/include/pango/pango-coverage.h:25,
                 from /usr/local/include/pango/pango-font.h:25,
                 from /usr/local/include/pango/pango-attributes.h:25,
                 from /usr/local/include/pango/pango-layout.h:25,
                 from /home/root/tesseract-main/src/training/pango/stringrenderer.h:33,
                 from /home/root/tesseract-main/src/training/pango/stringrenderer.cpp:20:
/usr/include/glib-2.0/glib-object.h:24:10: fatal error: gobject/gbinding.h: No such file or directory
   24 | #include <gobject/gbinding.h>
      |          ^~~~~~~~~~~~~~~~~~~~
compilation terminated.
make[2]: *** [src/training/CMakeFiles/pango_training.dir/build.make:118: src/training/CMakeFiles/pango_training.dir/pango/stringrenderer.cpp.o] Error 1
make[2]: *** Waiting for unfinished jobs....
In file included from /usr/local/include/pango/pango-coverage.h:25,
                 from /usr/local/include/pango/pango-font.h:25,
                 from /home/root/tesseract-main/src/training/pango/pango_font_info.h:27,
                 from /home/root/tesseract-main/src/training/pango/ligature_table.cpp:24:
/usr/include/glib-2.0/glib-object.h:24:10: fatal error: gobject/gbinding.h: No such file or directory
   24 | #include <gobject/gbinding.h>
      |          ^~~~~~~~~~~~~~~~~~~~
compilation terminated.
make[2]: *** [src/training/CMakeFiles/pango_training.dir/build.make:90: src/training/CMakeFiles/pango_training.dir/pango/ligature_table.cpp.o] Error 1
In file included from /usr/local/include/pango/pango-coverage.h:25,
                 from /usr/local/include/pango/pango-font.h:25,
                 from /home/root/tesseract-main/src/training/pango/pango_font_info.h:27,
                 from /home/root/tesseract-main/src/training/pango/pango_font_info.cpp:32:
/usr/include/glib-2.0/glib-object.h:24:10: fatal error: gobject/gbinding.h: No such file or directory
   24 | #include <gobject/gbinding.h>
      |          ^~~~~~~~~~~~~~~~~~~~
compilation terminated.

 

Hunting for the include path, which isn't in the Makefile, so after rummaging around.. hopefully this is the right spot..

# grep -rn "\-I" .
./CMakeFiles/libtesseract.dir/flags.make:7:CXX_INCLUDES = -I/home/root/tesseract-main/include -I/home/root/tesseract-main/src/arch -I/home/root/tesseract-main/src/ccmain -I/home/root/tesseract-main/src/ccstruct -I/home/root/tesseract-main/src/ccutil -I/home/root/tesseract-main/src/classify -I/home/root/tesseract-main/src/cutil -I/home/root/tesseract-main/src/dict -I/home/root/tesseract-main/src/lstm -I/home/root/tesseract-main/src/opencl -I/home/root/tesseract-main/src/textord -I/home/root/tesseract-main/src/viewer -I/home/root/tesseract-main/src/wordrec -I/home/root/tesseract-main/src/training -I/home/root/tesseract-main/src -I/usr/local/include/leptonica -I/home/root/tesseract-main/build -I/home/root/tesseract-main/build/include
./CMakeFiles/tesseract.dir/flags.make:7:CXX_INCLUDES = -I/usr/local/include/leptonica -I/home/root/tesseract-main/build -I/home/root/tesseract-main/build/include -I/home/root/tesseract-main/include -I/home/root/tesseract-main/src/arch -I/home/root/tesseract-main/src/ccmain -I/home/root/tesseract-main/src/ccstruct -I/home/root/tesseract-main/src/ccutil -I/home/root/tesseract-main/src/classify -I/home/root/tesseract-main/src/cutil -I/home/root/tesseract-main/src/dict -I/home/root/tesseract-main/src/lstm -I/home/root/tesseract-main/src/opencl -I/home/root/tesseract-main/src/textord -I/home/root/tesseract-main/src/viewer -I/home/root/tesseract-main/src/wordrec -I/home/root/tesseract-main/src/training
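The missing glib-object.h and gobject/gbinding.h are down to glib's split include layout (half of its headers live under lib/). pkg-config knows the right -I flags, so appending its output to the CXX_INCLUDES lines in flags.make above is the brute-force workaround; a sketch, and the exact paths will differ per image:

$ pkg-config --cflags glib-2.0
# prints something like: -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include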

Posted by 구차니

An open-source OCR engine; on Ubuntu it installs as a package,

and there's also a frontend called gimagereader, worth a look..

[링크 : https://sourceforge.net/projects/gimagereader/]

 

Basic usage is as below: image file, -, then -l eng. The - in the outputbase position makes it print the result to stdout instead of writing a file.

$ tesseract images/eurotext.png - -l eng

[링크 : https://tesseract-ocr.github.io/tessdoc/Command-Line-Usage.html]
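For comparison, giving it a real output base writes a text file instead (same sample image):

$ tesseract images/eurotext.png out -l eng
$ cat out.txt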

 

psm sets the mode for how the page is read. The split is whether OSD (Orientation and Script Detection) runs or not; with it (--psm 1) it can recognize as few as two characters.

+

With psm 10 it can be set to recognize even a single character.

$ tesseract --help-extra
Usage:
  tesseract --help | --help-extra | --help-psm | --help-oem | --version
  tesseract --list-langs [--tessdata-dir PATH]
  tesseract --print-parameters [options...] [configfile...]
  tesseract imagename|imagelist|stdin outputbase|stdout [options...] [configfile...]

OCR options:
  --tessdata-dir PATH   Specify the location of tessdata path.
  --user-words PATH     Specify the location of user words file.
  --user-patterns PATH  Specify the location of user patterns file.
  --dpi VALUE           Specify DPI for input image.
  -l LANG[+LANG]        Specify language(s) used for OCR.
  -c VAR=VALUE          Set value for config variables.
                        Multiple -c arguments are allowed.
  --psm NUM             Specify page segmentation mode.
  --oem NUM             Specify OCR Engine mode.
NOTE: These options must occur before any configfile.

Page segmentation modes:
  0    Orientation and script detection (OSD) only.
  1    Automatic page segmentation with OSD.
  2    Automatic page segmentation, but no OSD, or OCR. (not implemented)
  3    Fully automatic page segmentation, but no OSD. (Default)
  4    Assume a single column of text of variable sizes.
  5    Assume a single uniform block of vertically aligned text.
  6    Assume a single uniform block of text.
  7    Treat the image as a single text line.
  8    Treat the image as a single word.
  9    Treat the image as a single word in a circle.
 10    Treat the image as a single character.
 11    Sparse text. Find as much text as possible in no particular order.
 12    Sparse text with OSD.
 13    Raw line. Treat the image as a single text line,
       bypassing hacks that are Tesseract-specific.

OCR Engine modes: (see https://github.com/tesseract-ocr/tesseract/wiki#linux)
  0    Legacy engine only.
  1    Neural nets LSTM engine only.
  2    Legacy + LSTM engines.
  3    Default, based on what is available.

Single options:
  -h, --help            Show minimal help message.
  --help-extra          Show extra help for advanced users.
  --help-psm            Show page segmentation modes.
  --help-oem            Show OCR Engine modes.
  -v, --version         Show version information.
  --list-langs          List available languages for tesseract engine.
  --print-parameters    Print tesseract parameters.

[링크 : https://kokokorin-bigbox.tistory.com/53]

 

Testing it, a single character doesn't seem to work no matter what.. I wonder if there's a way around it?
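One more combination worth trying (not among the runs below): forcing single-character mode together with an explicit DPI, since every run below warns about an invalid 0 dpi guess:

$ tesseract a6.png - --psm 10 --dpi 300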

 

 

$ tesseract a5.png - --psm 3
Warning: Invalid resolution 0 dpi. Using 70 instead.
Estimating resolution as 406
ak

$ tesseract a5.png - --psm 2
Warning: Invalid resolution 0 dpi. Using 70 instead.
Estimating resolution as 406
Orientation: 0
WritingDirection: 0
TextlineOrder: 2
Deskew angle: 0.0000

$ tesseract a5.png - --psm 1
Warning: Invalid resolution 0 dpi. Using 70 instead.
Estimating resolution as 406
Too few characters. Skipping this page
OSD: Weak margin (0.00) for 2 blob text block, but using orientation anyway: 0
ak


$ tesseract a5.png - --psm 0
Warning: Invalid resolution 0 dpi. Using 70 instead.
Estimating resolution as 406
Too few characters. Skipping this page
Warning. Invalid resolution 0 dpi. Using 70 instead.
Too few characters. Skipping this page
Error during processing.

 

 

 

$ tesseract a6.png - --psm 0
Warning: Invalid resolution 0 dpi. Using 70 instead.
Estimating resolution as 294
Too few characters. Skipping this page
Warning. Invalid resolution 0 dpi. Using 70 instead.
Too few characters. Skipping this page
Error during processing.

$ tesseract a6.png - --psm 1
Warning: Invalid resolution 0 dpi. Using 70 instead.
Estimating resolution as 294
Too few characters. Skipping this page
OSD: Weak margin (0.00) for 1 blob text block, but using orientation anyway: 0
Empty page!!
Estimating resolution as 294
Too few characters. Skipping this page
OSD: Weak margin (0.00) for 1 blob text block, but using orientation anyway: 0
Empty page!!

$ tesseract a6.png - --psm 2
Warning: Invalid resolution 0 dpi. Using 70 instead.
Estimating resolution as 294
Empty page!!

$ tesseract a6.png - --psm 3
Warning: Invalid resolution 0 dpi. Using 70 instead.
Estimating resolution as 294
Empty page!!
Estimating resolution as 294
Empty page!!

Posted by 구차니

Neural-network gstreamer, presumably?

 

NNStreamer provides a set of GStreamer plugins so developers may apply neural networks, attach related frameworks (including ROS, IIO, FlatBuffers, and Protocol Buffers), and manipulate tensor data streams in GStreamer pipelines easily and execute such pipelines efficiently.

[링크 : https://nnstreamer.ai/#get-started]
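For a taste of what those plugins look like in a pipeline, a minimal sketch (assuming the nnstreamer elements are installed) that turns raw RGB frames into a tensor stream:

$ gst-launch-1.0 videotestsrc num-buffers=10 ! video/x-raw,format=RGB,width=224,height=224 ! tensor_converter ! fakesink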

Posted by 구차니

I went through a lot of pain in the past figuring out which modes a webcam supports for v4l2src, and just learned there's a simple utility for it -_-

 

$ man gst-device-monitor-1.0
SYNOPSIS
       gst-device-monitor-1.0 [DEVICE_CLASSES[:FILTER_CAPS]] [DEVICE_CLASSES[:FILTER_CAPS]]

 

$ gst-device-monitor-1.0 Video/Source
Probing devices...


Device found:

name  : 720p HD Camera
class : Video/Source
caps  : image/jpeg, width=1280, height=720, framerate=30/1
        image/jpeg, width=640, height=480, framerate=30/1
        image/jpeg, width=640, height=360, framerate=30/1
        image/jpeg, width=352, height=288, framerate=30/1
        image/jpeg, width=320, height=240, framerate=30/1
        image/jpeg, width=160, height=120, framerate=30/1
        video/x-raw, format=YUY2, width=1280, height=720, framerate=10/1
        video/x-raw, format=YUY2, width=640, height=480, framerate=30/1
        video/x-raw, format=YUY2, width=640, height=360, framerate=30/1
        video/x-raw, format=YUY2, width=352, height=288, framerate=30/1
        video/x-raw, format=YUY2, width=320, height=240, framerate=30/1
        video/x-raw, format=YUY2, width=160, height=120, framerate=30/1
properties:
object.path = v4l2:/dev/video0
device.api = v4l2
media.class = Video/Source
device.product.id = 308
device.vendor.id = 11134
api.v4l2.path = /dev/video0
api.v4l2.cap.driver = uvcvideo
api.v4l2.cap.card = "720p\ HD\ Camera:\ 720p\ HD\ Camera"
api.v4l2.cap.bus_info = usb-0000:00:14.0-6
api.v4l2.cap.version = 6.2.16
api.v4l2.cap.capabilities = 84a00001
api.v4l2.cap.device-caps = 04200001
device.id = 33
node.name = v4l2_input.pci-0000_00_14.0-usb-0_6_1.0
node.description = "720p\ HD\ Camera"
factory.name = api.v4l2.source
node.pause-on-idle = false
factory.id = 10
client.id = 32
clock.quantum-limit = 8192
media.role = Camera
node.driver = true
object.id = 35
object.serial = 35
gst-launch-1.0 pipewiresrc path=35 ! ...
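The probed caps plug straight into a v4l2src pipeline. For example, picking the 720p MJPEG mode from the list above (the sink is whatever the setup provides):

$ gst-launch-1.0 v4l2src device=/dev/video0 ! image/jpeg,width=1280,height=720,framerate=30/1 ! jpegdec ! videoconvert ! autovideosink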

Posted by 구차니