Добавил:
Developer Опубликованный материал нарушает ваши авторские права? Сообщите нам.
Вуз: Предмет: Файл:
Скачиваний:
0
Добавлен:
18.01.2024
Размер:
3.24 Кб
Скачать
"""
Script which takes one or more file paths and reports on their detected
encodings

Example::

    % chardetect somefile someotherfile
    somefile: windows-1252 with confidence 0.5
    someotherfile: ascii with confidence 1.0

If no paths are provided, it takes its input from stdin.

"""


import argparse
import sys
from typing import Iterable, List, Optional

from .. import __version__
from ..universaldetector import UniversalDetector


def description_of(
    lines: Iterable[bytes],
    name: str = "stdin",
    minimal: bool = False,
    should_rename_legacy: bool = False,
) -> Optional[str]:
    """
    Return a string describing the probable encoding of a file or
    list of strings.

    :param lines: The lines to get the encoding of.
    :type lines: Iterable of bytes
    :param name: Name of file or collection of lines
    :type name: str
    :param should_rename_legacy:  Should we rename legacy encodings to
                                  their more modern equivalents?
    :type should_rename_legacy:   ``bool``
    """
    u = UniversalDetector(should_rename_legacy=should_rename_legacy)
    for line in lines:
        line = bytearray(line)
        u.feed(line)
        # shortcut out of the loop to save reading further - particularly useful if we read a BOM.
        if u.done:
            break
    u.close()
    result = u.result
    if minimal:
        return result["encoding"]
    if result["encoding"]:
        return f'{name}: {result["encoding"]} with confidence {result["confidence"]}'
    return f"{name}: no result"


def main(argv: Optional[List[str]] = None) -> None:
    """
    Handles command line arguments and gets things started.

    :param argv: List of arguments, as if specified on the command-line.
                 If None, ``sys.argv[1:]`` is used instead.
    :type argv: list of str
    """
    # Get command line arguments
    parser = argparse.ArgumentParser(
        description=(
            "Takes one or more file paths and reports their detected encodings"
        )
    )
    parser.add_argument(
        "input",
        help="File whose encoding we would like to determine. (default: stdin)",
        type=argparse.FileType("rb"),
        nargs="*",
        default=[sys.stdin.buffer],
    )
    parser.add_argument(
        "--minimal",
        help="Print only the encoding to standard output",
        action="store_true",
    )
    parser.add_argument(
        "-l",
        "--legacy",
        help="Rename legacy encodings to more modern ones.",
        action="store_true",
    )
    parser.add_argument(
        "--version", action="version", version=f"%(prog)s {__version__}"
    )
    args = parser.parse_args(argv)

    for f in args.input:
        if f.isatty():
            print(
                "You are running chardetect interactively. Press "
                "CTRL-D twice at the start of a blank line to signal the "
                "end of your input. If you want help, run chardetect "
                "--help\n",
                file=sys.stderr,
            )
        print(
            description_of(
                f, f.name, minimal=args.minimal, should_rename_legacy=args.legacy
            )
        )


if __name__ == "__main__":
    main()
Соседние файлы в папке cli