repl.it
@Kristinita/

demo__codereview__decodefilestoutf8__fixed

Python

Fixed example of https://codereview.stackexchange.com/q/202928/132441

fork
loading
Files
  • main.py
  • Kira1.md
  • Kira2.md
  • Kira3.md
  • requirements.txt
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# @Author: SashaChernykh
# @Date: 2018-09-01 13:31:06
# @Last Modified time: 2018-09-01 21:24:44
"""kira_encoding module."""
import glob
import sys

import chardet

from logbook import Logger
from logbook import StreamHandler

# Module-level setup, executed once at import time.
#
# Logbook basic usage:
# https://logbook.readthedocs.io/en/stable/
# Push a stdout stream handler for the whole application, so records
# emitted through LOG below are written to sys.stdout.
StreamHandler(sys.stdout).push_application()
# Logger used by this module; 'Logbook' is the channel name passed to it.
LOG = Logger('Logbook')

# Paths matching '*.md' relative to the current working directory.
# The glob is evaluated once, here, so files created later are not included.
ALL_FILES = glob.glob('*.md')


def kira_encoding_function():
    """Convert every file in ``ALL_FILES`` that is not UTF-8 to UTF-8, in place.

    For each file the raw bytes are read, the encoding is guessed with
    ``chardet.detect`` and, unless the guess is UTF-8 or ASCII, the content
    is decoded with the guessed encoding and written back UTF-8 encoded.
    The outcome for every file is reported through ``LOG``.

    Files for which chardet reports no encoding (``None``, e.g. an empty
    file) are left untouched and reported with a warning.

    Raises:
        UnicodeDecodeError: if the content cannot be decoded with the
            encoding guessed by chardet.
        LookupError: if Python has no codec for the guessed encoding.
    """
    for filename in ALL_FILES:

        # Chardet does not guarantee 100% accuracy:
        # https://stackoverflow.com/a/436299/5951529
        # Check:
        # https://chardet.readthedocs.io/en/latest/usage.html#example-using-the-detect-function
        # https://stackoverflow.com/a/37531241/5951529
        # 'r+b' opens the existing file in binary mode for both reading and
        # writing, without truncating it on open:
        # https://stackoverflow.com/a/15746971/5951529
        with open(filename, 'r+b') as opened_file:
            bytes_file = opened_file.read()
            fileencoding = chardet.detect(bytes_file)['encoding']

            # chardet reports None when it cannot guess (e.g. empty file);
            # there is nothing reliable to decode with, so skip the file.
            if fileencoding is None:
                LOG.warning(filename +
                            ' encoding not detected, file skipped')
                continue

            # Compare case-insensitively: the spelling of encoding names
            # returned by chardet is not guaranteed to be lower case.
            if fileencoding.lower() in ('utf-8', 'ascii'):
                LOG.info(filename + ' in UTF-8 encoding')
                continue

            # Decode:
            # https://stackoverflow.com/a/38102444/5951529
            # Encode:
            # https://stackoverflow.com/a/37376668/5951529
            encoded_file = bytes_file.decode(fileencoding).encode('utf-8')
            LOG.info(filename +
                     ' in ' +
                     fileencoding +
                     ' encoding automatically converted to UTF-8')
            # Seek:
            # https://stackoverflow.com/a/2424410/5951529
            # https://stackoverflow.com/a/11696554/5951529
            opened_file.seek(0)
            opened_file.write(encoded_file)
            # The file is open in 'r+b', not 'w', so the old content is
            # still there: when the UTF-8 bytes are shorter than the
            # original, the leftover tail must be cut off explicitly.
            # https://stackoverflow.com/a/4562477/5951529
            opened_file.truncate()


# Run the conversion whenever this module is executed or imported.
kira_encoding_function()