repl.it
@Kristinita/

demo__codereview__decodefilestoutf8

Python

Demo module that converts files to UTF-8 if they are not already UTF-8-encoded

fork
loading
Files
  • main.py
  • Kira1.md
  • Kira2.md
  • Kira3.md
  • requirements.txt
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# @Author: SashaChernykh
# @Date: 2018-09-01 13:31:06
# @Last Modified time: 2018-09-01 14:39:30
"""kira_encoding module."""
import codecs
import glob

import chardet

# Markdown files matched by the '*.md' pattern (resolved against the current
# working directory). The list is built once, when this module is loaded.
ALL_FILES = glob.glob('*.md')


def kira_encoding_function():
    """Check the encoding of every file in ALL_FILES; convert to UTF-8.

    Each file is read as raw bytes and its encoding is guessed with
    ``chardet.detect``. Files reported as UTF-8 or ASCII are left untouched.
    Any other file is decoded with the detected encoding and rewritten in
    place as UTF-8. A short status line is printed for every file.

    Files whose encoding cannot be detected, or whose content cannot be
    decoded with the detected encoding, are reported and left unchanged
    instead of aborting the whole run.

    Returns:
        None. The function works through side effects (file rewrite, print).
    """
    for filename in ALL_FILES:

        # Not 100% accuracy:
        # https://stackoverflow.com/a/436299/5951529
        # Check:
        # https://chardet.readthedocs.io/en/latest/usage.html#example-using-the-detect-function
        # https://stackoverflow.com/a/37531241/5951529
        with open(filename, 'rb') as opened_file:
            bytes_file = opened_file.read()
        # The read handle is closed here, before the file is possibly
        # rewritten below.

        chardet_data = chardet.detect(bytes_file)
        fileencoding = chardet_data['encoding']
        print('fileencoding', fileencoding)

        # chardet reports None when it cannot make a guess (for example on
        # an empty file); there is nothing sensible to convert from.
        if fileencoding is None:
            print(filename + ' encoding was not detected, file skipped')
            continue

        # Compare case-insensitively: the spelling of the reported name is
        # not guaranteed to be lowercase.
        if fileencoding.lower() in ('utf-8', 'ascii'):
            print(filename + ' in UTF-8 encoding')
            continue

        # Convert file to UTF-8:
        # https://stackoverflow.com/q/19932116/5951529
        # Decode the bytes already in memory with the *detected* encoding;
        # re-reading the file with the platform default encoding would
        # misinterpret the content or fail outright.
        try:
            decoded_text = bytes_file.decode(fileencoding)
        except (LookupError, UnicodeDecodeError) as error:
            # LookupError: Python has no codec for the reported name.
            # UnicodeDecodeError: the guess was wrong for this content.
            print(filename +
                  ' cannot be decoded as ' +
                  fileencoding +
                  ', file skipped: ' +
                  str(error))
            continue

        with codecs.open(filename, 'w', 'utf-8') as converted_file:
            converted_file.write(decoded_text)
        print(filename +
              ' in ' +
              fileencoding +
              ' encoding automatically converted to UTF-8')


# Module-level call: the check/conversion runs as soon as this file is
# executed (or imported).
kira_encoding_function()