ExcelBanter

ExcelBanter (https://www.excelbanter.com/)
-   Excel Programming (https://www.excelbanter.com/excel-programming/)
-   -   txt-file is UTF8 or DOS. (https://www.excelbanter.com/excel-programming/378484-txt-file-utf8-dos.html)

Joergen Bondesen

txt-file is UTF8 or DOS.
 
Hi NG

I am using Excel VBA to open a txt-file and evaluate data.

Sometimes I receive a txt-file in UTF8 and not DOS. This is a big probleme
for me.

Is there a VBA way to 'controle' the txt-file and give me feedback for UTF8
and DOS.

Is it with VBA possible to converte the file from UTF8 to DOS. Today I am
using UltraEdit.

--
Best Regards from
Joergen Bondesen



Michel Pierron

txt-file is UTF8 or DOS.
 
Hi Joergen,
Something like:

Option Explicit
Private Declare Function MultiByteToWideChar Lib "kernel32" _
(ByVal CodePage As Long, ByVal dwFlags As Long _
, ByVal lpMultiByteStr As String, ByVal cchMultiByte As Long _
, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long) As Long
Private Declare Function WideCharToMultiByte Lib "kernel32" _
(ByVal CodePage As Long, ByVal dwFlags As Long _
, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long _
, ByVal lpMultiByteStr As String, ByVal cchMultiByte As Long _
, ByVal lpDefaultChar As Long, ByVal lpUsedDefaultChar As Long) As Long
Private Const CP_UTF8 = 65001

Sub Joergen()
Const sPath$ = "Path of the file to be tested\"
Const sFile$ = "The name of the text file to be tested.txt"
Const tFile$ = "My decoded file.txt"
MsgBox Utf8Encoding(sPath & sFile, sPath & tFile), 64
End Sub

' Returns True if encoding = utf-8
' If encoding utf-8 then convert to ANSI
' and save under SaveAs (if SaveAs = ""
' then replace txtFile).
Private Function Utf8Encoding(txtFile As String _
, Optional SaveAs As String = "") As Boolean
If InStr(1, txtFile, ".txt", 1) = 0 Then Exit Function
If Dir(txtFile) = "" Then Exit Function
Dim i&, b() As Byte
Dim Buffer$, f%: f = FreeFile
Open txtFile For Binary Access Read As #f
Buffer = String(LOF(f), Chr(0))
Get #f, , Buffer
Close #f
For i = 1 To 3 ' UTF-8 BOM = EF BB BF
If Asc(Mid(Buffer, i, 1)) < Choose(i, 239, 187, 191) _
Then Exit Function
Next i
Buffer = Mid$(Buffer, 4)
Dim s As Variant: s = Split(Buffer, vbCrLf)
If SaveAs = "" Then SaveAs = txtFile
On Error Resume Next
Kill SaveAs ' Delete any existing file.
On Error GoTo 0
f = FreeFile ' Save the file.
Open SaveAs For Output As #f
For i = 0 To UBound(s)
Print #f, UTF8ToA(s(i))
Next i
Close f
Utf8Encoding = True
End Function

Private Function UTF8ToA(ByVal wText As String) As String
Dim vNeeded&, vSize&: vSize = Len(wText)
vNeeded = MultiByteToWideChar(CP_UTF8, 0, wText, vSize, 0, 0)
UTF8ToA = String(vNeeded, 0)
MultiByteToWideChar CP_UTF8, 0, wText, vSize, StrPtr(UTF8ToA), vNeeded
End Function

Regards,
MP

"Joergen Bondesen" a écrit dans le message de news:
...
Hi NG

I am using Excel VBA to open a txt-file and evaluate data.

Sometimes I receive a txt-file in UTF8 and not DOS. This is a big probleme
for me.

Is there a VBA way to 'controle' the txt-file and give me feedback for
UTF8 and DOS.

Is it with VBA possible to converte the file from UTF8 to DOS. Today I am
using UltraEdit.

--
Best Regards from
Joergen Bondesen




Joergen Bondesen

txt-file is UTF8 or DOS.
 
Hi Michel

Thanks, I am impressed.
Look below where I have added my comments starting with '//, please.
I do hope you can help me.


Option Explicit

Private Declare Function MultiByteToWideChar Lib "kernel32" _
(ByVal CodePage As Long, ByVal dwFlags As Long _
, ByVal lpMultiByteStr As String, ByVal cchMultiByte As Long _
, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long) As Long


Private Declare Function WideCharToMultiByte Lib "kernel32" _
(ByVal CodePage As Long, ByVal dwFlags As Long _
, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long _
, ByVal lpMultiByteStr As String, ByVal cchMultiByte As Long _
, ByVal lpDefaultChar As Long, ByVal lpUsedDefaultChar As Long) As Long

Private Const CP_UTF8 = 65001

Sub Joergen()
'// My consts
Const sPath$ = "C:\"
Const sFile$ = "UTF8toDOS.txt"
Const tFile$ = "My decoded file.txt"

MsgBox Utf8Encoding(sPath & sFile, sPath & tFile), 64
End Sub


Private Function UTF8ToA(ByVal wText As String) As String
Dim vNeeded&, vSize&: vSize = Len(wText)
vNeeded = MultiByteToWideChar(CP_UTF8, 0, wText, vSize, 0, 0)
UTF8ToA = String(vNeeded, 0)
MultiByteToWideChar CP_UTF8, 0, wText, vSize, StrPtr(UTF8ToA), vNeeded
End Function


' Returns True if encoding = utf-8
' If encoding utf-8 then convert to ANSI
' and save under SaveAs (if SaveAs = ""
' then replace txtFile).
Private Function Utf8Encoding(txtFile As String _
, Optional SaveAs As String = "") As Boolean

If InStr(1, txtFile, ".txt", 1) = 0 Then Exit Function

If Dir(txtFile) = "" Then Exit Function

Dim i&, b() As Byte
Dim Buffer$, f%: f = FreeFile

Open txtFile For Binary Access Read As #f
Buffer = String(LOF(f), Chr(0))
Get #f, , Buffer
Close #f

'// When I read the file in UltraEdit (texteditor) in
'// hex mode, the 2 first sign is '255, 254' "disappears".
'// When I converte the file to DOS, these 2 sign
'// so I conclude (prehaps wrong) I can determine if I
'// have a unix file.

'// I also think that: 'Get #f, , Buffer' give me a "DOS"
'// line so below is not the way. Can you help me further.

' For i = 1 To 3 ' UTF-8 BOM = EF BB BF
' If Asc(Mid(Buffer, i, 1)) < Choose(i, 239, 187, 191) _
' Then Exit Function
' Next i


'// This cut the first 3 sign in firste line so therefore
'// have I change 4 to 1.
'Buffer = Mid$(Buffer, 4)
Buffer = Mid$(Buffer, 1)

Dim s As Variant: s = Split(Buffer, vbCrLf)
If SaveAs = "" Then SaveAs = txtFile

On Error Resume Next
Kill SaveAs ' Delete any existing file.
On Error GoTo 0

f = FreeFile ' Save the file.
Open SaveAs For Output As #f

For i = 0 To UBound(s)
'// Avoid "empty" line after last record
Dim NewLine As String
NewLine = UTF8ToA(s(i))

If NewLine < vbNullString Then
Print #f, NewLine
End If
Next i
' For i = 0 To UBound(s)
' Print #f, UTF8ToA(s(i))
' Next i
Close f

Utf8Encoding = True
End Function

--
Best Regards from
Joergen Bondesen


"Michel Pierron" wrote in message
...
Hi Joergen,
Something like:

Option Explicit
Private Declare Function MultiByteToWideChar Lib "kernel32" _
(ByVal CodePage As Long, ByVal dwFlags As Long _
, ByVal lpMultiByteStr As String, ByVal cchMultiByte As Long _
, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long) As Long
Private Declare Function WideCharToMultiByte Lib "kernel32" _
(ByVal CodePage As Long, ByVal dwFlags As Long _
, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long _
, ByVal lpMultiByteStr As String, ByVal cchMultiByte As Long _
, ByVal lpDefaultChar As Long, ByVal lpUsedDefaultChar As Long) As Long
Private Const CP_UTF8 = 65001

Sub Joergen()
Const sPath$ = "Path of the file to be tested\"
Const sFile$ = "The name of the text file to be tested.txt"
Const tFile$ = "My decoded file.txt"
MsgBox Utf8Encoding(sPath & sFile, sPath & tFile), 64
End Sub

' Returns True if encoding = utf-8
' If encoding utf-8 then convert to ANSI
' and save under SaveAs (if SaveAs = ""
' then replace txtFile).
Private Function Utf8Encoding(txtFile As String _
, Optional SaveAs As String = "") As Boolean
If InStr(1, txtFile, ".txt", 1) = 0 Then Exit Function
If Dir(txtFile) = "" Then Exit Function
Dim i&, b() As Byte
Dim Buffer$, f%: f = FreeFile
Open txtFile For Binary Access Read As #f
Buffer = String(LOF(f), Chr(0))
Get #f, , Buffer
Close #f
For i = 1 To 3 ' UTF-8 BOM = EF BB BF
If Asc(Mid(Buffer, i, 1)) < Choose(i, 239, 187, 191) _
Then Exit Function
Next i
Buffer = Mid$(Buffer, 4)
Dim s As Variant: s = Split(Buffer, vbCrLf)
If SaveAs = "" Then SaveAs = txtFile
On Error Resume Next
Kill SaveAs ' Delete any existing file.
On Error GoTo 0
f = FreeFile ' Save the file.
Open SaveAs For Output As #f
For i = 0 To UBound(s)
Print #f, UTF8ToA(s(i))
Next i
Close f
Utf8Encoding = True
End Function

Private Function UTF8ToA(ByVal wText As String) As String
Dim vNeeded&, vSize&: vSize = Len(wText)
vNeeded = MultiByteToWideChar(CP_UTF8, 0, wText, vSize, 0, 0)
UTF8ToA = String(vNeeded, 0)
MultiByteToWideChar CP_UTF8, 0, wText, vSize, StrPtr(UTF8ToA), vNeeded
End Function

Regards,
MP

"Joergen Bondesen" a écrit dans le message de news:
...
Hi NG

I am using Excel VBA to open a txt-file and evaluate data.

Sometimes I receive a txt-file in UTF8 and not DOS. This is a big
probleme for me.

Is there a VBA way to 'controle' the txt-file and give me feedback for
UTF8 and DOS.

Is it with VBA possible to converte the file from UTF8 to DOS. Today I am
using UltraEdit.

--
Best Regards from
Joergen Bondesen




Michel Pierron

txt-file is UTF8 or DOS.
 
Hi Joergen,
If the first two signs of your file correspond to 255 and 254, it is that
your file is not encoded in UTF-8, but in UTF16 litle endian and it is much
simpler to decode it:

Sub Joergen_2()
'// My consts
Const sPath$ = "C:\"
Const sFile$ = "UTF16toDOS.txt"
Const tFile$ = "My_Result_file.txt""

If GetEncoding(sPath & sFile) < "UTF16L" Then Exit Sub

' Convert Unicode to Ascii
Const ForReading = 1, ForWriting = 2, ForAppending = 8
Const ModeAscii = 0, ModeUnicode = -1
Dim fso As Object, f_in As Object, f_out As Object
Set fso = CreateObject("Scripting.FileSystemObject")
Set f_in = fso.OpenTextFile(sPath & sFile, ForReading, , ModeUnicode)
Set f_out = fso.OpenTextFile(sPath & tFile, ForWriting, True, ModeAscii)
Do Until f_in.AtEndOfStream
f_out.Write f_in.Read(1)
Loop
f_in.Close: f_out.Close
Set f_out = Nothing: Set f_in = Nothing: Set fso = Nothing
End Sub

Private Function GetEncoding(txtFile As String) As String
Dim b(1) As Byte, f%: f = FreeFile
Open txtFile For Binary Access Read As #f
Get #f, , b
Close #f
If b(0) = &HEF And b(1) = &HBB Then
GetEncoding = "UTF-8"
ElseIf b(0) = &HFF And b(1) = &HFE Then
' Litle endian unicode (ucs-2le, ucs-4le, and ucs-16le)
GetEncoding = "UTF16L"
ElseIf b(0) = &HFE And b(1) = &HFF Then
' Big endian unicode (utf-16 and ucs-2)
GetEncoding = "UTF16B"
Else
GetEncoding = "ANSI"
End If
End Function

Regards,
MP


"Joergen Bondesen" a écrit dans le message de news:
...
Hi Michel

Thanks, I am impressed.
Look below where I have added my comments starting with '//, please.
I do hope you can help me.


Option Explicit

Private Declare Function MultiByteToWideChar Lib "kernel32" _
(ByVal CodePage As Long, ByVal dwFlags As Long _
, ByVal lpMultiByteStr As String, ByVal cchMultiByte As Long _
, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long) As Long


Private Declare Function WideCharToMultiByte Lib "kernel32" _
(ByVal CodePage As Long, ByVal dwFlags As Long _
, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long _
, ByVal lpMultiByteStr As String, ByVal cchMultiByte As Long _
, ByVal lpDefaultChar As Long, ByVal lpUsedDefaultChar As Long) As Long

Private Const CP_UTF8 = 65001

Sub Joergen()
'// My consts
Const sPath$ = "C:\"
Const sFile$ = "UTF8toDOS.txt"
Const tFile$ = "My decoded file.txt"

MsgBox Utf8Encoding(sPath & sFile, sPath & tFile), 64
End Sub


Private Function UTF8ToA(ByVal wText As String) As String
Dim vNeeded&, vSize&: vSize = Len(wText)
vNeeded = MultiByteToWideChar(CP_UTF8, 0, wText, vSize, 0, 0)
UTF8ToA = String(vNeeded, 0)
MultiByteToWideChar CP_UTF8, 0, wText, vSize, StrPtr(UTF8ToA), vNeeded
End Function


' Returns True if encoding = utf-8
' If encoding utf-8 then convert to ANSI
' and save under SaveAs (if SaveAs = ""
' then replace txtFile).
Private Function Utf8Encoding(txtFile As String _
, Optional SaveAs As String = "") As Boolean

If InStr(1, txtFile, ".txt", 1) = 0 Then Exit Function

If Dir(txtFile) = "" Then Exit Function

Dim i&, b() As Byte
Dim Buffer$, f%: f = FreeFile

Open txtFile For Binary Access Read As #f
Buffer = String(LOF(f), Chr(0))
Get #f, , Buffer
Close #f

'// When I read the file in UltraEdit (texteditor) in
'// hex mode, the 2 first sign is '255, 254' "disappears".
'// When I converte the file to DOS, these 2 sign
'// so I conclude (prehaps wrong) I can determine if I
'// have a unix file.

'// I also think that: 'Get #f, , Buffer' give me a "DOS"
'// line so below is not the way. Can you help me further.

' For i = 1 To 3 ' UTF-8 BOM = EF BB BF
' If Asc(Mid(Buffer, i, 1)) < Choose(i, 239, 187, 191) _
' Then Exit Function
' Next i


'// This cut the first 3 sign in firste line so therefore
'// have I change 4 to 1.
'Buffer = Mid$(Buffer, 4)
Buffer = Mid$(Buffer, 1)

Dim s As Variant: s = Split(Buffer, vbCrLf)
If SaveAs = "" Then SaveAs = txtFile

On Error Resume Next
Kill SaveAs ' Delete any existing file.
On Error GoTo 0

f = FreeFile ' Save the file.
Open SaveAs For Output As #f

For i = 0 To UBound(s)
'// Avoid "empty" line after last record
Dim NewLine As String
NewLine = UTF8ToA(s(i))

If NewLine < vbNullString Then
Print #f, NewLine
End If
Next i
' For i = 0 To UBound(s)
' Print #f, UTF8ToA(s(i))
' Next i
Close f

Utf8Encoding = True
End Function

--
Best Regards from
Joergen Bondesen


"Michel Pierron" wrote in message
...
Hi Joergen,
Something like:

Option Explicit
Private Declare Function MultiByteToWideChar Lib "kernel32" _
(ByVal CodePage As Long, ByVal dwFlags As Long _
, ByVal lpMultiByteStr As String, ByVal cchMultiByte As Long _
, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long) As Long
Private Declare Function WideCharToMultiByte Lib "kernel32" _
(ByVal CodePage As Long, ByVal dwFlags As Long _
, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long _
, ByVal lpMultiByteStr As String, ByVal cchMultiByte As Long _
, ByVal lpDefaultChar As Long, ByVal lpUsedDefaultChar As Long) As Long
Private Const CP_UTF8 = 65001

Sub Joergen()
Const sPath$ = "Path of the file to be tested\"
Const sFile$ = "The name of the text file to be tested.txt"
Const tFile$ = "My decoded file.txt"
MsgBox Utf8Encoding(sPath & sFile, sPath & tFile), 64
End Sub

' Returns True if encoding = utf-8
' If encoding utf-8 then convert to ANSI
' and save under SaveAs (if SaveAs = ""
' then replace txtFile).
Private Function Utf8Encoding(txtFile As String _
, Optional SaveAs As String = "") As Boolean
If InStr(1, txtFile, ".txt", 1) = 0 Then Exit Function
If Dir(txtFile) = "" Then Exit Function
Dim i&, b() As Byte
Dim Buffer$, f%: f = FreeFile
Open txtFile For Binary Access Read As #f
Buffer = String(LOF(f), Chr(0))
Get #f, , Buffer
Close #f
For i = 1 To 3 ' UTF-8 BOM = EF BB BF
If Asc(Mid(Buffer, i, 1)) < Choose(i, 239, 187, 191) _
Then Exit Function
Next i
Buffer = Mid$(Buffer, 4)
Dim s As Variant: s = Split(Buffer, vbCrLf)
If SaveAs = "" Then SaveAs = txtFile
On Error Resume Next
Kill SaveAs ' Delete any existing file.
On Error GoTo 0
f = FreeFile ' Save the file.
Open SaveAs For Output As #f
For i = 0 To UBound(s)
Print #f, UTF8ToA(s(i))
Next i
Close f
Utf8Encoding = True
End Function

Private Function UTF8ToA(ByVal wText As String) As String
Dim vNeeded&, vSize&: vSize = Len(wText)
vNeeded = MultiByteToWideChar(CP_UTF8, 0, wText, vSize, 0, 0)
UTF8ToA = String(vNeeded, 0)
MultiByteToWideChar CP_UTF8, 0, wText, vSize, StrPtr(UTF8ToA), vNeeded
End Function

Regards,
MP

"Joergen Bondesen" a écrit dans le message de news:
...
Hi NG

I am using Excel VBA to open a txt-file and evaluate data.

Sometimes I receive a txt-file in UTF8 and not DOS. This is a big
probleme for me.

Is there a VBA way to 'controle' the txt-file and give me feedback for
UTF8 and DOS.

Is it with VBA possible to converte the file from UTF8 to DOS. Today I
am using UltraEdit.

--
Best Regards from
Joergen Bondesen







All times are GMT +1. The time now is 03:50 PM.

Powered by vBulletin® Copyright ©2000 - 2025, Jelsoft Enterprises Ltd.
ExcelBanter.com