Comments (5)
@jhillyerd what type of encoding? Do you have a requirement to decode RFC2047 QP and B64 header values to plain-text?
from enmime.
I wrote this a while back to preserve the original order of the headers while decoding their values. However, the resulting Decoded []byte
value is meant for humans, i.e., it is Unicode and does not respect folding-whitespace or line-length requirements.
type (
	// HeadersPreserved is the result returned by Sort: the headers in the
	// order Sort emitted them, plus a rendered copy of that header block.
	HeadersPreserved struct {
		// Decoded holds one "Name: value\r\n" line per header, followed by a
		// terminating "\r\n".
		Decoded []byte
		// Headers lists the same headers as individual name/value pairs.
		Headers Headers
	}

	// Headers is an ordered list of header fields.
	Headers []Header

	// Header is a single header field.
	Header struct {
		Name  string
		Value string
	}
)
// Sort parses the header block of the raw message b and returns its header
// fields in the order they appear in the input, with each value passed through
// RFC2047parts.
//
// The input is first normalised with Clean and parsed once with
// textproto.Reader.ReadMIMEHeader to obtain the unfolded values. The header
// block is then scanned line by line to recover the original ordering: for
// every line that starts a field, the next unused value stored under that
// field's canonical name is consumed.
//
// The returned HeadersPreserved.Decoded holds one "Name: value\r\n" line per
// header followed by a terminating "\r\n". It is intended for display and is
// not re-folded or re-encoded.
//
// An error is returned when ReadMIMEHeader fails with anything other than
// io.EOF, or when scanning the header lines fails.
func Sort(b []byte) (*HeadersPreserved, error) {
	b = Clean(b)
	tr := textproto.NewReader(bufio.NewReader(bytes.NewReader(b)))
	headers, err := tr.ReadMIMEHeader()
	switch errors.Cause(err) {
	case nil, io.EOF:
		// carry on, io.EOF is expected
	default:
		return nil, err
	}

	// bytes.Reader needs no extra buffering; the Scanner buffers on its own.
	bs := bufio.NewScanner(bytes.NewReader(b))
	res := Headers{}
	bw := &bytes.Buffer{}
	for bs.Scan() {
		line := bs.Text()
		if line == "" {
			// A blank line ends the header block, which is also where
			// ReadMIMEHeader stopped. Scanning the body would only waste time
			// and risk tripping the Scanner's line-length limit.
			break
		}
		if strings.HasPrefix(line, " ") || strings.HasPrefix(line, "\t") {
			// Continuation line: already folded into the previous value by
			// ReadMIMEHeader.
			continue
		}
		i := strings.Index(line, ":")
		if i == -1 {
			continue
		}
		header := textproto.CanonicalMIMEHeaderKey(line[:i])
		if len(headers[header]) == 0 {
			// Every value for this name has already been consumed (or the
			// parser never recorded one); there is nothing left to pair with
			// this line, so skip it.
			continue
		}
		// Take the oldest remaining value for this name and drop it from the
		// pending list so repeated headers keep their relative order.
		firstValue := headers[header][0]
		headers[header] = headers[header][1:]
		h := Header{Name: header, Value: RFC2047parts(firstValue)}
		res = append(res, h)
		fmt.Fprintf(bw, "%s: %s\r\n", h.Name, h.Value)
	}
	// A failed scan (for example an over-long header line) must not be
	// reported as a successful, silently truncated result.
	if err := bs.Err(); err != nil {
		return nil, err
	}
	bw.WriteString("\r\n")
	return &HeadersPreserved{
		Decoded: bw.Bytes(),
		Headers: res,
	}, nil
}
// Clean returns a copy of b in which the leading header lines are guaranteed
// to be followed by an empty CRLF line.
//
// b is split after every "\r\n". Leading pieces that contain a ':' or begin
// with a space or tab are treated as header lines and copied through. At the
// first piece that is neither, header mode ends: if that piece is not itself a
// bare "\r\n", one is inserted in front of it. Everything from there on is
// copied unchanged.
func Clean(b []byte) []byte {
	crlf := []byte("\r\n")
	out := make([]byte, 0, len(b)+2)
	inHeaders := true
	for _, chunk := range bytes.SplitAfter(b, crlf) {
		if inHeaders {
			headerLike := bytes.IndexByte(chunk, ':') >= 0 ||
				bytes.HasPrefix(chunk, []byte(" ")) ||
				bytes.HasPrefix(chunk, []byte("\t"))
			if !headerLike {
				// First non-header piece: leave header mode and make sure a
				// blank line separates the headers from what follows.
				inHeaders = false
				if !bytes.Equal(chunk, crlf) {
					out = append(out, crlf...)
				}
			}
		}
		out = append(out, chunk...)
	}
	return out
}
// RFC2047parts returns s with its RFC 2047 encoded-words decoded.
//
// Carriage returns and line feeds are first replaced by spaces. The result is
// then fed to rfc2047recurse repeatedly, each pass working on the output of
// the previous one, until rfc2047recurse returns a non-nil error; the string
// from that final call is returned.
// RFC2047 Example:
// `=?UTF-8?B?bmFtZT0iw7DCn8KUwoo=?=`
func RFC2047parts(s string) string {
	unfold := func(r rune) rune {
		switch r {
		case '\n', '\r':
			return ' '
		}
		return r
	}
	out := strings.Map(unfold, s)
	for {
		next, err := rfc2047recurse(out)
		out = next
		if err != nil {
			return out
		}
	}
}
// rfc2047recurse performs one decoding pass over s.
//
// It returns (s, io.EOF) when s contains neither "?Q?" nor "?B?" (compared
// case-insensitively). Otherwise s is decoded with decodeHeader; if that
// leaves the string unchanged, it is passed through fixRFC2047String and
// decoded once more. A nil error means the returned string differs from s and
// another pass may be worthwhile. io.EOF means nothing changed. Any error from
// decodeHeader is returned as-is together with the string decodeHeader
// returned.
// RFC2047 Example:
// `=?UTF-8?B?bmFtZT0iw7DCn8KUwoo=?=`
func rfc2047recurse(s string) (string, error) {
	upper := strings.ToUpper(s)
	hasQ := strings.Contains(upper, "?Q?")
	hasB := strings.Contains(upper, "?B?")
	if !hasQ && !hasB {
		return s, io.EOF
	}

	decoded, err := decodeHeader(s)
	if err != nil {
		return decoded, err
	}
	if decoded != s {
		return decoded, nil
	}

	// Plain decoding changed nothing; retry on a repaired copy.
	decoded, err = decodeHeader(fixRFC2047String(decoded))
	if err != nil {
		return decoded, err
	}
	if decoded == s {
		return decoded, io.EOF
	}
	return decoded, nil
}
// decodeHeader decodes the RFC 2047 encoded-words in input using Go's
// mime.WordDecoder, with NewCharsetReader installed as its CharsetReader.
// On failure the original input is returned unchanged together with the error.
func decodeHeader(input string) (string, error) {
	dec := mime.WordDecoder{CharsetReader: NewCharsetReader}
	decoded, err := dec.DecodeHeader(input)
	if err == nil {
		return decoded, nil
	}
	return input, err
}
// fixRFC2047String removes whitespace (space, tab, CR, LF) that has crept into
// RFC 2047 encoded-words ("=?charset?encoding?text?="), for example through
// header folding, so that a decoder can recognise them. Text outside
// encoded-words is copied through unchanged.
//
// An encoded-word opens at "=?" and closes at the first "?=" that follows at
// least three '?' delimiters (after charset, after encoding, after text). The
// delimiter count is per word, so every encoded-word in s is repaired, and
// base64 padding ('=') directly before the closing "?=" is not mistaken for a
// new opener. An encoded-word that is never closed causes all remaining
// whitespace in s to be dropped.
func fixRFC2047String(s string) string {
	var sb strings.Builder
	sb.Grow(len(s))

	inWord := false // currently between "=?" and the closing "?="
	prevEq := false // previous rune was '=', so a following '?' may open a word
	prevQ := false  // last rune written was '?', so a following '=' may close the word
	delims := 0     // '?' delimiters seen since the current word opened

	for _, r := range s {
		switch r {
		case '=':
			if inWord && delims >= 3 && prevQ {
				// "?=" terminator: leave the word and start counting afresh
				// for the next one.
				inWord = false
				delims = 0
				prevEq = false
			} else {
				prevEq = true
			}
			prevQ = false
			sb.WriteRune(r)
		case '?':
			switch {
			case !inWord && prevEq:
				// "=?" opener.
				inWord = true
				delims = 0
			case inWord:
				delims++
			}
			prevEq = false
			prevQ = true
			sb.WriteRune(r)
		case '\n', '\r', ' ', '\t':
			if !inWord {
				sb.WriteRune(r)
				prevQ = false
			}
			// Inside a word the whitespace is dropped, so prevQ is kept: the
			// output still has '?' as its last rune.
			prevEq = false
		default:
			prevEq = false
			prevQ = false
			sb.WriteRune(r)
		}
	}
	return sb.String()
}
For NewCharsetReader, just use the one in the enmime internal package.
from enmime.
Yes, essentially human readable decoding. All I really want is for enmime to do the exact same decoding it does now when building an Envelope, but then to stop before it starts trying to process the body of the email.
from enmime.
@jhillyerd just let me know where you envision this being implemented and any special rules or strictures for output formatting. Got some free cycles for a week or so.
from enmime.
So the exact problem I'm trying to solve is here:
https://github.com/inbucket/inbucket/blob/master/pkg/message/manager.go#L53
I parse an entire email with enmime, but all I care about in that scenario is the From, To and Subject in UTF-8 from the primary header.
Returning the Envelope struct isn't mandatory. Let me know if that clarifies things.
from enmime.
Related Issues (20)
- Field accessors for MailBuilder HOT 1
- golangci workflow broken
- Character decoding characters of other written languages for .Text HOT 3
- Delivery Status Notification parsing (bounce messages) HOT 1
- TestReadHeader/equals_in_name fails on Go1.20 HOT 3
- Export coding package? HOT 1
- Bug in validHeaderFieldByte(): characters disallowed in http header field are allowed in email header field HOT 7
- Any plan to tag a new version? HOT 1
- Attachment not detected HOT 3
- Breaking change: do not modify header case? HOT 2
- Request: Additional case-normalizing before passing into mime.ParseMediaType
- Type of the `(*enmime.Part).Header` field is now private HOT 3
- BCC addresses are not added HOT 5
- Use `errors.Is()` and `errors.As()` to compare error values and types
- EML Attactment Issue
- Parser can loose the original content format
- Failing to find attachments for emails sent by apple mail HOT 2
- ReadPartErrorPolicy not called for part content with malformed base64
- Update golangci-lint linters config
- BUG: GetHeader() doesn't add quote for especials
Recommend Projects
-
React
A declarative, efficient, and flexible JavaScript library for building user interfaces.
-
Vue.js
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
-
Typescript
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
-
TensorFlow
An Open Source Machine Learning Framework for Everyone
-
Django
The Web framework for perfectionists with deadlines.
-
Laravel
A PHP framework for web artisans
-
D3
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
-
Recommend Topics
-
javascript
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
-
web
Some thing interesting about web. New door for the world.
-
server
A server is a program made to process requests and deliver data to clients.
-
Machine learning
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
-
Visualization
Some thing interesting about visualization, use data art
-
Game
Some thing interesting about game, make everyone happy.
Recommend Org
-
Facebook
We are working to build community through open source technology. NB: members must have two-factor auth.
-
Microsoft
Open source projects and samples from Microsoft.
-
Google
Google ❤️ Open Source for everyone.
-
Alibaba
Alibaba Open Source for everyone
-
D3
Data-Driven Documents codes.
-
Tencent
China tencent open source team.
from enmime.