GithubHelp home page GithubHelp logo

Comments (5)

requaos avatar requaos commented on August 28, 2024

@jhillyerd what type of encoding? Do you have a requirement to decode RFC2047 QP and B64 header values to plain-text?

from enmime.

requaos avatar requaos commented on August 28, 2024

I wrote this a while back to preserve the original order of the headers along with their decoded values. Note, however, that the resulting Decoded []byte value is meant for humans, i.e. it is Unicode and doesn't respect folding-whitespace or line-length requirements.

type (
	// HeadersPreserved is the result of Sort: the headers of a message in
	// the order they were encountered, together with a rendered copy.
	HeadersPreserved struct {
		// Decoded holds the rendered header block: one "Name: Value\r\n"
		// line per header followed by a terminating blank line.
		Decoded []byte
		// Headers lists the individual headers in their original order.
		Headers Headers
	}

	// Headers is an ordered list of header fields.
	Headers []Header

	// Header is a single header field.
	Header struct {
		// Name is the header key.
		Name string
		// Value is the header value.
		Value string
	}
)

// Sort parses the header section of the raw message b and returns its headers
// in the order they appear, with each value passed through RFC2047parts.
//
// b is first normalised with Clean. The header values are read with
// textproto's ReadMIMEHeader; the raw lines are then walked a second time
// purely to recover the order in which header names occurred. Each occurrence
// of a name consumes the next unread value stored for that name.
//
// The Decoded field of the result holds one "Name: Value\r\n" line per header
// followed by a terminating blank line. It is intended for human consumption:
// no folding or line-length limits are applied.
//
// An error is returned when ReadMIMEHeader fails with anything other than
// io.EOF, or when the header lines cannot be scanned.
func Sort(b []byte) (*HeadersPreserved, error) {
	b = Clean(b)
	tr := textproto.NewReader(bufio.NewReader(bytes.NewReader(b)))
	headers, err := tr.ReadMIMEHeader()
	switch errors.Cause(err) {
	case nil, io.EOF:
		// carry on, io.EOF is expected
	default:
		return nil, err
	}

	bs := bufio.NewScanner(bytes.NewReader(b))
	// The default scanner limit is 64 KiB per line; allow a line to be as long
	// as the whole input so long headers are not silently dropped.
	bs.Buffer(nil, len(b)+1)

	res := Headers{}
	bw := &bytes.Buffer{}
	for bs.Scan() {
		line := bs.Text()
		if line == "" {
			// A blank line terminates the header section; anything after it
			// is body content and must not be matched against header names.
			break
		}
		if strings.HasPrefix(line, " ") || strings.HasPrefix(line, "\t") {
			// Continuation line: its content is already part of the value
			// returned by ReadMIMEHeader.
			continue
		}
		i := strings.Index(line, ":")
		if i == -1 {
			continue
		}
		header := textproto.CanonicalMIMEHeaderKey(line[:i])
		if len(headers[header]) == 0 {
			// Something is off: every value for this name has already been
			// consumed (or none was parsed), so there is nothing to emit.
			continue
		}
		// Take the oldest unread value for this name and drop it from the list.
		firstValue := headers[header][0]
		headers[header] = headers[header][1:]

		h := Header{Name: header, Value: RFC2047parts(firstValue)}
		res = append(res, h)
		fmt.Fprintf(bw, "%s: %s\r\n", h.Name, h.Value)
	}
	if err := bs.Err(); err != nil {
		return nil, err
	}
	bw.WriteString("\r\n")

	return &HeadersPreserved{
		Decoded: bw.Bytes(),
		Headers: res,
	}, nil
}

// Clean returns a copy of b in which the header section is guaranteed to be
// followed by a blank CRLF line.
//
// The input is split after every CRLF. Leading pieces that contain a ':' or
// start with a space or tab are treated as header lines and copied verbatim.
// The first piece that is neither ends the header section: if it is not
// already a bare CRLF, one is inserted in front of it. Everything from there
// on is copied unchanged.
func Clean(b []byte) []byte {
	crlf := []byte("\r\n")
	out := make([]byte, 0, len(b)+2)
	inHeaders := true
	for _, line := range bytes.SplitAfter(b, crlf) {
		if inHeaders {
			folded := len(line) > 0 && (line[0] == ' ' || line[0] == '\t')
			if folded || bytes.IndexByte(line, ':') >= 0 {
				out = append(out, line...)
				continue
			}
			// First non-header line: make sure a separator precedes it.
			inHeaders = false
			if !bytes.Equal(line, crlf) {
				out = append(out, crlf...)
			}
		}
		out = append(out, line...)
	}

	return out
}

// RFC2047parts replaces every CR and LF in s with a space and then decodes
// RFC 2047 encoded content by calling rfc2047recurse repeatedly until it
// reports an error (io.EOF signals that nothing more can be decoded). The
// string returned by that final call is the result.
// RFC2047 Example:
//     `=?UTF-8?B?bmFtZT0iw7DCn8KUwoo=?=`
func RFC2047parts(s string) string {
	flatten := func(r rune) rune {
		switch r {
		case '\r', '\n':
			return ' '
		}
		return r
	}
	decoded := strings.Map(flatten, s)
	for {
		next, err := rfc2047recurse(decoded)
		decoded = next
		if err != nil {
			return decoded
		}
	}
}

// rfc2047recurse performs one decoding pass over s.
//
// It returns io.EOF when s contains neither "?Q?" nor "?B?" (compared
// case-insensitively), or when decoding leaves the value unchanged even after
// it has been repaired with fixRFC2047String. Errors from decodeHeader are
// returned as-is. A nil error means the value changed and another pass may
// decode more.
// RFC2047 Example:
//     `=?UTF-8?B?bmFtZT0iw7DCn8KUwoo=?=`
func rfc2047recurse(s string) (string, error) {
	upper := strings.ToUpper(s)
	hasQ := strings.Contains(upper, "?Q?")
	hasB := strings.Contains(upper, "?B?")
	if !hasQ && !hasB {
		return s, io.EOF
	}

	decoded, err := decodeHeader(s)
	if err != nil {
		return decoded, err
	}
	if decoded != s {
		return decoded, nil
	}

	// Nothing changed: retry once on a repaired copy of the value.
	decoded, err = decodeHeader(fixRFC2047String(decoded))
	if err != nil {
		return decoded, err
	}
	if decoded == s {
		return decoded, io.EOF
	}
	return decoded, nil
}

// decodeHeader decodes the RFC 2047 encoded-words in input with a
// mime.WordDecoder whose CharsetReader is NewCharsetReader. If decoding
// fails, the untouched input is returned together with the error.
func decodeHeader(input string) (string, error) {
	dec := mime.WordDecoder{CharsetReader: NewCharsetReader}
	decoded, err := dec.DecodeHeader(input)
	if err != nil {
		return input, err
	}
	return decoded, nil
}

// fixRFC2047String removes whitespace (space, CR, LF) that has ended up inside
// RFC 2047 encoded-words of the form "=?charset?encoding?text?=", for example
// through broken folding. Whitespace outside encoded-words is kept.
//
// An encoded-word starts at "=?" and ends at the first '=' that follows its
// third (or later) '?' separator, ignoring whitespace between the two. The
// scanner state is reset after every encoded-word, so several encoded-words
// in one value are each repaired, '?' characters outside an encoded-word are
// ignored, and base64 padding directly before the closing "?=" does not
// prevent the word from being closed.
//
// If an encoded-word is never terminated, all whitespace after its opening
// "=?" is removed.
func fixRFC2047String(s string) string {
	var (
		sb      strings.Builder
		inWord  bool // between an opening "=?" and its closing "?="
		delims  int  // '?' separators seen since the opening "=?"
		prevEq  bool // outside a word: the previous rune was '='
		prevSep bool // inside a word: the last non-whitespace rune was '?'
	)
	sb.Grow(len(s))
	for _, r := range s {
		switch r {
		case '=':
			if inWord && delims >= 3 && prevSep {
				// "?=" after the text section closes the encoded-word.
				inWord = false
				delims = 0
			} else if !inWord {
				// Possible start of an encoded-word; confirmed by a '?'.
				prevEq = true
			}
			prevSep = false
			sb.WriteRune(r)
		case '?':
			switch {
			case inWord:
				// Every '?' inside a word is a separator, including the
				// one that follows base64 padding.
				delims++
				prevSep = true
			case prevEq:
				inWord = true
				delims = 0
				prevSep = false
			}
			prevEq = false
			sb.WriteRune(r)
		case '\n', '\r', ' ':
			// Whitespace is only legal between encoded-words, never inside.
			if !inWord {
				sb.WriteRune(r)
			}
			prevEq = false
		default:
			prevEq = false
			prevSep = false
			sb.WriteRune(r)
		}
	}
	return sb.String()
}

For the NewCharsetReader, just use the one in the enmime internal pkg...

from enmime.

jhillyerd avatar jhillyerd commented on August 28, 2024

Yes, essentially human readable decoding. All I really want is for enmime to do the exact same decoding it does now when building an Envelope, but then to stop before it starts trying to process the body of the email.

from enmime.

requaos avatar requaos commented on August 28, 2024

@jhillyerd just let me know where you envision this being implemented and any special rules or strictures for output formatting. Got some free cycles for a week or so.

from enmime.

jhillyerd avatar jhillyerd commented on August 28, 2024

So the exact problem I'm trying to solve is here:

https://github.com/inbucket/inbucket/blob/master/pkg/message/manager.go#L53

I parse an entire email with enmime, but all I care about in that scenario is the From, To and Subject in UTF-8 from the primary header.

Returning the Envelope struct isn't mandatory. Let me know if that clarifies things.

from enmime.

Related Issues (20)

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. 📊📈🎉

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Some thing interesting about web. New door for the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Some thing interesting about game, make everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google ❤️ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents codes.