GitLab is used only for code review, issue tracking and project management. Canonical locations for source code are still https://gitweb.torproject.org/, https://git.torproject.org/, and git-rw.torproject.org.

Commit 277ba71b authored by Cecylia Bocovich

Make locale parser more robust

This change expands the locale parser to have the following properties:
- if only the language code is given, chooses the regionalization that
occurs first in the locale list (e.g., "en" --> "en-US")
- if regionalization for the language is *not* present, chooses the
generalized language or a different regionalization (e.g., "pt-PT" -->
"pt-BR")
- parses both the subject and body looking for the most specific
regionalization
- defaults to en-US if no available language is found
parent 4fa4d636
......@@ -116,8 +116,12 @@ class EmailParser(object):
def parse_keywords(self, text, request):
for word in re.split(r"\s+", text.strip()):
if word.lower() in self.locales:
request["language"] = word.lower()
for locale in self.locales:
if word.lower() == locale.lower():
request["language"] = locale
elif (not request["language"]) and (word.lower()[:2] ==
locale.lower()[:2]):
request["language"] = locale
if word.lower() in self.platforms:
request["command"] = "links"
request["platform"] = word.lower()
......@@ -143,8 +147,11 @@ class EmailParser(object):
subject = subject.group(1)
request = self.parse_keywords(subject, request)
if not request["command"] or not request["language"]:
request = self.parse_keywords(msg_str, request)
# Always parse the body too, to see if there's more specific information
request = self.parse_keywords(msg_str, request)
if not request["language"]:
request["language"] = "en-US"
return request
......
......@@ -82,17 +82,61 @@ class EmailServiceTests(unittest.TestCase):
def test_language_email_parser(self):
ep = conftests.EmailParser(self.settings, "gettor@torproject.org")
ep.locales = ["en", "ru"]
request = ep.parse("From: \"silvia [hiro]\" <hiro@torproject.org>\n Subject: \r\n Reply-To: hiro@torproject.org \nTo: gettor@torproject.org\n osx en")
self.assertEqual(request["command"], "links")
self.assertEqual(request["platform"], "osx")
self.assertEqual(request["language"], "en")
request = ep.parse("From: \"silvia [hiro]\" <hiro@torproject.org>\n Subject: \r\n Reply-To: hiro@torproject.org \nTo: gettor@torproject.org\n linux ru")
self.assertEqual(request["command"], "links")
self.assertEqual(request["platform"], "linux")
self.assertEqual(request["language"], "ru")
ep.locales = ["en-US", "es-ES", "es-AR", "pt-BR", "fa"]
request = ep.parse("From: \"silvia [hiro]\" <hiro@torproject.org>\n"
"Subject: \r\n Reply-To: hiro@torproject.org \nTo:"
"gettor@torproject.org\n osx en")
self.assertEqual(request["language"], "en-US")
request = ep.parse("From: \"silvia [hiro]\" <hiro@torproject.org>\n"
"Subject: \r\n Reply-To: hiro@torproject.org \nTo:"
"gettor@torproject.org\n osx ES")
self.assertEqual(request["language"], "es-ES")
request = ep.parse("From: \"silvia [hiro]\" <hiro@torproject.org>\n"
"Subject: \r\n Reply-To: hiro@torproject.org \nTo:"
"gettor@torproject.org\n osx en-US")
self.assertEqual(request["language"], "en-US")
request = ep.parse("From: \"silvia [hiro]\" <hiro@torproject.org>\n"
"Subject: \r\n Reply-To: hiro@torproject.org \nTo:"
"gettor@torproject.org\n linux fa")
self.assertEqual(request["language"], "fa")
request = ep.parse("From: \"silvia [hiro]\" <hiro@torproject.org>\n"
"Subject: \r\n Reply-To: hiro@torproject.org \nTo:"
"gettor@torproject.org\n osx es")
self.assertEqual(request["language"], "es-ES")
request = ep.parse("From: \"silvia [hiro]\" <hiro@torproject.org>\n"
"Subject: \r\n Reply-To: hiro@torproject.org \nTo:"
"gettor@torproject.org\n linux zz")
self.assertEqual(request["language"], "en-US")
request = ep.parse("From: \"silvia [hiro]\" <hiro@torproject.org>\n"
"Subject: \r\n Reply-To: hiro@torproject.org \nTo:"
"gettor@torproject.org\n linux pt-PT")
self.assertEqual(request["language"], "pt-BR")
request = ep.parse("From: \"silvia [hiro]\" <hiro@torproject.org>\n"
"Subject: \r\n Reply-To: hiro@torproject.org \nTo:"
"gettor@torproject.org\n linux es-AR")
self.assertEqual(request["language"], "es-AR")
request = ep.parse("From: \"silvia [hiro]\" <hiro@torproject.org>\n"
"Subject: linux es\r\n Reply-To: hiro@torproject.org \nTo:"
"gettor@torproject.org\n linux es-AR")
self.assertEqual(request["language"], "es-AR")
request = ep.parse("From: \"silvia [hiro]\" <hiro@torproject.org>\n"
"Subject: linux es\r\n Reply-To: hiro@torproject.org \nTo:"
"gettor@torproject.org\n linux")
self.assertEqual(request["language"], "es-ES")
request = ep.parse("From: \"silvia [hiro]\" <hiro@torproject.org>\n"
"Subject: linux es-AR\r\n Reply-To: hiro@torproject.org \nTo:"
"gettor@torproject.org\n linux es")
self.assertEqual(request["language"], "es-AR")
def test_sent_links_message(self):
ep = self.sm_client
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment