diff --git a/gettor/parse/email.py b/gettor/parse/email.py index d487684dfd31283c87f72699c86df6385451d20a..874b1cd1662fdf1572d13c380affbaa8e3ba546d 100644 --- a/gettor/parse/email.py +++ b/gettor/parse/email.py @@ -116,8 +116,12 @@ class EmailParser(object): def parse_keywords(self, text, request): for word in re.split(r"\s+", text.strip()): - if word.lower() in self.locales: - request["language"] = word.lower() + for locale in self.locales: + if word.lower() == locale.lower(): + request["language"] = locale + elif (not request["language"]) and (word.lower()[:2] == + locale.lower()[:2]): + request["language"] = locale if word.lower() in self.platforms: request["command"] = "links" request["platform"] = word.lower() @@ -143,8 +147,11 @@ class EmailParser(object): subject = subject.group(1) request = self.parse_keywords(subject, request) - if not request["command"] or not request["language"]: - request = self.parse_keywords(msg_str, request) + # Always parse the body too, to see if there's more specific information + request = self.parse_keywords(msg_str, request) + + if not request["language"]: + request["language"] = "en-US" return request diff --git a/tests/test_email_service.py b/tests/test_email_service.py index 407937c68f01920a8fd8ecea0424a0acd78291a6..00795c1bb437d2e08d95c43b632fc3e023ed0c4f 100644 --- a/tests/test_email_service.py +++ b/tests/test_email_service.py @@ -82,17 +82,61 @@ class EmailServiceTests(unittest.TestCase): def test_language_email_parser(self): ep = conftests.EmailParser(self.settings, "gettor@torproject.org") - ep.locales = ["en", "ru"] - request = ep.parse("From: \"silvia [hiro]\" \n Subject: \r\n Reply-To: hiro@torproject.org \nTo: gettor@torproject.org\n osx en") - self.assertEqual(request["command"], "links") - self.assertEqual(request["platform"], "osx") - self.assertEqual(request["language"], "en") - - request = ep.parse("From: \"silvia [hiro]\" \n Subject: \r\n Reply-To: hiro@torproject.org \nTo: gettor@torproject.org\n linux ru") - self.assertEqual(request["command"], "links") - self.assertEqual(request["platform"], "linux") - self.assertEqual(request["language"], "ru") - + ep.locales = ["en-US", "es-ES", "es-AR", "pt-BR", "fa"] + request = ep.parse("From: \"silvia [hiro]\" \n" + "Subject: \r\n Reply-To: hiro@torproject.org \nTo:" + "gettor@torproject.org\n osx en") + self.assertEqual(request["language"], "en-US") + + request = ep.parse("From: \"silvia [hiro]\" \n" + "Subject: \r\n Reply-To: hiro@torproject.org \nTo:" + "gettor@torproject.org\n osx ES") + self.assertEqual(request["language"], "es-ES") + + request = ep.parse("From: \"silvia [hiro]\" \n" + "Subject: \r\n Reply-To: hiro@torproject.org \nTo:" + "gettor@torproject.org\n osx en-US") + self.assertEqual(request["language"], "en-US") + + request = ep.parse("From: \"silvia [hiro]\" \n" + "Subject: \r\n Reply-To: hiro@torproject.org \nTo:" + "gettor@torproject.org\n linux fa") + self.assertEqual(request["language"], "fa") + + request = ep.parse("From: \"silvia [hiro]\" \n" + "Subject: \r\n Reply-To: hiro@torproject.org \nTo:" + "gettor@torproject.org\n osx es") + self.assertEqual(request["language"], "es-ES") + + request = ep.parse("From: \"silvia [hiro]\" \n" + "Subject: \r\n Reply-To: hiro@torproject.org \nTo:" + "gettor@torproject.org\n linux zz") + self.assertEqual(request["language"], "en-US") + + request = ep.parse("From: \"silvia [hiro]\" \n" + "Subject: \r\n Reply-To: hiro@torproject.org \nTo:" + "gettor@torproject.org\n linux pt-PT") + self.assertEqual(request["language"], "pt-BR") + + request = ep.parse("From: \"silvia [hiro]\" \n" + "Subject: \r\n Reply-To: hiro@torproject.org \nTo:" + "gettor@torproject.org\n linux es-AR") + self.assertEqual(request["language"], "es-AR") + + request = ep.parse("From: \"silvia [hiro]\" \n" + "Subject: linux es\r\n Reply-To: hiro@torproject.org \nTo:" + "gettor@torproject.org\n linux es-AR") + self.assertEqual(request["language"], "es-AR") + + request = ep.parse("From: \"silvia [hiro]\" \n" + "Subject: linux es\r\n Reply-To: hiro@torproject.org \nTo:" + "gettor@torproject.org\n linux") + self.assertEqual(request["language"], "es-ES") + + request = ep.parse("From: \"silvia [hiro]\" \n" + "Subject: linux es-AR\r\n Reply-To: hiro@torproject.org \nTo:" + "gettor@torproject.org\n linux es") + self.assertEqual(request["language"], "es-AR") def test_sent_links_message(self): ep = self.sm_client