Reputation: 417
I am using the nh3 library with my Django project to sanitize my HTML of my rendered markdown. I also have fenced code blocks and code block highlighting implemented. If I do not use mark_safe on my nh3 cleaned markdown, all my rich text markdown becomes html code. If I use mark_safe in my Post model after cleaning the markdown, it no longer appears as html code. This is what I have in my Post model's get_message_as_markdown function responsible for generating markdown:
from markdown import markdown
import nh3
def get_message_as_markdown(self):
clean_content = nh3.clean(self.message)
rendered_content = markdown(clean_content, extensions=['fenced_code', 'codehilite'])
return mark_safe(rendered_content)
Is this "safe" to do? Thanks in advance!
Edit: I ended up implementing nh3 in my Django project. it works nicely. It removes “offending” tags from the live html, which is great. But the offending tags are still present in my edit_post.html textarea. Is that what is supposed to happen?
This is how I use it in my project:
# boards/forms.py
import nh3
class SanitizedTextareaField(forms.CharField):
def clean(self, value):
value = super().clean(value)
return nh3.clean(value, tags={
"a",
"abbr",
"acronym",
"b",
"blockquote",
"code",
"em",
"i",
"li",
"ol",
"strong",
"ul",
},
attributes={
"a": {"href", "title"},
"abbr": {"title"},
"acronym": {"title"},
},
url_schemes={"https"},
link_rel=None,)
class PostForm(forms.ModelForm):
message = SanitizedTextareaField(widget=forms.Textarea)
class Meta:
model = Post
fields = ['message', ]
# boards/models.py
import nh3
class Post(models.Model):
message = models.TextField()
topic = models.ForeignKey(Topic, on_delete=models.CASCADE, related_name="posts")
created_at = models.DateTimeField(auto_now_add=True)
updated_at = models.DateTimeField(null=True)
created_by = models.ForeignKey(User, on_delete=models.CASCADE, related_name="posts")
updated_by = models.ForeignKey(
User, on_delete=models.CASCADE, null=True, related_name="+"
)
likes = models.ManyToManyField(User, blank=True, related_name="post_likes")
def total_likes(self):
return self.likes.count()
def __str__(self):
# truncated_message = Truncator(self.message)
# return truncated_message.chars(30)
return self.message
def get_absolute_url(self):
return reverse("post_detail", kwargs={"pk": self.pk})
def get_message_as_markdown(self):
clean_content = nh3.clean(self.message, tags={
"a",
"abbr",
"acronym",
"b",
"blockquote",
"code",
"em",
"i",
"li",
"ol",
"strong",
"ul",
},
attributes={
"a": {"href", "title"},
"abbr": {"title"},
"acronym": {"title"},
},
url_schemes={"https"},
link_rel=None,)
rendered_content = markdown(clean_content, extensions=['fenced_code', 'codehilite'])
return mark_safe(rendered_content)
nh3.clean() removes any html element which is not listed in tags, but the offending tags still appear in edit_post.html. Like the script tag, for example:
But in the post-detail.html view, it does not appear:
And same with the live html:
my implementation of nh3.clean() might be a bit of overkill. The behavior is the same whether I implement it both in forms.py and models.py, or just one of them I have never done this before in Python/Django, and I don’t want to be relaying information or code that does not safeguard my users and site from bad actors (i.e., css attacks, for example).
And btw, as indicated in the code above, I also have implemented fenced code and code highlighting in my markdown.
Lastly, if I do not include anchor elements in my nh3 allowed tags, I still am able to create them successfully in the markdown. Why is that? So I am wondering if there are other tags which are overlooked by nh3 even though they are not included in tags. Thanks in advance for any feedback!
I am not finding my answers in any nh3 documentation or anywhere else. And bleach is deprecated and has vulnerability issues. No longer maintained.
Final Edit:
I succeeded in removing offending tags from the edit_post view. This is the code I have now and that works as it should:
# boards/forms.py
import nh3
class HtmlSanitizedCharField(forms.CharField):
def to_python(self, value):
value = super().to_python(value)
if value not in self.empty_values:
value = nh3.clean(
value,
# Allow only tags and attributes from our rich text editor
tags={
"a",
"abbr",
"acronym",
"b",
"blockquote",
"code",
"em",
"I",
"li",
"ol",
"strong",
"ul",
"s",
"sup",
"sub",
},
attributes={
"a": {"href"},
"abbr": {"title"},
"acronym": {"title"},
},
url_schemes={"https"},
link_rel=None,)
return value
class PostForm(forms.ModelForm):
message = HtmlSanitizedCharField(widget=forms.Textarea)
class Meta:
model = Post
fields = ['message', ]
Then:
# boards/models.py
class Post(models.Model):
message = models.TextField()
def get_message_as_markdown(self):
clean_content = nh3.clean(self.message, tags={
"a",
"abbr",
"acronym",
"b",
"blockquote",
"code",
"em",
"I",
"li",
"ol",
"strong",
"ul",
"s",
"sup",
"sub",
},
attributes={
"a": {"href"},
"abbr": {"title"},
"acronym": {"title"},
},
url_schemes={"http", "https", "mailto"},
link_rel=None,)
rendered_content = markdown(clean_content, extensions=['fenced_code', 'codehilite'])
return mark_safe(rendered_content)
Then:
# boards/views.py
@method_decorator(login_required, name='dispatch')
class PostUpdateView(UpdateView):
model = Post
fields = ('message', )
template_name = 'edit_post.html'
pk_url_kwarg = 'post_pk'
context_object_name = 'post'
success_url = "/"
def get_queryset(self):
queryset = super().get_queryset()
return queryset.filter(created_by=self.request.user)
def form_valid(self, form):
if form:
form.instance.message = nh3.clean(form.instance.message, # Allow only tags and attributes from our rich text editor
tags={
"a",
"abbr",
"acronym",
"b",
"blockquote",
"code",
"em",
"I",
"li",
"ol",
"strong",
"ul",
"s",
"sup",
"sub",
},
attributes={
"a": {"href"},
"abbr": {"title"},
"acronym": {"title"},
},
url_schemes={"https"},
link_rel=None,)
super().form_valid(form)
post = form.save(commit=False)
post.updated_by = self.request.user
post.updated_at = timezone.now()
post.save()
print(post.save, 'save the updated data')
return redirect('topic_posts', pk=post.topic.board.pk, topic_pk=post.topic.pk)
Upvotes: 0
Views: 78