Add permitted display characters check for post titles (#5692)

* Add permitted display characters check for post titles

* Remove accidental formatting change

* Use invisible-chars crate for validation

* Adding invisible chars library (#5759)

---------

Co-authored-by: Dessalines <dessalines@users.noreply.github.com>
Co-authored-by: Dessalines <tyhou13@gmx.com>
This commit is contained in:
SleeplessOne1917 2025-06-10 13:42:37 +00:00 committed by GitHub
parent 30cbd713ef
commit b77ab870d4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 21 additions and 60 deletions

2
.gitignore vendored
View File

@ -5,7 +5,7 @@ ansible/passwords/
# docker build files
docker/lemmy_mine.hjson
docker/dev/env_deploy.sh
volumes
docker/volumes
# ide config
.idea

12
Cargo.lock generated
View File

@ -2937,6 +2937,17 @@ dependencies = [
"generic-array",
]
[[package]]
name = "invisible-characters"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c68bbf95a074c9961fa4f8c43d172557101239c508d18f74e87e7a41d6ab4ac"
dependencies = [
"anyhow",
"serde",
"serde_json",
]
[[package]]
name = "io-extras"
version = "0.18.4"
@ -4004,6 +4015,7 @@ dependencies = [
"futures",
"git-version",
"http 1.3.1",
"invisible-characters",
"itertools 0.14.0",
"markdown-it",
"markdown-it-block-spoiler",

View File

@ -82,6 +82,7 @@ markdown-it-footnote = "0.2.0"
moka = { workspace = true, optional = true }
git-version = "0.3.9"
unicode-segmentation = "1.12.0"
invisible-characters = "0.1.3"
[dev-dependencies]
pretty_assertions = { workspace = true }

View File

@ -1,5 +1,6 @@
use crate::error::{LemmyErrorExt, LemmyErrorType, LemmyResult, MAX_API_PARAM_ELEMENTS};
use clearurls::UrlCleaner;
use invisible_characters::INVISIBLE_CHARS;
use itertools::Itertools;
use regex::{Regex, RegexBuilder, RegexSet};
use std::sync::LazyLock;
@ -30,62 +31,6 @@ const MIN_LENGTH_BLOCKING_KEYWORD: usize = 3;
const MAX_LENGTH_BLOCKING_KEYWORD: usize = 50;
const TAG_NAME_MIN_LENGTH: usize = 3;
const TAG_NAME_MAX_LENGTH: usize = 100;
// Invisible Unicode characters, taken from https://invisible-characters.com/
const FORBIDDEN_DISPLAY_CHARS: [char; 53] = [
'\u{0009}',
'\u{00a0}',
'\u{00ad}',
'\u{034f}',
'\u{061c}',
'\u{115f}',
'\u{1160}',
'\u{17b4}',
'\u{17b5}',
'\u{180e}',
'\u{2000}',
'\u{2001}',
'\u{2002}',
'\u{2003}',
'\u{2004}',
'\u{2005}',
'\u{2006}',
'\u{2007}',
'\u{2008}',
'\u{2009}',
'\u{200a}',
'\u{200b}',
'\u{200c}',
'\u{200d}',
'\u{200e}',
'\u{200f}',
'\u{202f}',
'\u{205f}',
'\u{2060}',
'\u{2061}',
'\u{2062}',
'\u{2063}',
'\u{2064}',
'\u{206a}',
'\u{206b}',
'\u{206c}',
'\u{206d}',
'\u{206e}',
'\u{206f}',
'\u{3000}',
'\u{2800}',
'\u{3164}',
'\u{feff}',
'\u{ffa0}',
'\u{1d159}',
'\u{1d173}',
'\u{1d174}',
'\u{1d175}',
'\u{1d176}',
'\u{1d177}',
'\u{1d178}',
'\u{1d179}',
'\u{1d17a}',
];
fn has_newline(name: &str) -> bool {
name.contains('\n')
@ -113,7 +58,7 @@ pub fn is_valid_actor_name(name: &str, actor_name_max_length: i32) -> LemmyResul
fn has_3_permitted_display_chars(name: &str) -> bool {
let mut num_non_fdc: i8 = 0;
for c in name.chars() {
if !FORBIDDEN_DISPLAY_CHARS.contains(&c) {
if !INVISIBLE_CHARS.contains(&c) {
num_non_fdc += 1;
if num_non_fdc >= 3 {
break;
@ -130,7 +75,7 @@ fn has_3_permitted_display_chars(name: &str) -> bool {
pub fn is_valid_display_name(name: &str, actor_name_max_length: i32) -> LemmyResult<()> {
let actor_name_max_length: usize = actor_name_max_length.try_into()?;
let check = !name.starts_with('@')
&& !name.starts_with(FORBIDDEN_DISPLAY_CHARS)
&& !name.starts_with(INVISIBLE_CHARS)
&& name.chars().count() <= actor_name_max_length
&& !has_newline(name)
&& has_3_permitted_display_chars(name);
@ -152,7 +97,8 @@ pub fn is_valid_matrix_id(matrix_id: &str) -> LemmyResult<()> {
pub fn is_valid_post_title(title: &str) -> LemmyResult<()> {
let length = title.trim().chars().count();
let check = (3..=200).contains(&length) && !has_newline(title);
let check =
(3..=200).contains(&length) && !has_newline(title) && has_3_permitted_display_chars(title);
if !check {
Err(LemmyErrorType::InvalidPostTitle.into())
} else {
@ -540,6 +486,8 @@ Line3",
.is_ok());
assert!(is_valid_post_title(" POST TITLE 😃😃😃😃😃").is_ok());
assert!(is_valid_post_title("\n \n \n \n ").is_err()); // tabs/spaces/newlines
assert!(is_valid_post_title("\u{206a}").is_err()); // invisible chars
assert!(is_valid_post_title("\u{1f3f3}\u{fe0f}\u{200d}\u{26a7}\u{fe0f}").is_ok());
}
#[test]