src/url.rs - third_party/github.com/alacritty/alacritty - Git at Google

 // Copyright 2016 Joe Wilm, The Alacritty Project Contributors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 use url::Url;

 // See https://tools.ietf.org/html/rfc3987#page-13
 const URL_SEPARATOR_CHARS: [char; 10] = ['<', '>', '"', ' ', '{', '}', '|', '\\', '^', '`'];
 const URL_DENY_END_CHARS: [char; 8] = ['.', ',', ';', ':', '?', '!', '/', '('];
 const URL_SCHEMES: [&str; 8] = [
     "http", "https", "mailto", "news", "file", "git", "ssh", "ftp",
 ];

 // Parser for streaming inside-out detection of URLs.
 pub struct UrlParser {
     state: String,
 }

 impl UrlParser {
     pub fn new() -> Self {
         UrlParser {
             state: String::new(),
         }
     }

     /// Advance the parser one character to the left.
     pub fn advance_left(&mut self, c: char) -> bool {
         self.advance(c, 0)
     }

     /// Advance the parser one character to the right.
     pub fn advance_right(&mut self, c: char) -> bool {
         self.advance(c, self.state.len())
     }

     /// Returns the URL if the parser has found any.
     pub fn url(mut self) -> Option<String> {
         // Remove non-alphabetical characters before scheme
         if let Some(index) = self.state.find("://") {
             for i in (0..index - 1).rev() {
                 match self.state.chars().nth(i).unwrap() {
                     'a'...'z' | 'A'...'Z' => (),
                     _ => {
                         self.state = self.state.split_off(i + 1);
                         break;
                     }
                 }
             }
         }

         // Remove non-matching parenthesis and brackets
         let mut open_parens_count: isize = 0;
         let mut open_bracks_count: isize = 0;
         for (i, c) in self.state.chars().enumerate() {
             match c {
                 '(' => open_parens_count += 1,
                 ')' if open_parens_count > 0 => open_parens_count -= 1,
                 '[' => open_bracks_count += 1,
                 ']' if open_bracks_count > 0 => open_bracks_count -= 1,
                 ')' | ']' => {
                     self.state.truncate(i);
                     break;
                 }
                 _ => (),
             }
         }

         // Track number of quotes
         let mut num_quotes = self.state.chars().filter(|&c| c == '\'').count();

         // Remove all characters which aren't allowed at the end of a URL
         while !self.state.is_empty()
             && (URL_DENY_END_CHARS.contains(&self.state.chars().last().unwrap())
                 || (num_quotes % 2 != 0 && self.state.ends_with('\''))
                 || self.state.ends_with("''")
                 || self.state.ends_with("()"))
         {
             if self.state.pop().unwrap() == '\'' {
                 num_quotes -= 1;
             }
         }

         // Check if string is valid url
         match Url::parse(&self.state) {
             Ok(url) => {
                 if URL_SCHEMES.contains(&url.scheme()) {
                     Some(self.state)
                 } else {
                     None
                 }
             }
             Err(_) => None,
         }
     }

     fn advance(&mut self, c: char, pos: usize) -> bool {
         if URL_SEPARATOR_CHARS.contains(&c)
             || (c >= '\u{00}' && c <= '\u{1F}')
             || (c >= '\u{7F}' && c <= '\u{9F}')
         {
             true
         } else {
             self.state.insert(pos, c);
             false
         }
     }
 }

 #[cfg(test)]
 mod tests {
     use std::mem;

     use crate::grid::Grid;
     use crate::index::{Column, Line, Point};
     use crate::term::{Cell, Search, SizeInfo, Term};

     fn url_create_term(input: &str) -> Term {
         let size = SizeInfo {
             width: 21.0,
             height: 51.0,
             cell_width: 3.0,
             cell_height: 3.0,
             padding_x: 0.0,
             padding_y: 0.0,
             dpr: 1.0,
         };

         let mut term = Term::new(&Default::default(), size);
         let mut grid: Grid<Cell> = Grid::new(Line(1), Column(input.len()), 0, Cell::default());

         for (i, c) in input.chars().enumerate() {
             grid[Line(0)][Column(i)].c = c;
         }

         mem::swap(term.grid_mut(), &mut grid);

         term
     }

     fn url_test(input: &str, expected: &str, click_index: usize) {
         let term = url_create_term(input);

         let url = term.url_search(Point::new(0, Column(click_index)));

         assert_eq!(url, Some(expected.into()));
     }

     #[test]
     fn url_skip_invalid() {
         let term = url_create_term("no url here");
         let url = term.url_search(Point::new(0, Column(4)));
         assert_eq!(url, None);
     }

     #[test]
     fn url_matching_chars() {
         url_test("(https://example.org/test(ing))", "https://example.org/test(ing)", 5);
         url_test("https://example.org/test(ing)", "https://example.org/test(ing)", 5);
         url_test("((https://example.org))", "https://example.org", 5);
         url_test(")https://example.org(", "https://example.org", 5);
         url_test("https://example.org)", "https://example.org", 5);
         url_test("https://example.org(", "https://example.org", 5);
         url_test("(https://one.org/)(https://two.org/)", "https://one.org", 5);

         url_test("https://[2001:db8:a0b:12f0::1]:80", "https://[2001:db8:a0b:12f0::1]:80", 5);
         url_test("([(https://example.org/test(ing))])", "https://example.org/test(ing)", 5);
         url_test("https://example.org/]()", "https://example.org", 5);
         url_test("[https://example.org]", "https://example.org", 5);

         url_test("'https://example.org/test'ing'''", "https://example.org/test'ing'", 5);
         url_test("https://example.org/test'ing'", "https://example.org/test'ing'", 5);
         url_test("'https://example.org'", "https://example.org", 5);
         url_test("'https://example.org", "https://example.org", 5);
         url_test("https://example.org'", "https://example.org", 5);
     }

     #[test]
     fn url_detect_end() {
         url_test("https://example.org/test\u{00}ing", "https://example.org/test", 5);
         url_test("https://example.org/test\u{1F}ing", "https://example.org/test", 5);
         url_test("https://example.org/test\u{7F}ing", "https://example.org/test", 5);
         url_test("https://example.org/test\u{9F}ing", "https://example.org/test", 5);
         url_test("https://example.org/test\ting", "https://example.org/test", 5);
         url_test("https://example.org/test ing", "https://example.org/test", 5);
     }

     #[test]
     fn url_remove_end_chars() {
         url_test("https://example.org/test?ing", "https://example.org/test?ing", 5);
         url_test("https://example.org.,;:)'!/?", "https://example.org", 5);
         url_test("https://example.org'.", "https://example.org", 5);
     }

     #[test]
     fn url_remove_start_chars() {
         url_test("complicated:https://example.org", "https://example.org", 15);
         url_test("test.https://example.org", "https://example.org", 10);
         url_test(",https://example.org", "https://example.org", 5);
     }

     #[test]
     fn url_unicode() {
         url_test("https://xn--example-2b07f.org", "https://xn--example-2b07f.org", 5);
         url_test("https://example.org/\u{2008A}", "https://example.org/\u{2008A}", 5);
         url_test("https://example.org/\u{f17c}", "https://example.org/\u{f17c}", 5);
         url_test("https://üñîçøðé.com/ä", "https://üñîçøðé.com/ä", 5);
     }

     #[test]
     fn url_schemes() {
         url_test("mailto://example.org", "mailto://example.org", 5);
         url_test("https://example.org", "https://example.org", 5);
         url_test("http://example.org", "http://example.org", 5);
         url_test("news://example.org", "news://example.org", 5);
         url_test("file://example.org", "file://example.org", 5);
         url_test("git://example.org", "git://example.org", 5);
         url_test("ssh://example.org", "ssh://example.org", 5);
         url_test("ftp://example.org", "ftp://example.org", 5);
     }
 }
	// Copyright 2016 Joe Wilm, The Alacritty Project Contributors
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	use url::Url;

	// See https://tools.ietf.org/html/rfc3987#page-13
	const URL_SEPARATOR_CHARS: [char; 10] = ['<', '>', '"', ' ', '{', '}', '\|', '\\', '^', '`'];
	const URL_DENY_END_CHARS: [char; 8] = ['.', ',', ';', ':', '?', '!', '/', '('];
	const URL_SCHEMES: [&str; 8] = [
	"http", "https", "mailto", "news", "file", "git", "ssh", "ftp",
	];

	// Parser for streaming inside-out detection of URLs.
	pub struct UrlParser {
	state: String,
	}

	impl UrlParser {
	pub fn new() -> Self {
	UrlParser {
	state: String::new(),
	}
	}

	/// Advance the parser one character to the left.
	pub fn advance_left(&mut self, c: char) -> bool {
	self.advance(c, 0)
	}

	/// Advance the parser one character to the right.
	pub fn advance_right(&mut self, c: char) -> bool {
	self.advance(c, self.state.len())
	}

	/// Returns the URL if the parser has found any.
	pub fn url(mut self) -> Option<String> {
	// Remove non-alphabetical characters before scheme
	if let Some(index) = self.state.find("://") {
	for i in (0..index - 1).rev() {
	match self.state.chars().nth(i).unwrap() {
	'a'...'z' \| 'A'...'Z' => (),
	_ => {
	self.state = self.state.split_off(i + 1);
	break;
	}
	}
	}
	}

	// Remove non-matching parenthesis and brackets
	let mut open_parens_count: isize = 0;
	let mut open_bracks_count: isize = 0;
	for (i, c) in self.state.chars().enumerate() {
	match c {
	'(' => open_parens_count += 1,
	')' if open_parens_count > 0 => open_parens_count -= 1,
	'[' => open_bracks_count += 1,
	']' if open_bracks_count > 0 => open_bracks_count -= 1,
	')' \| ']' => {
	self.state.truncate(i);
	break;
	}
	_ => (),
	}
	}

	// Track number of quotes
	let mut num_quotes = self.state.chars().filter(\|&c\| c == '\'').count();

	// Remove all characters which aren't allowed at the end of a URL
	while !self.state.is_empty()
	&& (URL_DENY_END_CHARS.contains(&self.state.chars().last().unwrap())
	\|\| (num_quotes % 2 != 0 && self.state.ends_with('\''))
	\|\| self.state.ends_with("''")
	\|\| self.state.ends_with("()"))
	{
	if self.state.pop().unwrap() == '\'' {
	num_quotes -= 1;
	}
	}

	// Check if string is valid url
	match Url::parse(&self.state) {
	Ok(url) => {
	if URL_SCHEMES.contains(&url.scheme()) {
	Some(self.state)
	} else {
	None
	}
	}
	Err(_) => None,
	}
	}

	fn advance(&mut self, c: char, pos: usize) -> bool {
	if URL_SEPARATOR_CHARS.contains(&c)
	\|\| (c >= '\u{00}' && c <= '\u{1F}')
	\|\| (c >= '\u{7F}' && c <= '\u{9F}')
	{
	true
	} else {
	self.state.insert(pos, c);
	false
	}
	}
	}

	#[cfg(test)]
	mod tests {
	use std::mem;

	use crate::grid::Grid;
	use crate::index::{Column, Line, Point};
	use crate::term::{Cell, Search, SizeInfo, Term};

	fn url_create_term(input: &str) -> Term {
	let size = SizeInfo {
	width: 21.0,
	height: 51.0,
	cell_width: 3.0,
	cell_height: 3.0,
	padding_x: 0.0,
	padding_y: 0.0,
	dpr: 1.0,
	};

	let mut term = Term::new(&Default::default(), size);
	let mut grid: Grid<Cell> = Grid::new(Line(1), Column(input.len()), 0, Cell::default());

	for (i, c) in input.chars().enumerate() {
	grid[Line(0)][Column(i)].c = c;
	}

	mem::swap(term.grid_mut(), &mut grid);

	term
	}

	fn url_test(input: &str, expected: &str, click_index: usize) {
	let term = url_create_term(input);

	let url = term.url_search(Point::new(0, Column(click_index)));

	assert_eq!(url, Some(expected.into()));
	}

	#[test]
	fn url_skip_invalid() {
	let term = url_create_term("no url here");
	let url = term.url_search(Point::new(0, Column(4)));
	assert_eq!(url, None);
	}

	#[test]
	fn url_matching_chars() {
	url_test("(https://example.org/test(ing))", "https://example.org/test(ing)", 5);
	url_test("https://example.org/test(ing)", "https://example.org/test(ing)", 5);
	url_test("((https://example.org))", "https://example.org", 5);
	url_test(")https://example.org(", "https://example.org", 5);
	url_test("https://example.org)", "https://example.org", 5);
	url_test("https://example.org(", "https://example.org", 5);
	url_test("(https://one.org/)(https://two.org/)", "https://one.org", 5);

	url_test("https://[2001:db8:a0b:12f0::1]:80", "https://[2001:db8:a0b:12f0::1]:80", 5);
	url_test("([(https://example.org/test(ing))])", "https://example.org/test(ing)", 5);
	url_test("https://example.org/]()", "https://example.org", 5);
	url_test("[https://example.org]", "https://example.org", 5);

	url_test("'https://example.org/test'ing'''", "https://example.org/test'ing'", 5);
	url_test("https://example.org/test'ing'", "https://example.org/test'ing'", 5);
	url_test("'https://example.org'", "https://example.org", 5);
	url_test("'https://example.org", "https://example.org", 5);
	url_test("https://example.org'", "https://example.org", 5);
	}

	#[test]
	fn url_detect_end() {
	url_test("https://example.org/test\u{00}ing", "https://example.org/test", 5);
	url_test("https://example.org/test\u{1F}ing", "https://example.org/test", 5);
	url_test("https://example.org/test\u{7F}ing", "https://example.org/test", 5);
	url_test("https://example.org/test\u{9F}ing", "https://example.org/test", 5);
	url_test("https://example.org/test\ting", "https://example.org/test", 5);
	url_test("https://example.org/test ing", "https://example.org/test", 5);
	}

	#[test]
	fn url_remove_end_chars() {
	url_test("https://example.org/test?ing", "https://example.org/test?ing", 5);
	url_test("https://example.org.,;:)'!/?", "https://example.org", 5);
	url_test("https://example.org'.", "https://example.org", 5);
	}

	#[test]
	fn url_remove_start_chars() {
	url_test("complicated:https://example.org", "https://example.org", 15);
	url_test("test.https://example.org", "https://example.org", 10);
	url_test(",https://example.org", "https://example.org", 5);
	}

	#[test]
	fn url_unicode() {
	url_test("https://xn--example-2b07f.org", "https://xn--example-2b07f.org", 5);
	url_test("https://example.org/\u{2008A}", "https://example.org/\u{2008A}", 5);
	url_test("https://example.org/\u{f17c}", "https://example.org/\u{f17c}", 5);
	url_test("https://üñîçøðé.com/ä", "https://üñîçøðé.com/ä", 5);
	}

	#[test]
	fn url_schemes() {
	url_test("mailto://example.org", "mailto://example.org", 5);
	url_test("https://example.org", "https://example.org", 5);
	url_test("http://example.org", "http://example.org", 5);
	url_test("news://example.org", "news://example.org", 5);
	url_test("file://example.org", "file://example.org", 5);
	url_test("git://example.org", "git://example.org", 5);
	url_test("ssh://example.org", "ssh://example.org", 5);
	url_test("ftp://example.org", "ftp://example.org", 5);
	}
	}