电速宝
Ви не можете вибрати більше 25 тем Теми мають розпочинатися з літери або цифри, можуть містити дефіси (-) і не повинні перевищувати 35 символів.

parseFullAddress.js 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317
  1. // utils/parseFullAddress.js
  2. // 完整的独立地址解析函数(支持返回完整格式:含代码、经纬度、POI等)
  3. // ===================== 基础配置 - 可根据实际需求扩展 =====================
  // 1. Lookup table from region names to administrative codes. This is a
  //    trimmed sample; pull the complete dataset from areaData when full
  //    coverage is required.
  const AREA_CODE_MAP = {
    // Provinces: name -> { code, names of the cities under it }
    province: {
      "北京市": { code: "110000", cities: ["北京市"] },
      "天津市": { code: "120000", cities: ["天津市"] },
      "河北省": { code: "130000", cities: ["石家庄市", "唐山市", "秦皇岛市"] },
      "山西省": { code: "140000", cities: ["太原市", "大同市"] },
      "内蒙古自治区": { code: "150000", cities: ["呼和浩特市", "包头市"] },
      "辽宁省": { code: "210000", cities: ["沈阳市", "大连市"] },
      "吉林省": { code: "220000", cities: ["长春市", "吉林市"] },
      "黑龙江省": { code: "230000", cities: ["哈尔滨市", "齐齐哈尔市"] },
      "上海市": { code: "310000", cities: ["上海市"] },
      "江苏省": { code: "320000", cities: ["南京市", "无锡市", "苏州市"] },
      "浙江省": { code: "330000", cities: ["杭州市", "宁波市", "温州市"] },
      "安徽省": { code: "340000", cities: ["合肥市", "芜湖市"] },
      "福建省": { code: "350000", cities: ["福州市", "厦门市"] },
      "江西省": { code: "360000", cities: ["南昌市", "九江市"] },
      "山东省": { code: "370000", cities: ["济南市", "青岛市"] },
      "河南省": { code: "410000", cities: ["郑州市", "洛阳市"] },
      "湖北省": { code: "420000", cities: ["武汉市", "宜昌市"] },
      "湖南省": { code: "430000", cities: ["长沙市", "株洲市"] },
      "广东省": { code: "440000", cities: ["广州市", "深圳市", "佛山市"] },
      "广西壮族自治区": { code: "450000", cities: ["南宁市", "柳州市"] },
      "海南省": { code: "460000", cities: ["海口市", "三亚市"] },
      "重庆市": { code: "500000", cities: ["重庆市"] },
      "四川省": { code: "510000", cities: ["成都市", "绵阳市"] },
      "贵州省": { code: "520000", cities: ["贵阳市", "遵义市"] },
      "云南省": { code: "530000", cities: ["昆明市", "大理市"] },
      "西藏自治区": { code: "540000", cities: ["拉萨市"] },
      "陕西省": { code: "610000", cities: ["西安市", "宝鸡市"] },
      "甘肃省": { code: "620000", cities: ["兰州市", "天水市"] },
      "青海省": { code: "630000", cities: ["西宁市"] },
      "宁夏回族自治区": { code: "640000", cities: ["银川市"] },
      "新疆维吾尔自治区": { code: "650000", cities: ["乌鲁木齐市"] },
      "台湾省": { code: "710000", cities: ["台北市", "高雄市"] },
      "香港特别行政区": { code: "810000", cities: ["香港特别行政区"] },
      "澳门特别行政区": { code: "820000", cities: ["澳门特别行政区"] },
    },

    // Cities: name -> { code, names of the districts under it }
    city: {
      "北京市": {
        code: "110100",
        districts: ["东城区", "西城区", "朝阳区", "海淀区", "顺义区"],
      },
      "天津市": {
        code: "120100",
        districts: ["和平区", "河东区", "西青区", "北辰区"],
      },
      "深圳市": {
        code: "440300",
        districts: ["罗湖区", "福田区", "南山区", "宝安区", "龙岗区"],
      },
      "广州市": {
        code: "440100",
        districts: ["越秀区", "海珠区", "天河区"],
      },
      "上海市": {
        code: "310100",
        districts: ["黄浦区", "徐汇区", "长宁区"],
      },
      "杭州市": {
        code: "330100",
        districts: ["上城区", "拱墅区", "西湖区"],
      },
    },

    // Districts: name -> { code, latitude, longitude }
    district: {
      "东城区": { code: "110101", lat: 39.938874, lng: 116.407413 },
      "西城区": { code: "110102", lat: 39.914745, lng: 116.366761 },
      "朝阳区": { code: "110105", lat: 39.948859, lng: 116.487501 },
      "海淀区": { code: "110108", lat: 39.992947, lng: 116.316486 },
      // 120101 is Heping District in Tianjin, so it must not be used here.
      "顺义区": { code: "110113", lat: 40.111562, lng: 116.658249 },
      "南山区": { code: "440305", lat: 22.542895, lng: 113.941649 },
      "福田区": { code: "440304", lat: 22.543096, lng: 114.057865 },
      "罗湖区": { code: "440303", lat: 22.552726, lng: 114.104663 },
    },
  };
  // 2. Canonical province / city / district name lists, read from the keys of
  //    the lookup table and used for substring matching.
  const [PROVINCES, CITIES, DISTRICTS] = ['province', 'city', 'district'].map(
    (level) => Object.keys(AREA_CODE_MAP[level]),
  );
  // 3. Suffix keywords that mark a building / point-of-interest name.
  const POI_KEYWORDS = [
    "大厦",
    "小区",
    "花园",
    "公寓",
    "写字楼",
    "酒店",
    "广场",
    "中心",
    "别墅",
    "商铺",
  ];
  70. // ===================== 工具函数 =====================
  /**
   * Extract the first mobile number found in the text (pure function).
   *
   * A number is "1", a digit 3-9, then nine more digits, written either as
   * 11 contiguous digits or in 3-4-4 groups separated by a hyphen or a space
   * (for example "138-1234-5678").
   *
   * @param {string} text - Input text.
   * @returns {string} The number as bare digits, or '' when nothing matches.
   */
  function extractPhone(text) {
    if (!text) return '';
    // Groups are 3 + 4 + 4 digits. The earlier grouped alternative demanded
    // 4 + 4 + 4 (twelve digits), so separated numbers were never recognised.
    const phoneReg = /1[3-9]\d[- ]?\d{4}[- ]?\d{4}/;
    const match = String(text).match(phoneReg);
    // Drop the separators so callers always receive digits only.
    return match ? match[0].replace(/\D/g, '') : '';
  }
  /**
   * Extract a person's name from the text (pure function).
   *
   * Preference order:
   *   1. the first run of 2-4 Han characters that stands alone, i.e. is not
   *      touching another Han character on either side;
   *   2. otherwise the first 2-4 Han characters found anywhere.
   *
   * Step 1 keeps a long address fragment that precedes the name from being
   * truncated and returned in its place.
   *
   * @param {string} text - Input text.
   * @returns {string} The extracted name, or '' when no 2+ Han run exists.
   */
  function extractName(text) {
    if (!text) return '';
    const source = String(text);

    // A capture group is used instead of a lookbehind for the left boundary.
    const isolated = source.match(
      /(?:^|[^\u4e00-\u9fa5])([\u4e00-\u9fa5]{2,4})(?![\u4e00-\u9fa5])/,
    );
    if (isolated) return isolated[1];

    // Fallback: leading 2-4 characters of the first Han run.
    const leading = source.match(/[\u4e00-\u9fa5]{2,4}/);
    return leading ? leading[0] : '';
  }
  /**
   * Pull a building / point-of-interest name out of a detailed address.
   *
   * The name is the first stretch of 2-10 Han characters, ASCII letters or
   * digits that is immediately followed by one of the POI_KEYWORDS suffixes;
   * the suffix is part of the returned text.
   *
   * @param {string} detail - Detailed address.
   * @returns {string} The POI name, or '' when no keyword is present.
   */
  function extractPOIName(detail) {
    if (!detail) {
      return '';
    }
    const suffixes = POI_KEYWORDS.join('|');
    const pattern = new RegExp(`[\\u4e00-\\u9fa5a-zA-Z0-9]{2,10}(${suffixes})`, 'g');
    // With the global flag match() yields every full match; only the first
    // one is wanted.
    const [firstHit = ''] = detail.match(pattern) ?? [];
    return firstHit;
  }
  /**
   * Look up the administrative codes for a province / city / district triple.
   *
   * @param {string} province - Province name.
   * @param {string} city - City name.
   * @param {string} district - District name.
   * @returns {object} { provinceCode, cityCode, districtCode }; a name that
   *   is not in AREA_CODE_MAP yields ''.
   */
  function getAreaCodes(province, city, district) {
    const codeFor = (level, name) => {
      const entry = AREA_CODE_MAP[level][name];
      if (entry === undefined || entry === null) {
        return '';
      }
      return entry.code || '';
    };

    const provinceCode = codeFor('province', province);
    const cityCode = codeFor('city', city);
    const districtCode = codeFor('district', district);
    return { provinceCode, cityCode, districtCode };
  }
  /**
   * Resolve coordinates for a region, from most to least specific: the
   * district entry in AREA_CODE_MAP, then a built-in city table, then a
   * built-in province table, and finally a fixed default.
   *
   * @param {string} province - Province name.
   * @param {string} city - City name.
   * @param {string} district - District name.
   * @returns {object} { latitude, longitude }
   */
  function getLatLng(province, city, district) {
    // City-level fallback coordinates (extend as needed).
    const cityCenters = {
      "北京市": { lat: 39.9042, lng: 116.4074 },
      "上海市": { lat: 31.2304, lng: 121.4737 },
      "深圳市": { lat: 22.5431, lng: 114.0579 },
      "广州市": { lat: 23.1289, lng: 113.2655 },
    };
    // Province-level fallback coordinates.
    const provinceCenters = {
      "广东省": { lat: 23.1289, lng: 113.2655 },
      "江苏省": { lat: 32.0473, lng: 118.7624 },
      "浙江省": { lat: 30.2795, lng: 120.1576 },
    };

    // Ordered by priority; the first entry that resolved to a record wins.
    const candidates = [
      district && AREA_CODE_MAP.district[district],
      city && cityCenters[city],
      province && provinceCenters[province],
    ];
    const resolved = candidates.find(Boolean);
    if (resolved) {
      return { latitude: resolved.lat, longitude: resolved.lng };
    }

    // Nothing matched: fall back to the default point (Beijing).
    return { latitude: 39.9042, longitude: 116.4074 };
  }
  /**
   * Recognise province, city and district names in the text (pure function).
   *
   * Each level is matched by substring, longest name first, and the first
   * occurrence of the matched name is removed from the text.
   *
   * - Cities are searched among the cities listed under the matched province;
   *   with no province, among every city known to AREA_CODE_MAP.
   * - A municipality (a province whose city list contains its own name, e.g.
   *   "北京市") is also used as the city when the text does not repeat it.
   * - When only a city is written, the province is back-filled from the
   *   province whose city list contains it.
   * - Districts are searched among the districts listed under the matched
   *   city. With no city, every known district is tried. A city that has no
   *   district list yields no district, so another city's district of the same
   *   name is never attached to it.
   *
   * @param {string} text - Input text.
   * @returns {object} { province, city, district, remainingText }; unmatched
   *   levels are ''.
   */
  function recognizeArea(text) {
    if (!text || typeof text !== 'string') {
      return { province: '', city: '', district: '', remainingText: '' };
    }

    const provinceTable = AREA_CODE_MAP.province;
    const cityTable = AREA_CODE_MAP.city;
    let remainingText = text.trim();

    // Longest names first so a short name cannot shadow a longer one.
    const longestFirst = (names) =>
      [...new Set(names)].sort((a, b) => b.length - a.length);

    // Find the first candidate present in the text and strip it out.
    const consume = (candidates) => {
      const hit =
        longestFirst(candidates).find((name) => remainingText.includes(name)) ?? '';
      if (hit) {
        remainingText = remainingText.replace(hit, '').trim();
      }
      return hit;
    };

    // 1. Province.
    let province = consume(PROVINCES);

    // 2. City. Use the province's own city list, not just the cities that
    //    happen to have an entry in the city table.
    const provinceCities = provinceTable[province]?.cities;
    const everyKnownCity = () => [
      ...CITIES,
      ...Object.values(provinceTable).flatMap((entry) => entry.cities ?? []),
    ];
    let city = consume(provinceCities ?? everyKnownCity());
    if (!city && provinceCities?.includes(province)) {
      // Municipality: the province match already consumed the shared name.
      city = province;
    }
    if (city && !province) {
      province =
        PROVINCES.find((name) => provinceTable[name]?.cities?.includes(city)) ?? '';
    }

    // 3. District.
    const districtCandidates = city ? (cityTable[city]?.districts ?? []) : DISTRICTS;
    const district = consume(districtCandidates);

    return { province, city, district, remainingText };
  }
  /**
   * Parse a free-form shipping address into a structured record.
   *
   * Steps: extract and strip the phone number, recognise province / city /
   * district, extract the receiver name, remove a repeated region prefix from
   * what is left (the detail), then attach codes, coordinates and POI name.
   *
   * @param {string} text - Input text.
   * @param {object} [options] - Optional settings.
   * @param {boolean} [options.isDefault=false] - Whether this is the default address.
   * @returns {object} { receiver, phone, province, provinceCode, city,
   *   cityCode, district, districtCode, detail, poiName, latitude, longitude,
   *   isDefault }. For blank or non-string input every string field is ''
   *   and latitude / longitude are 0.
   */
  const parseFullAddress = (text, options = {}) => {
    // `?? {}` also covers an explicit null, which a default parameter does not.
    const { isDefault = false } = options ?? {};

    let remainingText = typeof text === 'string' ? text.trim() : '';
    if (!remainingText) {
      return {
        receiver: '',
        phone: '',
        province: '',
        provinceCode: '',
        city: '',
        cityCode: '',
        district: '',
        districtCode: '',
        detail: '',
        poiName: '',
        latitude: 0,
        longitude: 0,
        isDefault,
      };
    }

    // 1. Phone number. Strip only the digits that were actually extracted
    //    (optionally separated by "-" or a space). A generic \d{11} pattern
    //    would also wipe unrelated 11-digit runs from the address.
    const phone = extractPhone(remainingText);
    if (phone) {
      const phonePattern = new RegExp(phone.split('').join('[- ]?'), 'g');
      remainingText = remainingText
        .replace(phonePattern, '')
        .replace(/\s+/g, ' ')
        .trim();
    }

    // 2. Province / city / district.
    const area = recognizeArea(remainingText);
    const { province, city, district } = area;
    remainingText = area.remainingText;

    // 3. Receiver name. No reversed-text retry: reversing the text cannot
    //    create a Han run that the forward scan did not already see.
    const receiver = extractName(remainingText);
    if (receiver) {
      remainingText = remainingText.replace(receiver, '').trim();
    }

    // 4. Detail: drop one repeated region prefix if the text still has it.
    let detail = remainingText;
    const repeatedPrefix = [
      `${province}${city}${district}`,
      `${province}${city}`,
      `${city}${district}`,
    ].find((prefix) => prefix !== '' && detail.includes(prefix));
    if (repeatedPrefix) {
      detail = detail.replace(repeatedPrefix, '').trim();
    }

    // 5. Derived fields.
    const { provinceCode, cityCode, districtCode } = getAreaCodes(province, city, district);
    const { latitude, longitude } = getLatLng(province, city, district);
    const poiName = extractPOIName(detail);

    return {
      receiver, // receiver name
      phone, // mobile number, digits only
      province, // province name
      provinceCode, // province code, e.g. 440000
      city, // city name
      cityCode, // city code, e.g. 440300
      district, // district name
      districtCode, // district code, e.g. 440305
      detail, // detailed address
      poiName, // POI (building / estate) name
      latitude,
      longitude,
      isDefault, // default-address flag (false unless requested)
    };
  };
  291. // 导出函数
  292. module.exports = { parseFullAddress };