问题描述
我使用从 IMDB 收集信息并将它们传输到 MYSQL 数据库的应用程序导入了一些数据.
I have imported some data using an application that collects info from IMDB and transfers them into a MYSQL database.
这些字段似乎没有被规范化并且在 1 个字段中包含许多值
It seems the fields have not been normalized and contained many values within 1 field
例如:
Table Movie
MovieID Movie_Title Written_By
1 Movie1 Person1, Person2
2 Movie2 Person3
3 Movie3 Person4, Person2, Person6
有没有办法将值分开并将它们插入到另一个表中,这样的东西并且没有任何重复?
Is there way to separate the values and have them inserted into another table to something like this and without any duplicates?
Table Writers
WriterID Written_By MovieId
1 Person1 1
2 Person2 1
3 Person3 3
我做了一些谷歌搜索,发现我应该使用 PHP 处理这些数据.但我对 PHP 一窍不通.
I did some googling and found that I am supposed to process this data using PHP. But I have no knowledge with PHP at all.
有没有办法只使用 MYSQL 来转换这些数据?
Is there anyway to convert this data using just MYSQL?
推荐答案
您可以使用使用游标的存储过程来解决这个问题,但它不是很优雅,但也不是逗号分隔的作者列表!
You can use a stored procedure which uses a cursor to solve this but it's not very elegant but neither is a comma separated list of writers !
以下代码来自类似问题,但您最好彻底检查一下.
Had the following code lying around from a similar question but you'd better check it thoroughly.
希望有帮助:)
mysql> select * from movies_unf;
+---------+-------------+------------------------------------------------------+
| movieID | movie_title | written_by |
+---------+-------------+------------------------------------------------------+
| 1 | movie1 | person1, person2 |
| 2 | movie2 | person3 |
| 3 | movie3 | person4, person2, person6 |
| 4 | movie4 | person4, person4, person1, person2, person1,person8, |
| 5 | movie1 | person1, person2 |
+---------+-------------+------------------------------------------------------+
5 rows in set (0.00 sec)
call normalise_movies_unf();
mysql> select * from movies;
+----------+--------+
| movie_id | title |
+----------+--------+
| 1 | movie1 |
| 2 | movie2 |
| 3 | movie3 |
| 4 | movie4 |
+----------+--------+
4 rows in set (0.00 sec)
mysql> select * from writers;
+-----------+---------+
| writer_id | name |
+-----------+---------+
| 1 | person1 |
| 2 | person2 |
| 3 | person3 |
| 4 | person4 |
| 6 | person6 |
| 12 | person8 |
+-----------+---------+
6 rows in set (0.00 sec)
mysql> select * from movie_writers;
+----------+-----------+
| movie_id | writer_id |
+----------+-----------+
| 1 | 1 |
| 1 | 2 |
| 2 | 3 |
| 3 | 2 |
| 3 | 4 |
| 3 | 6 |
| 4 | 1 |
| 4 | 2 |
| 4 | 4 |
| 4 | 12 |
+----------+-----------+
10 rows in set (0.00 sec)
示例表格
drop table if exists movies_unf;
create table movies_unf
(
movieID int unsigned not null primary key,
movie_title varchar(255) not null,
written_by varchar(1024) not null
)engine=innodb;
insert into movies_unf values
(1,'movie1','person1, person2'),
(2,'movie2','person3'),
(3,'movie3','person4, person2, person6'),
(4,'movie4','person4, person4, person1, person2, person1,person8,'), -- dodgy writers
(5,'movie1','person1, person2'); -- dodgy movie
drop table if exists movies;
create table movies
(
movie_id int unsigned not null auto_increment primary key,
title varchar(255) unique not null
)engine=innodb;
drop table if exists writers;
create table writers
(
writer_id int unsigned not null auto_increment primary key,
name varchar(255) unique not null
)engine=innodb;
drop table if exists movie_writers;
create table movie_writers
(
movie_id int unsigned not null,
writer_id int unsigned not null,
primary key (movie_id, writer_id)
)engine=innodb;
存储过程
drop procedure if exists normalise_movies_unf;
delimiter #
create procedure normalise_movies_unf()
begin
declare v_movieID int unsigned default 0;
declare v_movie_title varchar(255);
declare v_writers varchar(1024);
declare v_movie_id int unsigned default 0;
declare v_writer_id int unsigned default 0;
declare v_name varchar(255);
declare v_csv_done tinyint unsigned default 0;
declare v_csv_idx int unsigned default 0;
declare v_done tinyint default 0;
declare v_cursor cursor for
select distinct movieID, movie_title, written_by from movies_unf;
declare continue handler for not found set v_done = 1;
start transaction;
open v_cursor;
repeat
fetch v_cursor into v_movieID, v_movie_title, v_writers;
set v_movie_title = trim(v_movie_title);
set v_writers = replace(v_writers,' ', '');
-- insert the movie
insert ignore into movies (title) values (v_movie_title);
select movie_id into v_movie_id from movies where title = v_movie_title;
-- split the out the writers and insert
set v_csv_done = 0;
set v_csv_idx = 1;
while not v_csv_done do
set v_name = substring(v_writers, v_csv_idx,
if(locate(',', v_writers, v_csv_idx) > 0,
locate(',', v_writers, v_csv_idx) - v_csv_idx,
length(v_writers)));
set v_name = trim(v_name);
if length(v_name) > 0 then
set v_csv_idx = v_csv_idx + length(v_name) + 1;
insert ignore into writers (name) values (v_name);
select writer_id into v_writer_id from writers where name = v_name;
insert ignore into movie_writers (movie_id, writer_id) values (v_movie_id, v_writer_id);
else
set v_csv_done = 1;
end if;
end while;
until v_done end repeat;
close v_cursor;
commit;
truncate table movies_unf;
end#
delimiter ;
编辑
修改 sproc 使其不会跳过关键值!
Amended sproc so that it doesnt skip key values !
drop procedure if exists normalise_movies_unf;
delimiter #
create procedure normalise_movies_unf()
begin
declare v_movieID int unsigned default 0;
declare v_movie_title varchar(255);
declare v_writers varchar(1024);
declare v_movie_id int unsigned default 0;
declare v_writer_id int unsigned default 0;
declare v_name varchar(255);
declare v_csv_done tinyint unsigned default 0;
declare v_csv_idx int unsigned default 0;
declare v_done tinyint default 0;
declare v_cursor cursor for
select distinct movieID, movie_title, written_by from movies_unf;
declare continue handler for not found set v_done = 1;
start transaction;
open v_cursor;
repeat
fetch v_cursor into v_movieID, v_movie_title, v_writers;
set v_movie_title = trim(v_movie_title);
set v_writers = replace(v_writers,' ', '');
-- insert the movie
if not exists (select 1 from movies where title = v_movie_title) then
insert ignore into movies (title) values (v_movie_title);
end if;
select movie_id into v_movie_id from movies where title = v_movie_title;
-- split the out the writers and insert
set v_csv_done = 0;
set v_csv_idx = 1;
while not v_csv_done do
set v_name = substring(v_writers, v_csv_idx,
if(locate(',', v_writers, v_csv_idx) > 0,
locate(',', v_writers, v_csv_idx) - v_csv_idx,
length(v_writers)));
set v_name = trim(v_name);
if length(v_name) > 0 then
set v_csv_idx = v_csv_idx + length(v_name) + 1;
if not exists (select 1 from writers where name = v_name) then
insert ignore into writers (name) values (v_name);
end if;
select writer_id into v_writer_id from writers where name = v_name;
insert ignore into movie_writers (movie_id, writer_id) values (v_movie_id, v_writer_id);
else
set v_csv_done = 1;
end if;
end while;
until v_done end repeat;
close v_cursor;
commit;
truncate table movies_unf;
end#
delimiter ;
这篇关于MYSQL - 将数据拆分为多行的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持跟版网!