hadoop - Error al crear una tabla de Hive para cargar datos de Twitter
hive flume (2)
Intento crear una tabla externa y cargar en ella datos de Twitter. Al crear la tabla, recibo el siguiente error y no consigo identificar su causa.
hive> ADD JAR /usr/local/hive/lib/hive-serdes-1.0-SNAPSHOT.jar
> ;
Added [/usr/local/hive/lib/hive-serdes-1.0-SNAPSHOT.jar] to class path
Added resources: [/usr/local/hive/lib/hive-serdes-1.0-SNAPSHOT.jar]
hive> CREATE EXTERNAL TABLE tweets (
> id BIGINT,
> created_at STRING,
> source STRING,
> favorited BOOLEAN,
> retweeted_status STRUCT<
> text:STRING,
> user:STRUCT<screen_name:STRING,name:STRING>,
> retweet_count:INT>,
> entities STRUCT<
> urls:ARRAY<STRUCT<expanded_url:STRING>>,
> user_mentions:ARRAY<STRUCT<screen_name:STRING,name:STRING>>,
> hashtags:ARRAY<STRUCT<text:STRING>>>,
> text STRING,
> user STRUCT<
> screen_name:STRING,
> name:STRING,
> friends_count:INT,
> followers_count:INT,
> statuses_count:INT,
> verified:BOOLEAN,
> utc_offset:INT,
> time_zone:STRING>,
> in_reply_to_screen_name STRING
> )
> PARTITIONED BY (datehour INT)
> ROW FORMAT SERDE 'com.cloudera.hive.serde.JSONSerDe'
> LOCATION '/user/flume/tweets/01092015';
A continuación se muestra el error:
FailedPredicateException(identifier,{useSQL11ReservedKeywordsForIdentifier()}?)
at org.apache.hadoop.hive.ql.parse.HiveParser_IdentifiersParser.identifier(HiveParser_IdentifiersParser.java:10924)
at org.apache.hadoop.hive.ql.parse.HiveParser.identifier(HiveParser.java:45850)
at org.apache.hadoop.hive.ql.parse.HiveParser.columnNameColonType(HiveParser.java:38211)
at org.apache.hadoop.hive.ql.parse.HiveParser.columnNameColonTypeList(HiveParser.java:36342)
at org.apache.hadoop.hive.ql.parse.HiveParser.structType(HiveParser.java:39707)
at org.apache.hadoop.hive.ql.parse.HiveParser.type(HiveParser.java:38655)
at org.apache.hadoop.hive.ql.parse.HiveParser.colType(HiveParser.java:38367)
at org.apache.hadoop.hive.ql.parse.HiveParser.columnNameType(HiveParser.java:38051)
at org.apache.hadoop.hive.ql.parse.HiveParser.columnNameTypeList(HiveParser.java:36203)
at org.apache.hadoop.hive.ql.parse.HiveParser.createTableStatement(HiveParser.java:5214)
at org.apache.hadoop.hive.ql.parse.HiveParser.ddlStatement(HiveParser.java:2640)
at org.apache.hadoop.hive.ql.parse.HiveParser.execStatement(HiveParser.java:1650)
at org.apache.hadoop.hive.ql.parse.HiveParser.statement(HiveParser.java:1109)
at org.apache.hadoop.hive.ql.parse.ParseDriver.parse(ParseDriver.java:202)
at org.apache.hadoop.hive.ql.parse.ParseDriver.parse(ParseDriver.java:166)
at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:396)
at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:308)
at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:1122)
at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1170)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1059)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1049)
at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:213)
at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:165)
at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:376)
at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:736)
at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:681)
at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:621)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
FAILED: ParseException line 9:2 Failed to recognize predicate 'user'. Failed rule: 'identifier' in column specification.
Y a continuación están los datos de Twitter disponibles en la ruta de HDFS. ¿Cómo puedo crear una tabla adecuada para estos datos de Twitter?
{
"extended_entities": {
"media": [{
"display_url": "pic.twitter.com/9SoA83sVvP",
"indices": [100, 123],
"sizes": {
"small": {
"w": 340,
"h": 340,
"resize": "fit"
},
"large": {
"w": 480,
"h": 480,
"resize": "fit"
},
"thumb": {
"w": 150,
"h": 150,
"resize": "crop"
},
"medium": {
"w": 480,
"h": 480,
"resize": "fit"
}
},
"id_str": "685710180164579329",
"expanded_url": "http://twitter.com/add7dave/status/685710518456209408/video/1",
"media_url_https": "https://pbs.twimg.com/ext_tw_video_thumb/685710180164579329/pu/img/4wOqavTprNIaMgjK.jpg",
"id": 685710180164579329,
"type": "video",
"media_url": "http://pbs.twimg.com/ext_tw_video_thumb/685710180164579329/pu/img/4wOqavTprNIaMgjK.jpg",
"url": "https://t.co/9SoA83sVvP",
"video_info": {
"aspect_ratio": [1, 1],
"duration_millis": 7567,
"variants": [{
"content_type": "application/x-mpegURL",
"url": "https://video.twimg.com/ext_tw_video/685710180164579329/pu/pl/6JnchC_1FWviydJV.m3u8"
}, {
"content_type": "application/dash+xml",
"url": "https://video.twimg.com/ext_tw_video/685710180164579329/pu/pl/6JnchC_1FWviydJV.mpd"
}, {
"content_type": "video/mp4",
"bitrate": 320000,
"url": "https://video.twimg.com/ext_tw_video/685710180164579329/pu/vid/240x240/W7suov-YC1Iq1-QT.mp4"
}, {
"content_type": "video/webm",
"bitrate": 832000,
"url": "https://video.twimg.com/ext_tw_video/685710180164579329/pu/vid/480x480/bDG_UfEw3jBM7z4e.webm"
}, {
"content_type": "video/mp4",
"bitrate": 832000,
"url": "https://video.twimg.com/ext_tw_video/685710180164579329/pu/vid/480x480/bDG_UfEw3jBM7z4e.mp4"
}]
}
}]
},
"in_reply_to_status_id_str": null,
"in_reply_to_status_id": null,
"created_at": "Sat Jan 09 06:31:42 +0000 2016",
"in_reply_to_user_id_str": null,
"source": "<a href=\"http://twitter.com/download/android\" rel=\"nofollow\">Twitter for Android<\/a>",
"retweet_count": 0,
"retweeted": false,
"geo": null,
"filter_level": "low",
"in_reply_to_screen_name": null,
"is_quote_status": false,
"id_str": "685710518456209408",
"in_reply_to_user_id": null,
"favorite_count": 0,
"id": 685710518456209408,
"text": "New video NO-17\n#BritanniaFilmfareAwards\n@GoodDayCookies\n@BritanniaIndLtd\nAmitabh Bachchan dialogue https://t.co/9SoA83sVvP",
"place": null,
"lang": "en",
"favorited": false,
"possibly_sensitive": false,
"coordinates": null,
"truncated": false,
"timestamp_ms": "1452321102142",
"entities": {
"urls": [],
"hashtags": [{
"indices": [16, 40],
"text": "BritanniaFilmfareAwards"
}],
"media": [{
"display_url": "pic.twitter.com/9SoA83sVvP",
"indices": [100, 123],
"sizes": {
"small": {
"w": 340,
"h": 340,
"resize": "fit"
},
"large": {
"w": 480,
"h": 480,
"resize": "fit"
},
"thumb": {
"w": 150,
"h": 150,
"resize": "crop"
},
"medium": {
"w": 480,
"h": 480,
"resize": "fit"
}
},
"id_str": "685710180164579329",
"expanded_url": "http://twitter.com/add7dave/status/685710518456209408/video/1",
"media_url_https": "https://pbs.twimg.com/ext_tw_video_thumb/685710180164579329/pu/img/4wOqavTprNIaMgjK.jpg",
"id": 685710180164579329,
"type": "photo",
"media_url": "http://pbs.twimg.com/ext_tw_video_thumb/685710180164579329/pu/img/4wOqavTprNIaMgjK.jpg",
"url": "https://t.co/9SoA83sVvP"
}],
"user_mentions": [{
"indices": [41, 56],
"screen_name": "GoodDayCookies",
"id_str": "2197439803",
"name": "Britannia Good Day",
"id": 2197439803
}, {
"indices": [57, 73],
"screen_name": "BritanniaIndLtd",
"id_str": "3281245460",
"name": "Britannia Industries",
"id": 3281245460
}],
"symbols": []
},
"contributors": null,
"user": {
"utc_offset": 19800,
"friends_count": 1517,
"profile_image_url_https": "https://pbs.twimg.com/profile_images/593327096736256001/TT8Ds75__normal.jpg",
"listed_count": 1,
"profile_background_image_url": "http://abs.twimg.com/images/themes/theme19/bg.gif",
"default_profile_image": false,
"favourites_count": 25,
"description": "Sharukhan, Kapil sharma , Narendra modi Fan (Supporter) be happy *↓*",
"created_at": "Thu Sep 15 08:04:58 +0000 2011",
"is_translator": false,
"profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme19/bg.gif",
"protected": false,
"screen_name": "add7dave",
"id_str": "373836462",
"profile_link_color": "9266CC",
"id": 373836462,
"geo_enabled": false,
"profile_background_color": "FFF04D",
"lang": "en",
"profile_sidebar_border_color": "000000",
"profile_text_color": "000000",
"verified": false,
"profile_image_url": "http://pbs.twimg.com/profile_images/593327096736256001/TT8Ds75__normal.jpg",
"time_zone": "Chennai",
"url": null,
"contributors_enabled": false,
"profile_background_tile": false,
"profile_banner_url": "https://pbs.twimg.com/profile_banners/373836462/1428993069",
"statuses_count": 21397,
"follow_request_sent": null,
"followers_count": 438,
"profile_use_background_image": true,
"default_profile": false,
"following": null,
"name": "aditya dave",
"location": "Bhavnagar, Gujarat",
"profile_sidebar_fill_color": "000000",
"notifications": null
}
}
Si consulta https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL , verá una lista de palabras clave reservadas, entre las cuales figura `user`. Por eso no puede usar `user` directamente como nombre de columna.
Puede escribir el nombre entre acentos graves, `user`, si quiere, y entonces sus consultas se verían así:
SELECT `user` FROM table;
pero como pueden ver, eso es un poco feo, así que probablemente sea mejor elegir un nombre de columna diferente.
También puede usar esto para deshabilitar la comprobación de palabras reservadas:
SET hive.support.sql11.reserved.keywords=false;